| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.03451197479245361, | |
| "eval_steps": 500, | |
| "global_step": 40000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 4.3139968490567015e-05, | |
| "grad_norm": 36.896514892578125, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 2.8457, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 8.627993698113403e-05, | |
| "grad_norm": 35.37440490722656, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 2.1361, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.00012941990547170104, | |
| "grad_norm": 31.632505416870117, | |
| "learning_rate": 1e-05, | |
| "loss": 0.777, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.00017255987396226806, | |
| "grad_norm": 0.4134848415851593, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.4916, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.00021569984245283508, | |
| "grad_norm": 37.35564422607422, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.381, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.0002588398109434021, | |
| "grad_norm": 45.536712646484375, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2516, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0003019797794339691, | |
| "grad_norm": 19.42644500732422, | |
| "learning_rate": 1.9999136977245545e-05, | |
| "loss": 0.2801, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.0003451197479245361, | |
| "grad_norm": 54.93406295776367, | |
| "learning_rate": 1.9998273954491085e-05, | |
| "loss": 0.1867, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.00038825971641510314, | |
| "grad_norm": 41.097774505615234, | |
| "learning_rate": 1.9997410931736628e-05, | |
| "loss": 0.1246, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.00043139968490567016, | |
| "grad_norm": 0.38864001631736755, | |
| "learning_rate": 1.9996547908982168e-05, | |
| "loss": 0.1049, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0004745396533962372, | |
| "grad_norm": 0.041049182415008545, | |
| "learning_rate": 1.999568488622771e-05, | |
| "loss": 0.2315, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.0005176796218868042, | |
| "grad_norm": 0.002712072106078267, | |
| "learning_rate": 1.9994821863473255e-05, | |
| "loss": 0.1082, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.0005608195903773712, | |
| "grad_norm": 2.7014479201170616e-05, | |
| "learning_rate": 1.9993958840718798e-05, | |
| "loss": 0.0578, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.0006039595588679382, | |
| "grad_norm": 1.4746110439300537, | |
| "learning_rate": 1.9993095817964338e-05, | |
| "loss": 0.2376, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.0006470995273585052, | |
| "grad_norm": 28.263187408447266, | |
| "learning_rate": 1.999223279520988e-05, | |
| "loss": 0.0866, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.0006902394958490722, | |
| "grad_norm": 11.379521369934082, | |
| "learning_rate": 1.999136977245542e-05, | |
| "loss": 0.0782, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.0007333794643396393, | |
| "grad_norm": 0.0013383959885686636, | |
| "learning_rate": 1.9990506749700965e-05, | |
| "loss": 0.1511, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.0007765194328302063, | |
| "grad_norm": 6.122570991516113, | |
| "learning_rate": 1.9989643726946505e-05, | |
| "loss": 0.0885, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.0008196594013207733, | |
| "grad_norm": 0.15970896184444427, | |
| "learning_rate": 1.9988780704192048e-05, | |
| "loss": 0.1027, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.0008627993698113403, | |
| "grad_norm": 0.19297116994857788, | |
| "learning_rate": 1.998791768143759e-05, | |
| "loss": 0.07, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.0009059393383019073, | |
| "grad_norm": 0.00016763704479672015, | |
| "learning_rate": 1.998705465868313e-05, | |
| "loss": 0.1106, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.0009490793067924744, | |
| "grad_norm": 0.0003569670661818236, | |
| "learning_rate": 1.9986191635928675e-05, | |
| "loss": 0.0801, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.0009922192752830413, | |
| "grad_norm": 34.73747253417969, | |
| "learning_rate": 1.9985328613174218e-05, | |
| "loss": 0.152, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.0010353592437736083, | |
| "grad_norm": 0.5465057492256165, | |
| "learning_rate": 1.9984465590419758e-05, | |
| "loss": 0.082, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.0010784992122641753, | |
| "grad_norm": 0.0005398567882366478, | |
| "learning_rate": 1.99836025676653e-05, | |
| "loss": 0.1238, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.0011216391807547423, | |
| "grad_norm": 28.343412399291992, | |
| "learning_rate": 1.9982739544910845e-05, | |
| "loss": 0.079, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.0011647791492453094, | |
| "grad_norm": 0.18938343226909637, | |
| "learning_rate": 1.9981876522156385e-05, | |
| "loss": 0.0968, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.0012079191177358764, | |
| "grad_norm": 18.69659996032715, | |
| "learning_rate": 1.9981013499401928e-05, | |
| "loss": 0.152, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.0012510590862264434, | |
| "grad_norm": 26.612380981445312, | |
| "learning_rate": 1.9980150476647468e-05, | |
| "loss": 0.0549, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.0012941990547170104, | |
| "grad_norm": 0.0005812590825371444, | |
| "learning_rate": 1.997928745389301e-05, | |
| "loss": 0.0668, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.0013373390232075775, | |
| "grad_norm": 0.15176478028297424, | |
| "learning_rate": 1.997842443113855e-05, | |
| "loss": 0.1209, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.0013804789916981445, | |
| "grad_norm": 32.04401779174805, | |
| "learning_rate": 1.9977561408384094e-05, | |
| "loss": 0.1198, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.0014236189601887115, | |
| "grad_norm": 0.6346271634101868, | |
| "learning_rate": 1.9976698385629638e-05, | |
| "loss": 0.0893, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.0014667589286792785, | |
| "grad_norm": 40.96885681152344, | |
| "learning_rate": 1.997583536287518e-05, | |
| "loss": 0.1393, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.0015098988971698455, | |
| "grad_norm": 0.29022184014320374, | |
| "learning_rate": 1.997497234012072e-05, | |
| "loss": 0.0832, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.0015530388656604126, | |
| "grad_norm": 0.6716536283493042, | |
| "learning_rate": 1.9974109317366264e-05, | |
| "loss": 0.0558, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.0015961788341509796, | |
| "grad_norm": 0.19002307951450348, | |
| "learning_rate": 1.9973246294611804e-05, | |
| "loss": 0.0881, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.0016393188026415466, | |
| "grad_norm": 0.24587740004062653, | |
| "learning_rate": 1.9972383271857348e-05, | |
| "loss": 0.0776, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.0016824587711321136, | |
| "grad_norm": 28.058324813842773, | |
| "learning_rate": 1.9971520249102888e-05, | |
| "loss": 0.0907, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.0017255987396226807, | |
| "grad_norm": 80.17859649658203, | |
| "learning_rate": 1.997065722634843e-05, | |
| "loss": 0.0566, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.0017687387081132477, | |
| "grad_norm": 0.020453251898288727, | |
| "learning_rate": 1.9969794203593974e-05, | |
| "loss": 0.066, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.0018118786766038147, | |
| "grad_norm": 1.5788724340382032e-05, | |
| "learning_rate": 1.9968931180839514e-05, | |
| "loss": 0.1197, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.0018550186450943817, | |
| "grad_norm": 0.008944077417254448, | |
| "learning_rate": 1.9968068158085058e-05, | |
| "loss": 0.0495, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.0018981586135849487, | |
| "grad_norm": 3.117482719972031e-06, | |
| "learning_rate": 1.99672051353306e-05, | |
| "loss": 0.0969, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.0019412985820755155, | |
| "grad_norm": 4.803666114807129, | |
| "learning_rate": 1.996634211257614e-05, | |
| "loss": 0.0534, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.0019844385505660826, | |
| "grad_norm": 2.6723075279733166e-05, | |
| "learning_rate": 1.9965479089821684e-05, | |
| "loss": 0.0841, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.0020275785190566496, | |
| "grad_norm": 0.0005806431290693581, | |
| "learning_rate": 1.9964616067067228e-05, | |
| "loss": 0.1267, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.0020707184875472166, | |
| "grad_norm": 6.426816253224388e-05, | |
| "learning_rate": 1.9963753044312767e-05, | |
| "loss": 0.0404, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.0021138584560377836, | |
| "grad_norm": 5.425294876098633, | |
| "learning_rate": 1.996289002155831e-05, | |
| "loss": 0.1306, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.0021569984245283507, | |
| "grad_norm": 0.5509458780288696, | |
| "learning_rate": 1.996202699880385e-05, | |
| "loss": 0.1097, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.0022001383930189177, | |
| "grad_norm": 1.9030728992674995e-08, | |
| "learning_rate": 1.9961163976049394e-05, | |
| "loss": 0.0954, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.0022432783615094847, | |
| "grad_norm": 5.162133693695068, | |
| "learning_rate": 1.9960300953294934e-05, | |
| "loss": 0.0495, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.0022864183300000517, | |
| "grad_norm": 0.001043809694238007, | |
| "learning_rate": 1.9959437930540477e-05, | |
| "loss": 0.0578, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.0023295582984906187, | |
| "grad_norm": 10.08859634399414, | |
| "learning_rate": 1.995857490778602e-05, | |
| "loss": 0.0479, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.0023726982669811858, | |
| "grad_norm": 0.00013425115321297199, | |
| "learning_rate": 1.9957711885031564e-05, | |
| "loss": 0.0999, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.002415838235471753, | |
| "grad_norm": 0.5551994442939758, | |
| "learning_rate": 1.9956848862277104e-05, | |
| "loss": 0.0828, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.00245897820396232, | |
| "grad_norm": 1.9412257671356201, | |
| "learning_rate": 1.9955985839522647e-05, | |
| "loss": 0.0374, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.002502118172452887, | |
| "grad_norm": 0.2069123089313507, | |
| "learning_rate": 1.9955122816768187e-05, | |
| "loss": 0.0201, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.002545258140943454, | |
| "grad_norm": 1.6855838111951016e-05, | |
| "learning_rate": 1.995425979401373e-05, | |
| "loss": 0.0395, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.002588398109434021, | |
| "grad_norm": 4.1953666141125723e-07, | |
| "learning_rate": 1.9953396771259274e-05, | |
| "loss": 0.081, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.002631538077924588, | |
| "grad_norm": 29.5993709564209, | |
| "learning_rate": 1.9952533748504814e-05, | |
| "loss": 0.0553, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.002674678046415155, | |
| "grad_norm": 3.231801031233772e-07, | |
| "learning_rate": 1.9951670725750357e-05, | |
| "loss": 0.153, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.002717818014905722, | |
| "grad_norm": 2.516810655593872, | |
| "learning_rate": 1.9950807702995897e-05, | |
| "loss": 0.0492, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.002760957983396289, | |
| "grad_norm": 1.1921870708465576, | |
| "learning_rate": 1.994994468024144e-05, | |
| "loss": 0.0816, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.002804097951886856, | |
| "grad_norm": 2.2311925888061523, | |
| "learning_rate": 1.9949081657486984e-05, | |
| "loss": 0.0785, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.002847237920377423, | |
| "grad_norm": 6.03306652919855e-05, | |
| "learning_rate": 1.9948218634732527e-05, | |
| "loss": 0.0706, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.00289037788886799, | |
| "grad_norm": 0.014764097519218922, | |
| "learning_rate": 1.9947355611978067e-05, | |
| "loss": 0.0731, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.002933517857358557, | |
| "grad_norm": 0.007481596898287535, | |
| "learning_rate": 1.994649258922361e-05, | |
| "loss": 0.0535, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.002976657825849124, | |
| "grad_norm": 0.35124772787094116, | |
| "learning_rate": 1.994562956646915e-05, | |
| "loss": 0.056, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.003019797794339691, | |
| "grad_norm": 2.102785583701916e-05, | |
| "learning_rate": 1.9944766543714694e-05, | |
| "loss": 0.0412, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.003062937762830258, | |
| "grad_norm": 11.827704429626465, | |
| "learning_rate": 1.9943903520960234e-05, | |
| "loss": 0.0194, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.003106077731320825, | |
| "grad_norm": 0.0012801631819456816, | |
| "learning_rate": 1.9943040498205777e-05, | |
| "loss": 0.0573, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.003149217699811392, | |
| "grad_norm": 6.006156905158377e-09, | |
| "learning_rate": 1.994217747545132e-05, | |
| "loss": 0.0459, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.003192357668301959, | |
| "grad_norm": 2.7759302412277975e-08, | |
| "learning_rate": 1.994131445269686e-05, | |
| "loss": 0.1287, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.003235497636792526, | |
| "grad_norm": 2.103457186208857e-09, | |
| "learning_rate": 1.9940451429942404e-05, | |
| "loss": 0.0164, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.0032786376052830932, | |
| "grad_norm": 2.2541730981817665e-10, | |
| "learning_rate": 1.9939588407187947e-05, | |
| "loss": 0.0747, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.0033217775737736602, | |
| "grad_norm": 5.149080607225187e-05, | |
| "learning_rate": 1.9938725384433487e-05, | |
| "loss": 0.0055, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.0033649175422642273, | |
| "grad_norm": 1.0809308290481567, | |
| "learning_rate": 1.993786236167903e-05, | |
| "loss": 0.0665, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.0034080575107547943, | |
| "grad_norm": 40.65428924560547, | |
| "learning_rate": 1.9936999338924574e-05, | |
| "loss": 0.0494, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.0034511974792453613, | |
| "grad_norm": 2.0113883018493652, | |
| "learning_rate": 1.9936136316170113e-05, | |
| "loss": 0.0212, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.0034943374477359283, | |
| "grad_norm": 4.40586519241333, | |
| "learning_rate": 1.9935273293415657e-05, | |
| "loss": 0.0988, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.0035374774162264954, | |
| "grad_norm": 1.0736999684013426e-05, | |
| "learning_rate": 1.9934410270661197e-05, | |
| "loss": 0.093, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.0035806173847170624, | |
| "grad_norm": 8.809935820863757e-07, | |
| "learning_rate": 1.993354724790674e-05, | |
| "loss": 0.0384, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.0036237573532076294, | |
| "grad_norm": 2.5714776515960693, | |
| "learning_rate": 1.993268422515228e-05, | |
| "loss": 0.0904, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.0036668973216981964, | |
| "grad_norm": 1.5415873022561755e-09, | |
| "learning_rate": 1.9931821202397823e-05, | |
| "loss": 0.0201, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.0037100372901887634, | |
| "grad_norm": 0.0013566080015152693, | |
| "learning_rate": 1.9930958179643367e-05, | |
| "loss": 0.116, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.0037531772586793305, | |
| "grad_norm": 0.036448314785957336, | |
| "learning_rate": 1.993009515688891e-05, | |
| "loss": 0.0538, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.0037963172271698975, | |
| "grad_norm": 1.335322380065918, | |
| "learning_rate": 1.992923213413445e-05, | |
| "loss": 0.0343, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.0038394571956604645, | |
| "grad_norm": 0.001166568254120648, | |
| "learning_rate": 1.9928369111379993e-05, | |
| "loss": 0.0827, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.003882597164151031, | |
| "grad_norm": 2.504633656030819e-08, | |
| "learning_rate": 1.9927506088625533e-05, | |
| "loss": 0.0871, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.0039257371326415985, | |
| "grad_norm": 1.2820944903069176e-05, | |
| "learning_rate": 1.9926643065871077e-05, | |
| "loss": 0.0427, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.003968877101132165, | |
| "grad_norm": 0.0003728326119016856, | |
| "learning_rate": 1.9925780043116617e-05, | |
| "loss": 0.0448, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.004012017069622733, | |
| "grad_norm": 0.1788995862007141, | |
| "learning_rate": 1.992491702036216e-05, | |
| "loss": 0.0341, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.004055157038113299, | |
| "grad_norm": 1.2131690709793475e-05, | |
| "learning_rate": 1.9924053997607703e-05, | |
| "loss": 0.0456, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.004098297006603867, | |
| "grad_norm": 0.09960448741912842, | |
| "learning_rate": 1.9923190974853243e-05, | |
| "loss": 0.0219, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.004141436975094433, | |
| "grad_norm": 0.00010674689110601321, | |
| "learning_rate": 1.9922327952098786e-05, | |
| "loss": 0.0585, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.004184576943585001, | |
| "grad_norm": 12.699185371398926, | |
| "learning_rate": 1.992146492934433e-05, | |
| "loss": 0.1029, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.004227716912075567, | |
| "grad_norm": 3.298513320260099e-06, | |
| "learning_rate": 1.9920601906589873e-05, | |
| "loss": 0.0596, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.004270856880566135, | |
| "grad_norm": 7.301036021090113e-06, | |
| "learning_rate": 1.9919738883835413e-05, | |
| "loss": 0.0433, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.004313996849056701, | |
| "grad_norm": 1.848353167588357e-05, | |
| "learning_rate": 1.9918875861080956e-05, | |
| "loss": 0.0439, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.004357136817547269, | |
| "grad_norm": 3.848089909297414e-05, | |
| "learning_rate": 1.9918012838326496e-05, | |
| "loss": 0.1067, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.004400276786037835, | |
| "grad_norm": 5.0859394832514226e-05, | |
| "learning_rate": 1.991714981557204e-05, | |
| "loss": 0.0592, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.004443416754528403, | |
| "grad_norm": 3.35551449097693e-05, | |
| "learning_rate": 1.991628679281758e-05, | |
| "loss": 0.1294, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.004486556723018969, | |
| "grad_norm": 0.4632960259914398, | |
| "learning_rate": 1.9915423770063123e-05, | |
| "loss": 0.0896, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.004529696691509537, | |
| "grad_norm": 26.527536392211914, | |
| "learning_rate": 1.9914560747308663e-05, | |
| "loss": 0.0405, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.0045728366600001034, | |
| "grad_norm": 1.0410542017780244e-05, | |
| "learning_rate": 1.9913697724554206e-05, | |
| "loss": 0.0509, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.004615976628490671, | |
| "grad_norm": 29.268795013427734, | |
| "learning_rate": 1.991283470179975e-05, | |
| "loss": 0.0698, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.0046591165969812375, | |
| "grad_norm": 4.8836263886187226e-05, | |
| "learning_rate": 1.9911971679045293e-05, | |
| "loss": 0.0155, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.004702256565471805, | |
| "grad_norm": 0.228873610496521, | |
| "learning_rate": 1.9911108656290833e-05, | |
| "loss": 0.0254, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.0047453965339623715, | |
| "grad_norm": 0.5368197560310364, | |
| "learning_rate": 1.9910245633536376e-05, | |
| "loss": 0.0429, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.004788536502452939, | |
| "grad_norm": 2.2967957193031907e-05, | |
| "learning_rate": 1.9909382610781916e-05, | |
| "loss": 0.048, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.004831676470943506, | |
| "grad_norm": 0.20427367091178894, | |
| "learning_rate": 1.990851958802746e-05, | |
| "loss": 0.0611, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.004874816439434073, | |
| "grad_norm": 9.368510246276855, | |
| "learning_rate": 1.9907656565273003e-05, | |
| "loss": 0.0462, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.00491795640792464, | |
| "grad_norm": 0.08957739174365997, | |
| "learning_rate": 1.9906793542518543e-05, | |
| "loss": 0.063, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.004961096376415207, | |
| "grad_norm": 0.0012034045066684484, | |
| "learning_rate": 1.9905930519764086e-05, | |
| "loss": 0.0335, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.005004236344905774, | |
| "grad_norm": 0.02072218433022499, | |
| "learning_rate": 1.9905067497009626e-05, | |
| "loss": 0.0799, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.005047376313396341, | |
| "grad_norm": 0.008446129970252514, | |
| "learning_rate": 1.990420447425517e-05, | |
| "loss": 0.0395, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.005090516281886908, | |
| "grad_norm": 47.39201736450195, | |
| "learning_rate": 1.9903341451500713e-05, | |
| "loss": 0.0857, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.005133656250377474, | |
| "grad_norm": 4.237736720824614e-05, | |
| "learning_rate": 1.9902478428746256e-05, | |
| "loss": 0.1098, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.005176796218868042, | |
| "grad_norm": 3.733102630576468e-06, | |
| "learning_rate": 1.9901615405991796e-05, | |
| "loss": 0.0516, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.005219936187358608, | |
| "grad_norm": 0.0014495301293209195, | |
| "learning_rate": 1.990075238323734e-05, | |
| "loss": 0.009, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.005263076155849176, | |
| "grad_norm": 1.5238803143802215e-06, | |
| "learning_rate": 1.989988936048288e-05, | |
| "loss": 0.065, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.005306216124339742, | |
| "grad_norm": 3.455934120211168e-06, | |
| "learning_rate": 1.9899026337728423e-05, | |
| "loss": 0.0879, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.00534935609283031, | |
| "grad_norm": 1.4700952988278004e-07, | |
| "learning_rate": 1.9898163314973963e-05, | |
| "loss": 0.047, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.005392496061320876, | |
| "grad_norm": 0.3679034411907196, | |
| "learning_rate": 1.9897300292219506e-05, | |
| "loss": 0.0449, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.005435636029811444, | |
| "grad_norm": 0.8546851873397827, | |
| "learning_rate": 1.989643726946505e-05, | |
| "loss": 0.0829, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.0054787759983020105, | |
| "grad_norm": 0.003740283427760005, | |
| "learning_rate": 1.989557424671059e-05, | |
| "loss": 0.0324, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.005521915966792578, | |
| "grad_norm": 0.11098367720842361, | |
| "learning_rate": 1.9894711223956133e-05, | |
| "loss": 0.0848, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.0055650559352831445, | |
| "grad_norm": 6.6278211363624e-08, | |
| "learning_rate": 1.9893848201201676e-05, | |
| "loss": 0.0153, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.005608195903773712, | |
| "grad_norm": 8.399548079296437e-08, | |
| "learning_rate": 1.9892985178447216e-05, | |
| "loss": 0.0509, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.0056513358722642786, | |
| "grad_norm": 0.010032990016043186, | |
| "learning_rate": 1.989212215569276e-05, | |
| "loss": 0.0894, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.005694475840754846, | |
| "grad_norm": 3.270921524745063e-06, | |
| "learning_rate": 1.9891259132938302e-05, | |
| "loss": 0.0484, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.005737615809245413, | |
| "grad_norm": 4.165988445281982, | |
| "learning_rate": 1.9890396110183842e-05, | |
| "loss": 0.0521, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.00578075577773598, | |
| "grad_norm": 0.16357873380184174, | |
| "learning_rate": 1.9889533087429386e-05, | |
| "loss": 0.0437, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.005823895746226547, | |
| "grad_norm": 1.4861450381431496e-07, | |
| "learning_rate": 1.9888670064674926e-05, | |
| "loss": 0.0291, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.005867035714717114, | |
| "grad_norm": 0.000343196967151016, | |
| "learning_rate": 1.988780704192047e-05, | |
| "loss": 0.0642, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.005910175683207681, | |
| "grad_norm": 1.720488944556564e-05, | |
| "learning_rate": 1.988694401916601e-05, | |
| "loss": 0.0981, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.005953315651698248, | |
| "grad_norm": 0.05200350657105446, | |
| "learning_rate": 1.9886080996411552e-05, | |
| "loss": 0.0184, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.005996455620188815, | |
| "grad_norm": 23.398279190063477, | |
| "learning_rate": 1.9885217973657096e-05, | |
| "loss": 0.049, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.006039595588679382, | |
| "grad_norm": 5.3464435040950775e-05, | |
| "learning_rate": 1.988435495090264e-05, | |
| "loss": 0.0248, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.006082735557169949, | |
| "grad_norm": 0.01494416780769825, | |
| "learning_rate": 1.988349192814818e-05, | |
| "loss": 0.0294, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.006125875525660516, | |
| "grad_norm": 6.322508852463216e-05, | |
| "learning_rate": 1.9882628905393722e-05, | |
| "loss": 0.0075, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.006169015494151083, | |
| "grad_norm": 0.007586951367557049, | |
| "learning_rate": 1.9881765882639262e-05, | |
| "loss": 0.052, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.00621215546264165, | |
| "grad_norm": 2.4987362873263308e-11, | |
| "learning_rate": 1.9880902859884806e-05, | |
| "loss": 0.0137, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.006255295431132217, | |
| "grad_norm": 8.16138744354248, | |
| "learning_rate": 1.9880039837130345e-05, | |
| "loss": 0.109, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.006298435399622784, | |
| "grad_norm": 0.002273560268804431, | |
| "learning_rate": 1.987917681437589e-05, | |
| "loss": 0.0204, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.006341575368113351, | |
| "grad_norm": 0.00022486828675027937, | |
| "learning_rate": 1.9878313791621432e-05, | |
| "loss": 0.0764, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.006384715336603918, | |
| "grad_norm": 0.00014589431521017104, | |
| "learning_rate": 1.9877450768866972e-05, | |
| "loss": 0.0407, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.006427855305094485, | |
| "grad_norm": 0.0005719369510188699, | |
| "learning_rate": 1.9876587746112515e-05, | |
| "loss": 0.0648, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.006470995273585052, | |
| "grad_norm": 3.020178610313451e-06, | |
| "learning_rate": 1.987572472335806e-05, | |
| "loss": 0.0525, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.006514135242075619, | |
| "grad_norm": 4.380962934646959e-07, | |
| "learning_rate": 1.9874861700603602e-05, | |
| "loss": 0.0392, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.0065572752105661864, | |
| "grad_norm": 1.5524530681432225e-05, | |
| "learning_rate": 1.9873998677849142e-05, | |
| "loss": 0.047, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.006600415179056753, | |
| "grad_norm": 1.0878498869715258e-05, | |
| "learning_rate": 1.9873135655094685e-05, | |
| "loss": 0.0453, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.0066435551475473205, | |
| "grad_norm": 10.473357200622559, | |
| "learning_rate": 1.9872272632340225e-05, | |
| "loss": 0.0621, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.006686695116037887, | |
| "grad_norm": 0.05818796157836914, | |
| "learning_rate": 1.987140960958577e-05, | |
| "loss": 0.0403, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.0067298350845284545, | |
| "grad_norm": 3.924364833096661e-09, | |
| "learning_rate": 1.987054658683131e-05, | |
| "loss": 0.0317, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.006772975053019021, | |
| "grad_norm": 3.545212848621304e-06, | |
| "learning_rate": 1.9869683564076852e-05, | |
| "loss": 0.0633, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 0.006816115021509589, | |
| "grad_norm": 0.17004750669002533, | |
| "learning_rate": 1.9868820541322392e-05, | |
| "loss": 0.0147, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.006859254990000155, | |
| "grad_norm": 5.974680243525654e-05, | |
| "learning_rate": 1.9867957518567935e-05, | |
| "loss": 0.0579, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.006902394958490723, | |
| "grad_norm": 3.9863412126806e-08, | |
| "learning_rate": 1.986709449581348e-05, | |
| "loss": 0.0248, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.006945534926981289, | |
| "grad_norm": 0.8195998668670654, | |
| "learning_rate": 1.9866231473059022e-05, | |
| "loss": 0.1239, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 0.006988674895471857, | |
| "grad_norm": 0.0003940521564800292, | |
| "learning_rate": 1.9865368450304562e-05, | |
| "loss": 0.0727, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.007031814863962423, | |
| "grad_norm": 0.0001462361979065463, | |
| "learning_rate": 1.9864505427550105e-05, | |
| "loss": 0.0264, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 0.007074954832452991, | |
| "grad_norm": 4.075237214351546e-08, | |
| "learning_rate": 1.9863642404795645e-05, | |
| "loss": 0.0154, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.007118094800943557, | |
| "grad_norm": 5.172235432837624e-06, | |
| "learning_rate": 1.986277938204119e-05, | |
| "loss": 0.0256, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.007161234769434125, | |
| "grad_norm": 0.0007250295020639896, | |
| "learning_rate": 1.9861916359286732e-05, | |
| "loss": 0.0325, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.007204374737924691, | |
| "grad_norm": 8.068302154541016, | |
| "learning_rate": 1.9861053336532272e-05, | |
| "loss": 0.032, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 0.007247514706415259, | |
| "grad_norm": 11.65196704864502, | |
| "learning_rate": 1.9860190313777815e-05, | |
| "loss": 0.006, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.007290654674905825, | |
| "grad_norm": 1.6602513808194885e-09, | |
| "learning_rate": 1.9859327291023355e-05, | |
| "loss": 0.0565, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 0.007333794643396393, | |
| "grad_norm": 0.22325988113880157, | |
| "learning_rate": 1.98584642682689e-05, | |
| "loss": 0.0493, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.007376934611886959, | |
| "grad_norm": 0.0023358704056590796, | |
| "learning_rate": 1.985760124551444e-05, | |
| "loss": 0.0061, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 0.007420074580377527, | |
| "grad_norm": 1.97016873926259e-07, | |
| "learning_rate": 1.9856738222759985e-05, | |
| "loss": 0.0704, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.0074632145488680935, | |
| "grad_norm": 0.0003019660944119096, | |
| "learning_rate": 1.9855875200005525e-05, | |
| "loss": 0.0156, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 0.007506354517358661, | |
| "grad_norm": 0.014269077219069004, | |
| "learning_rate": 1.9855012177251068e-05, | |
| "loss": 0.0606, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.0075494944858492275, | |
| "grad_norm": 0.010774667374789715, | |
| "learning_rate": 1.9854149154496608e-05, | |
| "loss": 0.0129, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 0.007592634454339795, | |
| "grad_norm": 14.643841743469238, | |
| "learning_rate": 1.985328613174215e-05, | |
| "loss": 0.0368, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.0076357744228303616, | |
| "grad_norm": 0.004390218295156956, | |
| "learning_rate": 1.985242310898769e-05, | |
| "loss": 0.0193, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 0.007678914391320929, | |
| "grad_norm": 0.00026494322810322046, | |
| "learning_rate": 1.9851560086233235e-05, | |
| "loss": 0.0377, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.007722054359811496, | |
| "grad_norm": 0.3454723656177521, | |
| "learning_rate": 1.9850697063478778e-05, | |
| "loss": 0.0271, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 0.007765194328302062, | |
| "grad_norm": 1.240284319692364e-07, | |
| "learning_rate": 1.9849834040724318e-05, | |
| "loss": 0.0312, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.00780833429679263, | |
| "grad_norm": 0.0001445577945560217, | |
| "learning_rate": 1.984897101796986e-05, | |
| "loss": 0.0305, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 0.007851474265283197, | |
| "grad_norm": 4.175523482530252e-09, | |
| "learning_rate": 1.9848107995215405e-05, | |
| "loss": 0.0979, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.007894614233773764, | |
| "grad_norm": 0.035435471683740616, | |
| "learning_rate": 1.9847244972460945e-05, | |
| "loss": 0.0098, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 0.00793775420226433, | |
| "grad_norm": 26.931116104125977, | |
| "learning_rate": 1.9846381949706488e-05, | |
| "loss": 0.0262, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.007980894170754897, | |
| "grad_norm": 8.122495273710229e-06, | |
| "learning_rate": 1.984551892695203e-05, | |
| "loss": 0.0602, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 0.008024034139245465, | |
| "grad_norm": 2.0076650411593455e-11, | |
| "learning_rate": 1.984465590419757e-05, | |
| "loss": 0.0514, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.008067174107736032, | |
| "grad_norm": 2.9286837843756075e-08, | |
| "learning_rate": 1.9843792881443115e-05, | |
| "loss": 0.0657, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 0.008110314076226598, | |
| "grad_norm": 1.5581694841384888, | |
| "learning_rate": 1.9842929858688655e-05, | |
| "loss": 0.0335, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.008153454044717165, | |
| "grad_norm": 3.392365144350151e-08, | |
| "learning_rate": 1.9842066835934198e-05, | |
| "loss": 0.0701, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 0.008196594013207733, | |
| "grad_norm": 0.03891870751976967, | |
| "learning_rate": 1.9841203813179738e-05, | |
| "loss": 0.1115, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.0082397339816983, | |
| "grad_norm": 5.497531890869141, | |
| "learning_rate": 1.984034079042528e-05, | |
| "loss": 0.0065, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 0.008282873950188866, | |
| "grad_norm": 0.0006867619813419878, | |
| "learning_rate": 1.9839477767670825e-05, | |
| "loss": 0.0231, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.008326013918679433, | |
| "grad_norm": 0.000866669462993741, | |
| "learning_rate": 1.9838614744916368e-05, | |
| "loss": 0.0234, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 0.008369153887170001, | |
| "grad_norm": 0.061681024730205536, | |
| "learning_rate": 1.9837751722161908e-05, | |
| "loss": 0.0371, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.008412293855660568, | |
| "grad_norm": 7.284898515536042e-07, | |
| "learning_rate": 1.983688869940745e-05, | |
| "loss": 0.039, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 0.008455433824151135, | |
| "grad_norm": 5.737701980201848e-10, | |
| "learning_rate": 1.983602567665299e-05, | |
| "loss": 0.0145, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.008498573792641701, | |
| "grad_norm": 6.553115099450224e-07, | |
| "learning_rate": 1.9835162653898534e-05, | |
| "loss": 0.024, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 0.00854171376113227, | |
| "grad_norm": 7.23102075994575e-08, | |
| "learning_rate": 1.9834299631144074e-05, | |
| "loss": 0.0458, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.008584853729622836, | |
| "grad_norm": 5.95320443608216e-06, | |
| "learning_rate": 1.9833436608389618e-05, | |
| "loss": 0.0918, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 0.008627993698113403, | |
| "grad_norm": 7.1469521571998484e-06, | |
| "learning_rate": 1.983257358563516e-05, | |
| "loss": 0.0287, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.00867113366660397, | |
| "grad_norm": 0.00036231454578228295, | |
| "learning_rate": 1.98317105628807e-05, | |
| "loss": 0.0284, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 0.008714273635094538, | |
| "grad_norm": 8.159648132277653e-05, | |
| "learning_rate": 1.9830847540126244e-05, | |
| "loss": 0.0589, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.008757413603585104, | |
| "grad_norm": 0.0002320503263035789, | |
| "learning_rate": 1.9829984517371788e-05, | |
| "loss": 0.0296, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 0.00880055357207567, | |
| "grad_norm": 0.001181815518066287, | |
| "learning_rate": 1.982912149461733e-05, | |
| "loss": 0.0244, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.008843693540566237, | |
| "grad_norm": 2.497093198883249e-09, | |
| "learning_rate": 1.982825847186287e-05, | |
| "loss": 0.0337, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 0.008886833509056806, | |
| "grad_norm": 0.00030890764901414514, | |
| "learning_rate": 1.9827395449108414e-05, | |
| "loss": 0.033, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.008929973477547372, | |
| "grad_norm": 24.577367782592773, | |
| "learning_rate": 1.9826532426353954e-05, | |
| "loss": 0.0135, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 0.008973113446037939, | |
| "grad_norm": 1.9483505487442017, | |
| "learning_rate": 1.9825669403599498e-05, | |
| "loss": 0.0299, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.009016253414528505, | |
| "grad_norm": 0.0004972516908310354, | |
| "learning_rate": 1.9824806380845038e-05, | |
| "loss": 0.0107, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 0.009059393383019074, | |
| "grad_norm": 1.4932817649082608e-08, | |
| "learning_rate": 1.982394335809058e-05, | |
| "loss": 0.0567, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.00910253335150964, | |
| "grad_norm": 0.004500082693994045, | |
| "learning_rate": 1.982308033533612e-05, | |
| "loss": 0.0163, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 0.009145673320000207, | |
| "grad_norm": 6.5830713538161945e-06, | |
| "learning_rate": 1.9822217312581664e-05, | |
| "loss": 0.0359, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.009188813288490773, | |
| "grad_norm": 0.09253023564815521, | |
| "learning_rate": 1.9821354289827207e-05, | |
| "loss": 0.0179, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 0.009231953256981342, | |
| "grad_norm": 0.004253961145877838, | |
| "learning_rate": 1.982049126707275e-05, | |
| "loss": 0.0433, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.009275093225471908, | |
| "grad_norm": 0.0014189484063535929, | |
| "learning_rate": 1.981962824431829e-05, | |
| "loss": 0.0467, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 0.009318233193962475, | |
| "grad_norm": 0.0005026152357459068, | |
| "learning_rate": 1.9818765221563834e-05, | |
| "loss": 0.0137, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.009361373162453042, | |
| "grad_norm": 0.003253827104344964, | |
| "learning_rate": 1.9817902198809374e-05, | |
| "loss": 0.0135, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 0.00940451313094361, | |
| "grad_norm": 0.5753559470176697, | |
| "learning_rate": 1.9817039176054917e-05, | |
| "loss": 0.0707, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.009447653099434176, | |
| "grad_norm": 2.7666785626934143e-06, | |
| "learning_rate": 1.981617615330046e-05, | |
| "loss": 0.0042, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 0.009490793067924743, | |
| "grad_norm": 0.0010713053634390235, | |
| "learning_rate": 1.9815313130546e-05, | |
| "loss": 0.0248, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.00953393303641531, | |
| "grad_norm": 0.00012337288353592157, | |
| "learning_rate": 1.9814450107791544e-05, | |
| "loss": 0.0101, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 0.009577073004905878, | |
| "grad_norm": 2.0991153704130738e-08, | |
| "learning_rate": 1.9813587085037084e-05, | |
| "loss": 0.0319, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.009620212973396445, | |
| "grad_norm": 0.0001735202531563118, | |
| "learning_rate": 1.9812724062282627e-05, | |
| "loss": 0.0065, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 0.009663352941887011, | |
| "grad_norm": 0.0007401935290545225, | |
| "learning_rate": 1.981186103952817e-05, | |
| "loss": 0.0184, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.009706492910377578, | |
| "grad_norm": 5.382436825129844e-07, | |
| "learning_rate": 1.9810998016773714e-05, | |
| "loss": 0.0766, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 0.009749632878868146, | |
| "grad_norm": 5.5672944654361345e-06, | |
| "learning_rate": 1.9810134994019254e-05, | |
| "loss": 0.0082, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.009792772847358713, | |
| "grad_norm": 2.2267850852131232e-07, | |
| "learning_rate": 1.9809271971264797e-05, | |
| "loss": 0.019, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 0.00983591281584928, | |
| "grad_norm": 0.23477919399738312, | |
| "learning_rate": 1.9808408948510337e-05, | |
| "loss": 0.0295, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.009879052784339846, | |
| "grad_norm": 9.228908304237393e-09, | |
| "learning_rate": 1.980754592575588e-05, | |
| "loss": 0.0575, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 0.009922192752830414, | |
| "grad_norm": 0.00020697819127235562, | |
| "learning_rate": 1.980668290300142e-05, | |
| "loss": 0.0269, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.00996533272132098, | |
| "grad_norm": 0.19181561470031738, | |
| "learning_rate": 1.9805819880246964e-05, | |
| "loss": 0.0093, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 0.010008472689811547, | |
| "grad_norm": 3.362165080034174e-05, | |
| "learning_rate": 1.9804956857492507e-05, | |
| "loss": 0.0373, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.010051612658302114, | |
| "grad_norm": 0.3552068769931793, | |
| "learning_rate": 1.9804093834738047e-05, | |
| "loss": 0.0599, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 0.010094752626792682, | |
| "grad_norm": 1.6512422007508576e-05, | |
| "learning_rate": 1.980323081198359e-05, | |
| "loss": 0.0255, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.010137892595283249, | |
| "grad_norm": 6.555333614349365, | |
| "learning_rate": 1.9802367789229134e-05, | |
| "loss": 0.0162, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 0.010181032563773815, | |
| "grad_norm": 2.48828387260437, | |
| "learning_rate": 1.9801504766474674e-05, | |
| "loss": 0.0377, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.010224172532264382, | |
| "grad_norm": 0.005198315717279911, | |
| "learning_rate": 1.9800641743720217e-05, | |
| "loss": 0.0071, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 0.010267312500754949, | |
| "grad_norm": 0.0014223635662347078, | |
| "learning_rate": 1.979977872096576e-05, | |
| "loss": 0.0286, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.010310452469245517, | |
| "grad_norm": 1.555037556499883e-06, | |
| "learning_rate": 1.97989156982113e-05, | |
| "loss": 0.0429, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 0.010353592437736083, | |
| "grad_norm": 7.9471330642700195, | |
| "learning_rate": 1.9798052675456844e-05, | |
| "loss": 0.0401, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.01039673240622665, | |
| "grad_norm": 0.0001056401088135317, | |
| "learning_rate": 1.9797189652702384e-05, | |
| "loss": 0.008, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 0.010439872374717217, | |
| "grad_norm": 3.85151979571674e-05, | |
| "learning_rate": 1.9796326629947927e-05, | |
| "loss": 0.0309, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.010483012343207785, | |
| "grad_norm": 16.898605346679688, | |
| "learning_rate": 1.9795463607193467e-05, | |
| "loss": 0.0234, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 0.010526152311698352, | |
| "grad_norm": 3.0313758170308347e-09, | |
| "learning_rate": 1.979460058443901e-05, | |
| "loss": 0.0237, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.010569292280188918, | |
| "grad_norm": 0.0001202192361233756, | |
| "learning_rate": 1.9793737561684553e-05, | |
| "loss": 0.026, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 0.010612432248679485, | |
| "grad_norm": 1.240387376144625e-10, | |
| "learning_rate": 1.9792874538930097e-05, | |
| "loss": 0.0527, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.010655572217170053, | |
| "grad_norm": 11.890090942382812, | |
| "learning_rate": 1.9792011516175637e-05, | |
| "loss": 0.0577, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 0.01069871218566062, | |
| "grad_norm": 2.300609958183486e-05, | |
| "learning_rate": 1.979114849342118e-05, | |
| "loss": 0.0413, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.010741852154151186, | |
| "grad_norm": 3.607681719586253e-05, | |
| "learning_rate": 1.979028547066672e-05, | |
| "loss": 0.056, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 0.010784992122641753, | |
| "grad_norm": 0.007184322457760572, | |
| "learning_rate": 1.9789422447912263e-05, | |
| "loss": 0.0356, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.010828132091132321, | |
| "grad_norm": 0.03649460896849632, | |
| "learning_rate": 1.9788559425157807e-05, | |
| "loss": 0.0653, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 0.010871272059622888, | |
| "grad_norm": 2.2537233235198073e-05, | |
| "learning_rate": 1.9787696402403347e-05, | |
| "loss": 0.0298, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.010914412028113454, | |
| "grad_norm": 0.012440712191164494, | |
| "learning_rate": 1.978683337964889e-05, | |
| "loss": 0.0027, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 0.010957551996604021, | |
| "grad_norm": 0.0001454145967727527, | |
| "learning_rate": 1.978597035689443e-05, | |
| "loss": 0.0428, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.01100069196509459, | |
| "grad_norm": 4.73512305754209e-11, | |
| "learning_rate": 1.9785107334139973e-05, | |
| "loss": 0.0266, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 0.011043831933585156, | |
| "grad_norm": 0.4899098873138428, | |
| "learning_rate": 1.9784244311385517e-05, | |
| "loss": 0.0423, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.011086971902075722, | |
| "grad_norm": 3.1542436772724614e-05, | |
| "learning_rate": 1.978338128863106e-05, | |
| "loss": 0.0201, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 0.011130111870566289, | |
| "grad_norm": 2.1234811242720752e-08, | |
| "learning_rate": 1.97825182658766e-05, | |
| "loss": 0.0602, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.011173251839056857, | |
| "grad_norm": 2.4936113174334196e-09, | |
| "learning_rate": 1.9781655243122143e-05, | |
| "loss": 0.0289, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 0.011216391807547424, | |
| "grad_norm": 7.835155884095002e-07, | |
| "learning_rate": 1.9780792220367683e-05, | |
| "loss": 0.0126, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.01125953177603799, | |
| "grad_norm": 3.1845395369600737e-06, | |
| "learning_rate": 1.9779929197613227e-05, | |
| "loss": 0.0133, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 0.011302671744528557, | |
| "grad_norm": 6.416823072896705e-09, | |
| "learning_rate": 1.9779066174858766e-05, | |
| "loss": 0.0413, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.011345811713019125, | |
| "grad_norm": 6.80740213394165, | |
| "learning_rate": 1.977820315210431e-05, | |
| "loss": 0.0443, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 0.011388951681509692, | |
| "grad_norm": 0.012771312147378922, | |
| "learning_rate": 1.977734012934985e-05, | |
| "loss": 0.0383, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.011432091650000259, | |
| "grad_norm": 0.0008403750252909958, | |
| "learning_rate": 1.9776477106595393e-05, | |
| "loss": 0.0434, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 0.011475231618490825, | |
| "grad_norm": 1.2084444761276245, | |
| "learning_rate": 1.9775614083840936e-05, | |
| "loss": 0.0066, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.011518371586981394, | |
| "grad_norm": 9.330961781017777e-09, | |
| "learning_rate": 1.977475106108648e-05, | |
| "loss": 0.0325, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 0.01156151155547196, | |
| "grad_norm": 0.00011164277384523302, | |
| "learning_rate": 1.977388803833202e-05, | |
| "loss": 0.0956, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.011604651523962527, | |
| "grad_norm": 2.7169560326001374e-06, | |
| "learning_rate": 1.9773025015577563e-05, | |
| "loss": 0.0207, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 0.011647791492453093, | |
| "grad_norm": 0.0006356360972858965, | |
| "learning_rate": 1.9772161992823103e-05, | |
| "loss": 0.0045, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.011690931460943662, | |
| "grad_norm": 6.926347850821912e-05, | |
| "learning_rate": 1.9771298970068646e-05, | |
| "loss": 0.0176, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 0.011734071429434228, | |
| "grad_norm": 0.00017402067896910012, | |
| "learning_rate": 1.977043594731419e-05, | |
| "loss": 0.0284, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.011777211397924795, | |
| "grad_norm": 1.069779334561538e-10, | |
| "learning_rate": 1.976957292455973e-05, | |
| "loss": 0.0292, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 0.011820351366415361, | |
| "grad_norm": 0.17523643374443054, | |
| "learning_rate": 1.9768709901805273e-05, | |
| "loss": 0.0273, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.01186349133490593, | |
| "grad_norm": 9.821783065795898, | |
| "learning_rate": 1.9767846879050813e-05, | |
| "loss": 0.0706, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 0.011906631303396496, | |
| "grad_norm": 1.8948287561215693e-07, | |
| "learning_rate": 1.9766983856296356e-05, | |
| "loss": 0.0165, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.011949771271887063, | |
| "grad_norm": 5.998489086778136e-06, | |
| "learning_rate": 1.97661208335419e-05, | |
| "loss": 0.0372, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 0.01199291124037763, | |
| "grad_norm": 1.5009301900863647, | |
| "learning_rate": 1.9765257810787443e-05, | |
| "loss": 0.0387, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.012036051208868198, | |
| "grad_norm": 5.223755650263229e-09, | |
| "learning_rate": 1.9764394788032983e-05, | |
| "loss": 0.0828, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 0.012079191177358764, | |
| "grad_norm": 0.1192856878042221, | |
| "learning_rate": 1.9763531765278526e-05, | |
| "loss": 0.0286, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.012122331145849331, | |
| "grad_norm": 5.815771601191955e-06, | |
| "learning_rate": 1.9762668742524066e-05, | |
| "loss": 0.0541, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 0.012165471114339898, | |
| "grad_norm": 11.029925346374512, | |
| "learning_rate": 1.976180571976961e-05, | |
| "loss": 0.039, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.012208611082830466, | |
| "grad_norm": 0.00015492299280595034, | |
| "learning_rate": 1.976094269701515e-05, | |
| "loss": 0.0354, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 0.012251751051321032, | |
| "grad_norm": 4.5061292439640965e-06, | |
| "learning_rate": 1.9760079674260693e-05, | |
| "loss": 0.0364, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.012294891019811599, | |
| "grad_norm": 0.45702916383743286, | |
| "learning_rate": 1.9759216651506236e-05, | |
| "loss": 0.0313, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 0.012338030988302166, | |
| "grad_norm": 1.0066764311034149e-08, | |
| "learning_rate": 1.9758353628751776e-05, | |
| "loss": 0.0344, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.012381170956792734, | |
| "grad_norm": 7.227523610708886e-07, | |
| "learning_rate": 1.975749060599732e-05, | |
| "loss": 0.0351, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 0.0124243109252833, | |
| "grad_norm": 5.080125653478262e-09, | |
| "learning_rate": 1.9756627583242863e-05, | |
| "loss": 0.0136, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.012467450893773867, | |
| "grad_norm": 0.016180645674467087, | |
| "learning_rate": 1.9755764560488403e-05, | |
| "loss": 0.0407, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 0.012510590862264434, | |
| "grad_norm": 0.061310265213251114, | |
| "learning_rate": 1.9754901537733946e-05, | |
| "loss": 0.01, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.012553730830755002, | |
| "grad_norm": 7.248584552144166e-06, | |
| "learning_rate": 1.975403851497949e-05, | |
| "loss": 0.0178, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 0.012596870799245569, | |
| "grad_norm": 7.203379154205322, | |
| "learning_rate": 1.975317549222503e-05, | |
| "loss": 0.062, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.012640010767736135, | |
| "grad_norm": 1.126842835219577e-05, | |
| "learning_rate": 1.9752312469470573e-05, | |
| "loss": 0.0173, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 0.012683150736226702, | |
| "grad_norm": 0.0011432298924773932, | |
| "learning_rate": 1.9751449446716112e-05, | |
| "loss": 0.0923, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.01272629070471727, | |
| "grad_norm": 1.9043671954932506e-06, | |
| "learning_rate": 1.9750586423961656e-05, | |
| "loss": 0.047, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 0.012769430673207837, | |
| "grad_norm": 0.20942749083042145, | |
| "learning_rate": 1.9749723401207196e-05, | |
| "loss": 0.0156, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.012812570641698403, | |
| "grad_norm": 1.760947014872727e-07, | |
| "learning_rate": 1.974886037845274e-05, | |
| "loss": 0.0481, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 0.01285571061018897, | |
| "grad_norm": 2.280950639033108e-06, | |
| "learning_rate": 1.9747997355698282e-05, | |
| "loss": 0.0073, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.012898850578679536, | |
| "grad_norm": 0.019771773368120193, | |
| "learning_rate": 1.9747134332943826e-05, | |
| "loss": 0.0109, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 0.012941990547170105, | |
| "grad_norm": 0.7483711838722229, | |
| "learning_rate": 1.9746271310189366e-05, | |
| "loss": 0.0025, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.012985130515660671, | |
| "grad_norm": 0.0011623813770711422, | |
| "learning_rate": 1.974540828743491e-05, | |
| "loss": 0.0097, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 0.013028270484151238, | |
| "grad_norm": 0.00023206142941489816, | |
| "learning_rate": 1.974454526468045e-05, | |
| "loss": 0.0211, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.013071410452641805, | |
| "grad_norm": 8.044224841796677e-07, | |
| "learning_rate": 1.9743682241925992e-05, | |
| "loss": 0.0678, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 0.013114550421132373, | |
| "grad_norm": 2.867023241037714e-08, | |
| "learning_rate": 1.9742819219171536e-05, | |
| "loss": 0.0332, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.01315769038962294, | |
| "grad_norm": 3.529981640326696e-08, | |
| "learning_rate": 1.9741956196417076e-05, | |
| "loss": 0.0811, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 0.013200830358113506, | |
| "grad_norm": 3.617996844695881e-05, | |
| "learning_rate": 1.974109317366262e-05, | |
| "loss": 0.0281, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.013243970326604073, | |
| "grad_norm": 0.0002957701508421451, | |
| "learning_rate": 1.974023015090816e-05, | |
| "loss": 0.0005, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 0.013287110295094641, | |
| "grad_norm": 0.1449277251958847, | |
| "learning_rate": 1.9739367128153702e-05, | |
| "loss": 0.026, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.013330250263585208, | |
| "grad_norm": 3.980770713063464e-10, | |
| "learning_rate": 1.9738504105399246e-05, | |
| "loss": 0.0121, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 0.013373390232075774, | |
| "grad_norm": 6.5806302629312086e-09, | |
| "learning_rate": 1.973764108264479e-05, | |
| "loss": 0.0131, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.01341653020056634, | |
| "grad_norm": 10.989927291870117, | |
| "learning_rate": 1.973677805989033e-05, | |
| "loss": 0.0375, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 0.013459670169056909, | |
| "grad_norm": 0.028256021440029144, | |
| "learning_rate": 1.9735915037135872e-05, | |
| "loss": 0.005, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.013502810137547476, | |
| "grad_norm": 3.129288234049454e-05, | |
| "learning_rate": 1.9735052014381412e-05, | |
| "loss": 0.0145, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 0.013545950106038042, | |
| "grad_norm": 1.9001586970546214e-09, | |
| "learning_rate": 1.9734188991626955e-05, | |
| "loss": 0.0662, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.013589090074528609, | |
| "grad_norm": 2.575715734565165e-05, | |
| "learning_rate": 1.9733325968872495e-05, | |
| "loss": 0.0169, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 0.013632230043019177, | |
| "grad_norm": 0.00017570947238709778, | |
| "learning_rate": 1.973246294611804e-05, | |
| "loss": 0.094, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.013675370011509744, | |
| "grad_norm": 2.5118701563187074e-10, | |
| "learning_rate": 1.973159992336358e-05, | |
| "loss": 0.0255, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 0.01371850998000031, | |
| "grad_norm": 4.180213952764689e-09, | |
| "learning_rate": 1.9730736900609122e-05, | |
| "loss": 0.0447, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.013761649948490877, | |
| "grad_norm": 9.289252744792975e-08, | |
| "learning_rate": 1.9729873877854665e-05, | |
| "loss": 0.0102, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 0.013804789916981445, | |
| "grad_norm": 9.842972659157567e-09, | |
| "learning_rate": 1.972901085510021e-05, | |
| "loss": 0.0263, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.013847929885472012, | |
| "grad_norm": 5.562032222747803, | |
| "learning_rate": 1.972814783234575e-05, | |
| "loss": 0.0495, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 0.013891069853962578, | |
| "grad_norm": 0.07973814755678177, | |
| "learning_rate": 1.9727284809591292e-05, | |
| "loss": 0.0315, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.013934209822453145, | |
| "grad_norm": 4.2199195604553097e-07, | |
| "learning_rate": 1.9726421786836832e-05, | |
| "loss": 0.0532, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 0.013977349790943713, | |
| "grad_norm": 0.00012909203360322863, | |
| "learning_rate": 1.9725558764082375e-05, | |
| "loss": 0.0331, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.01402048975943428, | |
| "grad_norm": 0.023724447935819626, | |
| "learning_rate": 1.972469574132792e-05, | |
| "loss": 0.0243, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 0.014063629727924847, | |
| "grad_norm": 7.801064384693746e-06, | |
| "learning_rate": 1.972383271857346e-05, | |
| "loss": 0.0729, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.014106769696415413, | |
| "grad_norm": 0.00012842965952586383, | |
| "learning_rate": 1.9722969695819002e-05, | |
| "loss": 0.0076, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 0.014149909664905981, | |
| "grad_norm": 3.283237148821172e-08, | |
| "learning_rate": 1.9722106673064542e-05, | |
| "loss": 0.0496, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.014193049633396548, | |
| "grad_norm": 5.932063174007851e-10, | |
| "learning_rate": 1.9721243650310085e-05, | |
| "loss": 0.0793, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 0.014236189601887115, | |
| "grad_norm": 0.07802320271730423, | |
| "learning_rate": 1.972038062755563e-05, | |
| "loss": 0.0157, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.014279329570377681, | |
| "grad_norm": 1.4036957907137548e-07, | |
| "learning_rate": 1.9719517604801172e-05, | |
| "loss": 0.0221, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 0.01432246953886825, | |
| "grad_norm": 3.236153389707397e-08, | |
| "learning_rate": 1.9718654582046712e-05, | |
| "loss": 0.0001, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.014365609507358816, | |
| "grad_norm": 3.180664539337158, | |
| "learning_rate": 1.9717791559292255e-05, | |
| "loss": 0.0684, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 0.014408749475849383, | |
| "grad_norm": 6.371417839545757e-06, | |
| "learning_rate": 1.9716928536537795e-05, | |
| "loss": 0.006, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.01445188944433995, | |
| "grad_norm": 6.981757906032726e-06, | |
| "learning_rate": 1.971606551378334e-05, | |
| "loss": 0.0743, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 0.014495029412830518, | |
| "grad_norm": 0.9886574745178223, | |
| "learning_rate": 1.9715202491028878e-05, | |
| "loss": 0.0285, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.014538169381321084, | |
| "grad_norm": 36.159725189208984, | |
| "learning_rate": 1.971433946827442e-05, | |
| "loss": 0.0159, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 0.01458130934981165, | |
| "grad_norm": 4.837416648864746, | |
| "learning_rate": 1.9713476445519965e-05, | |
| "loss": 0.0125, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.014624449318302217, | |
| "grad_norm": 4.346982677816413e-06, | |
| "learning_rate": 1.9712613422765505e-05, | |
| "loss": 0.0152, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 0.014667589286792786, | |
| "grad_norm": 8.429530962139609e-10, | |
| "learning_rate": 1.9711750400011048e-05, | |
| "loss": 0.0035, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.014710729255283352, | |
| "grad_norm": 1.2465453437471297e-05, | |
| "learning_rate": 1.971088737725659e-05, | |
| "loss": 0.0067, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 0.014753869223773919, | |
| "grad_norm": 6.187327699080925e-07, | |
| "learning_rate": 1.971002435450213e-05, | |
| "loss": 0.0159, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.014797009192264485, | |
| "grad_norm": 0.316834419965744, | |
| "learning_rate": 1.9709161331747675e-05, | |
| "loss": 0.0063, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 0.014840149160755054, | |
| "grad_norm": 0.00014671437384095043, | |
| "learning_rate": 1.9708298308993218e-05, | |
| "loss": 0.0784, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.01488328912924562, | |
| "grad_norm": 1.0954934737128497e-08, | |
| "learning_rate": 1.9707435286238758e-05, | |
| "loss": 0.0238, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 0.014926429097736187, | |
| "grad_norm": 0.5361968278884888, | |
| "learning_rate": 1.97065722634843e-05, | |
| "loss": 0.0195, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.014969569066226754, | |
| "grad_norm": 3.5330817699432373, | |
| "learning_rate": 1.970570924072984e-05, | |
| "loss": 0.0445, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 0.015012709034717322, | |
| "grad_norm": 0.0001147388611570932, | |
| "learning_rate": 1.9704846217975385e-05, | |
| "loss": 0.0534, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.015055849003207888, | |
| "grad_norm": 1.4025573237541611e-11, | |
| "learning_rate": 1.9703983195220925e-05, | |
| "loss": 0.0443, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 0.015098988971698455, | |
| "grad_norm": 0.0013008522801101208, | |
| "learning_rate": 1.9703120172466468e-05, | |
| "loss": 0.0301, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.015142128940189022, | |
| "grad_norm": 4.471134662628174, | |
| "learning_rate": 1.970225714971201e-05, | |
| "loss": 0.0301, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 0.01518526890867959, | |
| "grad_norm": 6.183355708344607e-06, | |
| "learning_rate": 1.9701394126957555e-05, | |
| "loss": 0.0369, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.015228408877170157, | |
| "grad_norm": 7.665110751986504e-05, | |
| "learning_rate": 1.9700531104203095e-05, | |
| "loss": 0.0056, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 0.015271548845660723, | |
| "grad_norm": 2.106353521347046, | |
| "learning_rate": 1.9699668081448638e-05, | |
| "loss": 0.0272, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.01531468881415129, | |
| "grad_norm": 3.855154488974222e-07, | |
| "learning_rate": 1.9698805058694178e-05, | |
| "loss": 0.0352, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 0.015357828782641858, | |
| "grad_norm": 17.341279983520508, | |
| "learning_rate": 1.969794203593972e-05, | |
| "loss": 0.0465, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.015400968751132425, | |
| "grad_norm": 9.402146679349244e-05, | |
| "learning_rate": 1.9697079013185265e-05, | |
| "loss": 0.0183, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 0.015444108719622991, | |
| "grad_norm": 0.00015307770809158683, | |
| "learning_rate": 1.9696215990430805e-05, | |
| "loss": 0.0291, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.015487248688113558, | |
| "grad_norm": 3.735563609552628e-07, | |
| "learning_rate": 1.9695352967676348e-05, | |
| "loss": 0.0019, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 0.015530388656604124, | |
| "grad_norm": 5.729863187298179e-05, | |
| "learning_rate": 1.9694489944921888e-05, | |
| "loss": 0.0045, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.015573528625094693, | |
| "grad_norm": 2.0717274562542798e-09, | |
| "learning_rate": 1.969362692216743e-05, | |
| "loss": 0.018, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 0.01561666859358526, | |
| "grad_norm": 12.531591415405273, | |
| "learning_rate": 1.9692763899412974e-05, | |
| "loss": 0.0242, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.015659808562075828, | |
| "grad_norm": 3.573931508071837e-06, | |
| "learning_rate": 1.9691900876658518e-05, | |
| "loss": 0.0556, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 0.015702948530566394, | |
| "grad_norm": 1.851037545463896e-08, | |
| "learning_rate": 1.9691037853904058e-05, | |
| "loss": 0.0141, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.01574608849905696, | |
| "grad_norm": 3.601686694310047e-05, | |
| "learning_rate": 1.96901748311496e-05, | |
| "loss": 0.0541, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 0.015789228467547527, | |
| "grad_norm": 0.05700366199016571, | |
| "learning_rate": 1.968931180839514e-05, | |
| "loss": 0.0286, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.015832368436038094, | |
| "grad_norm": 3.566603901106191e-09, | |
| "learning_rate": 1.9688448785640684e-05, | |
| "loss": 0.0091, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 0.01587550840452866, | |
| "grad_norm": 0.00013142921670805663, | |
| "learning_rate": 1.9687585762886224e-05, | |
| "loss": 0.0254, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.015918648373019227, | |
| "grad_norm": 20.01519775390625, | |
| "learning_rate": 1.9686722740131768e-05, | |
| "loss": 0.0771, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 0.015961788341509794, | |
| "grad_norm": 0.1688498556613922, | |
| "learning_rate": 1.9685859717377308e-05, | |
| "loss": 0.0183, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.016004928310000364, | |
| "grad_norm": 0.030780350789427757, | |
| "learning_rate": 1.968499669462285e-05, | |
| "loss": 0.0251, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 0.01604806827849093, | |
| "grad_norm": 0.002585780341178179, | |
| "learning_rate": 1.9684133671868394e-05, | |
| "loss": 0.0718, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.016091208246981497, | |
| "grad_norm": 4.36324262409471e-05, | |
| "learning_rate": 1.9683270649113938e-05, | |
| "loss": 0.0203, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 0.016134348215472064, | |
| "grad_norm": 1.3234290463515208e-07, | |
| "learning_rate": 1.9682407626359478e-05, | |
| "loss": 0.0136, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.01617748818396263, | |
| "grad_norm": 1.555231143868241e-08, | |
| "learning_rate": 1.968154460360502e-05, | |
| "loss": 0.037, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 0.016220628152453197, | |
| "grad_norm": 2.4578237116656965e-06, | |
| "learning_rate": 1.968068158085056e-05, | |
| "loss": 0.0045, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.016263768120943763, | |
| "grad_norm": 0.009525042027235031, | |
| "learning_rate": 1.9679818558096104e-05, | |
| "loss": 0.0084, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 0.01630690808943433, | |
| "grad_norm": 22.186767578125, | |
| "learning_rate": 1.9678955535341647e-05, | |
| "loss": 0.0316, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.0163500480579249, | |
| "grad_norm": 6.056162419554312e-06, | |
| "learning_rate": 1.9678092512587187e-05, | |
| "loss": 0.0085, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 0.016393188026415467, | |
| "grad_norm": 1.4418605198684986e-09, | |
| "learning_rate": 1.967722948983273e-05, | |
| "loss": 0.0181, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.016436327994906033, | |
| "grad_norm": 5.71908742585947e-09, | |
| "learning_rate": 1.967636646707827e-05, | |
| "loss": 0.0073, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 0.0164794679633966, | |
| "grad_norm": 6.593646517671914e-09, | |
| "learning_rate": 1.9675503444323817e-05, | |
| "loss": 0.0685, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.016522607931887166, | |
| "grad_norm": 2.7447922229766846, | |
| "learning_rate": 1.9674640421569357e-05, | |
| "loss": 0.0368, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 0.016565747900377733, | |
| "grad_norm": 3.157795136488062e-09, | |
| "learning_rate": 1.96737773988149e-05, | |
| "loss": 0.0653, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.0166088878688683, | |
| "grad_norm": 6.913658580742776e-06, | |
| "learning_rate": 1.967291437606044e-05, | |
| "loss": 0.053, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 0.016652027837358866, | |
| "grad_norm": 3.1019378639030037e-06, | |
| "learning_rate": 1.9672051353305984e-05, | |
| "loss": 0.0392, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.016695167805849436, | |
| "grad_norm": 0.00028862591716460884, | |
| "learning_rate": 1.9671188330551524e-05, | |
| "loss": 0.0031, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 0.016738307774340003, | |
| "grad_norm": 2.975168058583222e-07, | |
| "learning_rate": 1.9670325307797067e-05, | |
| "loss": 0.0142, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.01678144774283057, | |
| "grad_norm": 6.055047379049938e-07, | |
| "learning_rate": 1.9669462285042607e-05, | |
| "loss": 0.0294, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 0.016824587711321136, | |
| "grad_norm": 0.0006536454311572015, | |
| "learning_rate": 1.966859926228815e-05, | |
| "loss": 0.0411, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.016867727679811702, | |
| "grad_norm": 0.0043412791565060616, | |
| "learning_rate": 1.9667736239533694e-05, | |
| "loss": 0.0477, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 0.01691086764830227, | |
| "grad_norm": 0.08467547595500946, | |
| "learning_rate": 1.9666873216779234e-05, | |
| "loss": 0.0041, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.016954007616792836, | |
| "grad_norm": 5.161958824828616e-07, | |
| "learning_rate": 1.9666010194024777e-05, | |
| "loss": 0.0828, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 0.016997147585283402, | |
| "grad_norm": 0.03497151657938957, | |
| "learning_rate": 1.966514717127032e-05, | |
| "loss": 0.0095, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.017040287553773972, | |
| "grad_norm": 0.0004174104833509773, | |
| "learning_rate": 1.966428414851586e-05, | |
| "loss": 0.0206, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 0.01708342752226454, | |
| "grad_norm": 0.00030457283719442785, | |
| "learning_rate": 1.9663421125761404e-05, | |
| "loss": 0.0092, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.017126567490755105, | |
| "grad_norm": 0.0026671765372157097, | |
| "learning_rate": 1.9662558103006947e-05, | |
| "loss": 0.0083, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 0.017169707459245672, | |
| "grad_norm": 20.56145668029785, | |
| "learning_rate": 1.9661695080252487e-05, | |
| "loss": 0.0259, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.01721284742773624, | |
| "grad_norm": 2.7404727006796747e-05, | |
| "learning_rate": 1.966083205749803e-05, | |
| "loss": 0.0165, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 0.017255987396226805, | |
| "grad_norm": 4.9371454480251487e-08, | |
| "learning_rate": 1.965996903474357e-05, | |
| "loss": 0.0121, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.017299127364717372, | |
| "grad_norm": 0.011251527816057205, | |
| "learning_rate": 1.9659106011989114e-05, | |
| "loss": 0.0033, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 0.01734226733320794, | |
| "grad_norm": 3.3487244088803436e-09, | |
| "learning_rate": 1.9658242989234654e-05, | |
| "loss": 0.0319, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.01738540730169851, | |
| "grad_norm": 0.00034460489405319095, | |
| "learning_rate": 1.9657379966480197e-05, | |
| "loss": 0.0215, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 0.017428547270189075, | |
| "grad_norm": 8.861123319547914e-07, | |
| "learning_rate": 1.965651694372574e-05, | |
| "loss": 0.0507, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.01747168723867964, | |
| "grad_norm": 0.0008550824131816626, | |
| "learning_rate": 1.9655653920971284e-05, | |
| "loss": 0.0499, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 0.017514827207170208, | |
| "grad_norm": 1.3901036766128527e-07, | |
| "learning_rate": 1.9654790898216824e-05, | |
| "loss": 0.0462, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.017557967175660775, | |
| "grad_norm": 0.06260337680578232, | |
| "learning_rate": 1.9653927875462367e-05, | |
| "loss": 0.0499, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 0.01760110714415134, | |
| "grad_norm": 1.0717659648662448e-07, | |
| "learning_rate": 1.9653064852707907e-05, | |
| "loss": 0.0155, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.017644247112641908, | |
| "grad_norm": 7.46982475874347e-09, | |
| "learning_rate": 1.965220182995345e-05, | |
| "loss": 0.0605, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 0.017687387081132475, | |
| "grad_norm": 2.092070280923508e-05, | |
| "learning_rate": 1.9651338807198994e-05, | |
| "loss": 0.002, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.017730527049623045, | |
| "grad_norm": 8.422440259892028e-06, | |
| "learning_rate": 1.9650475784444533e-05, | |
| "loss": 0.0041, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 0.01777366701811361, | |
| "grad_norm": 0.2332431972026825, | |
| "learning_rate": 1.9649612761690077e-05, | |
| "loss": 0.004, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.017816806986604178, | |
| "grad_norm": 4.870547076762932e-09, | |
| "learning_rate": 1.9648749738935617e-05, | |
| "loss": 0.0452, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 0.017859946955094744, | |
| "grad_norm": 5.206494506637682e-07, | |
| "learning_rate": 1.964788671618116e-05, | |
| "loss": 0.0045, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.01790308692358531, | |
| "grad_norm": 2.1451814973261207e-06, | |
| "learning_rate": 1.9647023693426703e-05, | |
| "loss": 0.006, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 0.017946226892075878, | |
| "grad_norm": 8.108095244097058e-06, | |
| "learning_rate": 1.9646160670672247e-05, | |
| "loss": 0.0345, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.017989366860566444, | |
| "grad_norm": 0.025016743689775467, | |
| "learning_rate": 1.9645297647917787e-05, | |
| "loss": 0.0532, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 0.01803250682905701, | |
| "grad_norm": 5.400533609645208e-06, | |
| "learning_rate": 1.964443462516333e-05, | |
| "loss": 0.0021, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.018075646797547577, | |
| "grad_norm": 2.8619383556360845e-06, | |
| "learning_rate": 1.964357160240887e-05, | |
| "loss": 0.0362, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 0.018118786766038147, | |
| "grad_norm": 3.4743165969848633, | |
| "learning_rate": 1.9642708579654413e-05, | |
| "loss": 0.0136, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.018161926734528714, | |
| "grad_norm": 59.8224983215332, | |
| "learning_rate": 1.9641845556899953e-05, | |
| "loss": 0.014, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 0.01820506670301928, | |
| "grad_norm": 8.128851186484098e-05, | |
| "learning_rate": 1.9640982534145497e-05, | |
| "loss": 0.0295, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.018248206671509847, | |
| "grad_norm": 3.548375752870925e-05, | |
| "learning_rate": 1.9640119511391037e-05, | |
| "loss": 0.0286, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 0.018291346640000414, | |
| "grad_norm": 0.0468142107129097, | |
| "learning_rate": 1.963925648863658e-05, | |
| "loss": 0.0525, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.01833448660849098, | |
| "grad_norm": 4.863815320277354e-06, | |
| "learning_rate": 1.9638393465882123e-05, | |
| "loss": 0.0063, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 0.018377626576981547, | |
| "grad_norm": 9.059208938566599e-10, | |
| "learning_rate": 1.9637530443127667e-05, | |
| "loss": 0.0217, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.018420766545472114, | |
| "grad_norm": 0.9207327365875244, | |
| "learning_rate": 1.9636667420373206e-05, | |
| "loss": 0.0054, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 0.018463906513962684, | |
| "grad_norm": 0.00036540269502438605, | |
| "learning_rate": 1.963580439761875e-05, | |
| "loss": 0.0188, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.01850704648245325, | |
| "grad_norm": 0.00022348039783537388, | |
| "learning_rate": 1.963494137486429e-05, | |
| "loss": 0.0025, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 0.018550186450943817, | |
| "grad_norm": 0.27767083048820496, | |
| "learning_rate": 1.9634078352109833e-05, | |
| "loss": 0.018, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.018593326419434383, | |
| "grad_norm": 0.022822152823209763, | |
| "learning_rate": 1.9633215329355376e-05, | |
| "loss": 0.0569, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 0.01863646638792495, | |
| "grad_norm": 0.00016692353528924286, | |
| "learning_rate": 1.9632352306600916e-05, | |
| "loss": 0.0227, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.018679606356415517, | |
| "grad_norm": 0.5533714890480042, | |
| "learning_rate": 1.963148928384646e-05, | |
| "loss": 0.0112, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 0.018722746324906083, | |
| "grad_norm": 0.030804995447397232, | |
| "learning_rate": 1.9630626261092e-05, | |
| "loss": 0.0083, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.01876588629339665, | |
| "grad_norm": 1.79214639501879e-05, | |
| "learning_rate": 1.9629763238337546e-05, | |
| "loss": 0.0079, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 0.01880902626188722, | |
| "grad_norm": 1.6093619492618672e-10, | |
| "learning_rate": 1.9628900215583086e-05, | |
| "loss": 0.0156, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.018852166230377786, | |
| "grad_norm": 0.005034497939050198, | |
| "learning_rate": 1.962803719282863e-05, | |
| "loss": 0.0623, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 0.018895306198868353, | |
| "grad_norm": 0.017401648685336113, | |
| "learning_rate": 1.962717417007417e-05, | |
| "loss": 0.0258, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.01893844616735892, | |
| "grad_norm": 2.2319347858428955, | |
| "learning_rate": 1.9626311147319713e-05, | |
| "loss": 0.0201, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 0.018981586135849486, | |
| "grad_norm": 8.550871825718787e-06, | |
| "learning_rate": 1.9625448124565253e-05, | |
| "loss": 0.009, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.019024726104340053, | |
| "grad_norm": 1.8346406704949914e-06, | |
| "learning_rate": 1.9624585101810796e-05, | |
| "loss": 0.0806, | |
| "step": 22050 | |
| }, | |
| { | |
| "epoch": 0.01906786607283062, | |
| "grad_norm": 12.84133243560791, | |
| "learning_rate": 1.9623722079056336e-05, | |
| "loss": 0.0097, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.019111006041321186, | |
| "grad_norm": 9.22921472579219e-09, | |
| "learning_rate": 1.962285905630188e-05, | |
| "loss": 0.0185, | |
| "step": 22150 | |
| }, | |
| { | |
| "epoch": 0.019154146009811756, | |
| "grad_norm": 1.6999269723892212, | |
| "learning_rate": 1.9621996033547423e-05, | |
| "loss": 0.0145, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.019197285978302323, | |
| "grad_norm": 2.9266016483306885, | |
| "learning_rate": 1.9621133010792963e-05, | |
| "loss": 0.0214, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 0.01924042594679289, | |
| "grad_norm": 0.13319005072116852, | |
| "learning_rate": 1.9620269988038506e-05, | |
| "loss": 0.0542, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.019283565915283456, | |
| "grad_norm": 1.2659254934987985e-05, | |
| "learning_rate": 1.961940696528405e-05, | |
| "loss": 0.0059, | |
| "step": 22350 | |
| }, | |
| { | |
| "epoch": 0.019326705883774022, | |
| "grad_norm": 2.33125811064383e-05, | |
| "learning_rate": 1.961854394252959e-05, | |
| "loss": 0.0414, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.01936984585226459, | |
| "grad_norm": 0.008146941661834717, | |
| "learning_rate": 1.9617680919775133e-05, | |
| "loss": 0.0129, | |
| "step": 22450 | |
| }, | |
| { | |
| "epoch": 0.019412985820755155, | |
| "grad_norm": 4.2442545236554e-05, | |
| "learning_rate": 1.9616817897020676e-05, | |
| "loss": 0.0237, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.019456125789245722, | |
| "grad_norm": 6.483288217395966e-08, | |
| "learning_rate": 1.9615954874266216e-05, | |
| "loss": 0.0036, | |
| "step": 22550 | |
| }, | |
| { | |
| "epoch": 0.019499265757736292, | |
| "grad_norm": 0.025942707434296608, | |
| "learning_rate": 1.961509185151176e-05, | |
| "loss": 0.0233, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.01954240572622686, | |
| "grad_norm": 0.004933039657771587, | |
| "learning_rate": 1.96142288287573e-05, | |
| "loss": 0.0279, | |
| "step": 22650 | |
| }, | |
| { | |
| "epoch": 0.019585545694717425, | |
| "grad_norm": 9.285894102262215e-12, | |
| "learning_rate": 1.9613365806002843e-05, | |
| "loss": 0.0137, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.019628685663207992, | |
| "grad_norm": 17.506160736083984, | |
| "learning_rate": 1.9612502783248383e-05, | |
| "loss": 0.0106, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 0.01967182563169856, | |
| "grad_norm": 1.2982255270799214e-07, | |
| "learning_rate": 1.9611639760493926e-05, | |
| "loss": 0.0066, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.019714965600189125, | |
| "grad_norm": 3.575518903176089e-08, | |
| "learning_rate": 1.961077673773947e-05, | |
| "loss": 0.0187, | |
| "step": 22850 | |
| }, | |
| { | |
| "epoch": 0.01975810556867969, | |
| "grad_norm": 0.04352926090359688, | |
| "learning_rate": 1.9609913714985013e-05, | |
| "loss": 0.0127, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.019801245537170258, | |
| "grad_norm": 15.828106880187988, | |
| "learning_rate": 1.9609050692230552e-05, | |
| "loss": 0.0234, | |
| "step": 22950 | |
| }, | |
| { | |
| "epoch": 0.01984438550566083, | |
| "grad_norm": 2.8101124982526926e-08, | |
| "learning_rate": 1.9608187669476096e-05, | |
| "loss": 0.0406, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.019887525474151395, | |
| "grad_norm": 1.5754636478959583e-05, | |
| "learning_rate": 1.9607324646721636e-05, | |
| "loss": 0.013, | |
| "step": 23050 | |
| }, | |
| { | |
| "epoch": 0.01993066544264196, | |
| "grad_norm": 0.00016132810560520738, | |
| "learning_rate": 1.960646162396718e-05, | |
| "loss": 0.0661, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.019973805411132528, | |
| "grad_norm": 0.009830374270677567, | |
| "learning_rate": 1.9605598601212722e-05, | |
| "loss": 0.0001, | |
| "step": 23150 | |
| }, | |
| { | |
| "epoch": 0.020016945379623095, | |
| "grad_norm": 1.5961271415676492e-08, | |
| "learning_rate": 1.9604735578458262e-05, | |
| "loss": 0.0001, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.02006008534811366, | |
| "grad_norm": 0.6032620668411255, | |
| "learning_rate": 1.9603872555703806e-05, | |
| "loss": 0.057, | |
| "step": 23250 | |
| }, | |
| { | |
| "epoch": 0.020103225316604228, | |
| "grad_norm": 0.0007053284207358956, | |
| "learning_rate": 1.9603009532949346e-05, | |
| "loss": 0.0328, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.020146365285094794, | |
| "grad_norm": 0.00022471090778708458, | |
| "learning_rate": 1.960214651019489e-05, | |
| "loss": 0.0176, | |
| "step": 23350 | |
| }, | |
| { | |
| "epoch": 0.020189505253585364, | |
| "grad_norm": 3.784521595662227e-06, | |
| "learning_rate": 1.9601283487440432e-05, | |
| "loss": 0.0342, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.02023264522207593, | |
| "grad_norm": 0.0002926274319179356, | |
| "learning_rate": 1.9600420464685976e-05, | |
| "loss": 0.0622, | |
| "step": 23450 | |
| }, | |
| { | |
| "epoch": 0.020275785190566498, | |
| "grad_norm": 0.0005665869684889913, | |
| "learning_rate": 1.9599557441931516e-05, | |
| "loss": 0.0319, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.020318925159057064, | |
| "grad_norm": 0.0020943868439644575, | |
| "learning_rate": 1.959869441917706e-05, | |
| "loss": 0.0437, | |
| "step": 23550 | |
| }, | |
| { | |
| "epoch": 0.02036206512754763, | |
| "grad_norm": 0.007852623239159584, | |
| "learning_rate": 1.95978313964226e-05, | |
| "loss": 0.0002, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.020405205096038197, | |
| "grad_norm": 1.9628392457962036, | |
| "learning_rate": 1.9596968373668142e-05, | |
| "loss": 0.006, | |
| "step": 23650 | |
| }, | |
| { | |
| "epoch": 0.020448345064528764, | |
| "grad_norm": 2.0241428533296357e-09, | |
| "learning_rate": 1.9596105350913682e-05, | |
| "loss": 0.0491, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.02049148503301933, | |
| "grad_norm": 1.5093628569218254e-09, | |
| "learning_rate": 1.9595242328159226e-05, | |
| "loss": 0.0218, | |
| "step": 23750 | |
| }, | |
| { | |
| "epoch": 0.020534625001509897, | |
| "grad_norm": 0.013457014225423336, | |
| "learning_rate": 1.9594379305404765e-05, | |
| "loss": 0.0246, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.020577764970000467, | |
| "grad_norm": 2.7149107495461067e-07, | |
| "learning_rate": 1.959351628265031e-05, | |
| "loss": 0.0309, | |
| "step": 23850 | |
| }, | |
| { | |
| "epoch": 0.020620904938491034, | |
| "grad_norm": 2.928385534062272e-09, | |
| "learning_rate": 1.9592653259895852e-05, | |
| "loss": 0.0243, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.0206640449069816, | |
| "grad_norm": 0.0007422782364301383, | |
| "learning_rate": 1.9591790237141395e-05, | |
| "loss": 0.0483, | |
| "step": 23950 | |
| }, | |
| { | |
| "epoch": 0.020707184875472167, | |
| "grad_norm": 3.9503233892901335e-06, | |
| "learning_rate": 1.9590927214386935e-05, | |
| "loss": 0.0281, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.020750324843962734, | |
| "grad_norm": 0.07909461110830307, | |
| "learning_rate": 1.959006419163248e-05, | |
| "loss": 0.0137, | |
| "step": 24050 | |
| }, | |
| { | |
| "epoch": 0.0207934648124533, | |
| "grad_norm": 1.3648401853139092e-10, | |
| "learning_rate": 1.958920116887802e-05, | |
| "loss": 0.046, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.020836604780943867, | |
| "grad_norm": 1.791205619383618e-07, | |
| "learning_rate": 1.9588338146123562e-05, | |
| "loss": 0.0303, | |
| "step": 24150 | |
| }, | |
| { | |
| "epoch": 0.020879744749434433, | |
| "grad_norm": 3.758560573885461e-09, | |
| "learning_rate": 1.9587475123369105e-05, | |
| "loss": 0.0029, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.020922884717925003, | |
| "grad_norm": 2.0997137362144258e-10, | |
| "learning_rate": 1.9586612100614645e-05, | |
| "loss": 0.0431, | |
| "step": 24250 | |
| }, | |
| { | |
| "epoch": 0.02096602468641557, | |
| "grad_norm": 4.752119064331055, | |
| "learning_rate": 1.958574907786019e-05, | |
| "loss": 0.0253, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.021009164654906137, | |
| "grad_norm": 0.004993764217942953, | |
| "learning_rate": 1.958488605510573e-05, | |
| "loss": 0.0292, | |
| "step": 24350 | |
| }, | |
| { | |
| "epoch": 0.021052304623396703, | |
| "grad_norm": 1.2806524729569446e-09, | |
| "learning_rate": 1.9584023032351275e-05, | |
| "loss": 0.0538, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.02109544459188727, | |
| "grad_norm": 6.973591126779866e-08, | |
| "learning_rate": 1.9583160009596815e-05, | |
| "loss": 0.0272, | |
| "step": 24450 | |
| }, | |
| { | |
| "epoch": 0.021138584560377836, | |
| "grad_norm": 0.042537808418273926, | |
| "learning_rate": 1.958229698684236e-05, | |
| "loss": 0.046, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.021181724528868403, | |
| "grad_norm": 0.0006602337816730142, | |
| "learning_rate": 1.95814339640879e-05, | |
| "loss": 0.024, | |
| "step": 24550 | |
| }, | |
| { | |
| "epoch": 0.02122486449735897, | |
| "grad_norm": 22.432666778564453, | |
| "learning_rate": 1.9580570941333442e-05, | |
| "loss": 0.0484, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.02126800446584954, | |
| "grad_norm": 0.024881912395358086, | |
| "learning_rate": 1.9579707918578982e-05, | |
| "loss": 0.0061, | |
| "step": 24650 | |
| }, | |
| { | |
| "epoch": 0.021311144434340106, | |
| "grad_norm": 9.876566764432937e-06, | |
| "learning_rate": 1.9578844895824525e-05, | |
| "loss": 0.033, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.021354284402830673, | |
| "grad_norm": 1.04228820418939e-05, | |
| "learning_rate": 1.9577981873070065e-05, | |
| "loss": 0.0246, | |
| "step": 24750 | |
| }, | |
| { | |
| "epoch": 0.02139742437132124, | |
| "grad_norm": 4.033939262626518e-07, | |
| "learning_rate": 1.957711885031561e-05, | |
| "loss": 0.0273, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.021440564339811806, | |
| "grad_norm": 1.8699473002925515e-05, | |
| "learning_rate": 1.9576255827561152e-05, | |
| "loss": 0.0404, | |
| "step": 24850 | |
| }, | |
| { | |
| "epoch": 0.021483704308302373, | |
| "grad_norm": 7.583350480899753e-08, | |
| "learning_rate": 1.957539280480669e-05, | |
| "loss": 0.0265, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.02152684427679294, | |
| "grad_norm": 0.02612815983593464, | |
| "learning_rate": 1.9574529782052235e-05, | |
| "loss": 0.0219, | |
| "step": 24950 | |
| }, | |
| { | |
| "epoch": 0.021569984245283506, | |
| "grad_norm": 5.127071176502795e-07, | |
| "learning_rate": 1.957366675929778e-05, | |
| "loss": 0.0609, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.021613124213774076, | |
| "grad_norm": 0.00036468004691414535, | |
| "learning_rate": 1.957280373654332e-05, | |
| "loss": 0.0173, | |
| "step": 25050 | |
| }, | |
| { | |
| "epoch": 0.021656264182264642, | |
| "grad_norm": 4.805618573300308e-06, | |
| "learning_rate": 1.957194071378886e-05, | |
| "loss": 0.0478, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.02169940415075521, | |
| "grad_norm": 0.003498099045827985, | |
| "learning_rate": 1.9571077691034405e-05, | |
| "loss": 0.0422, | |
| "step": 25150 | |
| }, | |
| { | |
| "epoch": 0.021742544119245776, | |
| "grad_norm": 3.893982466252055e-06, | |
| "learning_rate": 1.9570214668279945e-05, | |
| "loss": 0.024, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.021785684087736342, | |
| "grad_norm": 6.174719402451956e-08, | |
| "learning_rate": 1.9569351645525488e-05, | |
| "loss": 0.0448, | |
| "step": 25250 | |
| }, | |
| { | |
| "epoch": 0.02182882405622691, | |
| "grad_norm": 0.8544023633003235, | |
| "learning_rate": 1.9568488622771028e-05, | |
| "loss": 0.009, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.021871964024717475, | |
| "grad_norm": 1.8829781822660152e-07, | |
| "learning_rate": 1.956762560001657e-05, | |
| "loss": 0.0059, | |
| "step": 25350 | |
| }, | |
| { | |
| "epoch": 0.021915103993208042, | |
| "grad_norm": 1.7753800420905463e-06, | |
| "learning_rate": 1.956676257726211e-05, | |
| "loss": 0.0614, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.021958243961698612, | |
| "grad_norm": 2.652618924514627e-08, | |
| "learning_rate": 1.9565899554507655e-05, | |
| "loss": 0.0184, | |
| "step": 25450 | |
| }, | |
| { | |
| "epoch": 0.02200138393018918, | |
| "grad_norm": 0.33340388536453247, | |
| "learning_rate": 1.9565036531753198e-05, | |
| "loss": 0.0166, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.022044523898679745, | |
| "grad_norm": 0.40569502115249634, | |
| "learning_rate": 1.956417350899874e-05, | |
| "loss": 0.0179, | |
| "step": 25550 | |
| }, | |
| { | |
| "epoch": 0.02208766386717031, | |
| "grad_norm": 0.00011573725350899622, | |
| "learning_rate": 1.956331048624428e-05, | |
| "loss": 0.0006, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.02213080383566088, | |
| "grad_norm": 2.554327238613041e-06, | |
| "learning_rate": 1.9562447463489825e-05, | |
| "loss": 0.0402, | |
| "step": 25650 | |
| }, | |
| { | |
| "epoch": 0.022173943804151445, | |
| "grad_norm": 8.304319010221661e-08, | |
| "learning_rate": 1.9561584440735365e-05, | |
| "loss": 0.0363, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.02221708377264201, | |
| "grad_norm": 1.8539299873054915e-08, | |
| "learning_rate": 1.9560721417980908e-05, | |
| "loss": 0.0042, | |
| "step": 25750 | |
| }, | |
| { | |
| "epoch": 0.022260223741132578, | |
| "grad_norm": 0.043552886694669724, | |
| "learning_rate": 1.955985839522645e-05, | |
| "loss": 0.0358, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.022303363709623148, | |
| "grad_norm": 0.00025480103795416653, | |
| "learning_rate": 1.955899537247199e-05, | |
| "loss": 0.0349, | |
| "step": 25850 | |
| }, | |
| { | |
| "epoch": 0.022346503678113715, | |
| "grad_norm": 0.0006263578543439507, | |
| "learning_rate": 1.9558132349717535e-05, | |
| "loss": 0.0184, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.02238964364660428, | |
| "grad_norm": 2.677586793899536, | |
| "learning_rate": 1.9557269326963075e-05, | |
| "loss": 0.0667, | |
| "step": 25950 | |
| }, | |
| { | |
| "epoch": 0.022432783615094848, | |
| "grad_norm": 0.6284056305885315, | |
| "learning_rate": 1.9556406304208618e-05, | |
| "loss": 0.0061, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.022475923583585414, | |
| "grad_norm": 0.01573588326573372, | |
| "learning_rate": 1.955554328145416e-05, | |
| "loss": 0.0515, | |
| "step": 26050 | |
| }, | |
| { | |
| "epoch": 0.02251906355207598, | |
| "grad_norm": 9.318134289060254e-06, | |
| "learning_rate": 1.9554680258699705e-05, | |
| "loss": 0.0231, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.022562203520566548, | |
| "grad_norm": 3.892751294642949e-07, | |
| "learning_rate": 1.9553817235945245e-05, | |
| "loss": 0.004, | |
| "step": 26150 | |
| }, | |
| { | |
| "epoch": 0.022605343489057114, | |
| "grad_norm": 0.0010842111660167575, | |
| "learning_rate": 1.9552954213190788e-05, | |
| "loss": 0.0568, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.022648483457547684, | |
| "grad_norm": 0.021115347743034363, | |
| "learning_rate": 1.9552091190436328e-05, | |
| "loss": 0.0711, | |
| "step": 26250 | |
| }, | |
| { | |
| "epoch": 0.02269162342603825, | |
| "grad_norm": 0.07015379518270493, | |
| "learning_rate": 1.955122816768187e-05, | |
| "loss": 0.0305, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.022734763394528817, | |
| "grad_norm": 3.8024263631086797e-05, | |
| "learning_rate": 1.955036514492741e-05, | |
| "loss": 0.0309, | |
| "step": 26350 | |
| }, | |
| { | |
| "epoch": 0.022777903363019384, | |
| "grad_norm": 0.0043113697320222855, | |
| "learning_rate": 1.9549502122172954e-05, | |
| "loss": 0.0066, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.02282104333150995, | |
| "grad_norm": 0.007588895037770271, | |
| "learning_rate": 1.9548639099418494e-05, | |
| "loss": 0.0242, | |
| "step": 26450 | |
| }, | |
| { | |
| "epoch": 0.022864183300000517, | |
| "grad_norm": 1.8674474954605103, | |
| "learning_rate": 1.9547776076664038e-05, | |
| "loss": 0.0163, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.022907323268491084, | |
| "grad_norm": 4.954452991485596, | |
| "learning_rate": 1.954691305390958e-05, | |
| "loss": 0.0368, | |
| "step": 26550 | |
| }, | |
| { | |
| "epoch": 0.02295046323698165, | |
| "grad_norm": 0.0024081666488200426, | |
| "learning_rate": 1.9546050031155124e-05, | |
| "loss": 0.0255, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.02299360320547222, | |
| "grad_norm": 0.4166341722011566, | |
| "learning_rate": 1.9545187008400664e-05, | |
| "loss": 0.0331, | |
| "step": 26650 | |
| }, | |
| { | |
| "epoch": 0.023036743173962787, | |
| "grad_norm": 0.00036967426422052085, | |
| "learning_rate": 1.9544323985646208e-05, | |
| "loss": 0.0282, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.023079883142453354, | |
| "grad_norm": 1.1294196688993452e-08, | |
| "learning_rate": 1.954346096289175e-05, | |
| "loss": 0.0293, | |
| "step": 26750 | |
| }, | |
| { | |
| "epoch": 0.02312302311094392, | |
| "grad_norm": 24.33706283569336, | |
| "learning_rate": 1.954259794013729e-05, | |
| "loss": 0.0475, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.023166163079434487, | |
| "grad_norm": 1.3493994366342577e-08, | |
| "learning_rate": 1.9541734917382834e-05, | |
| "loss": 0.0045, | |
| "step": 26850 | |
| }, | |
| { | |
| "epoch": 0.023209303047925053, | |
| "grad_norm": 6.673410098301247e-05, | |
| "learning_rate": 1.9540871894628374e-05, | |
| "loss": 0.0059, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.02325244301641562, | |
| "grad_norm": 0.0014361342182382941, | |
| "learning_rate": 1.9540008871873918e-05, | |
| "loss": 0.0002, | |
| "step": 26950 | |
| }, | |
| { | |
| "epoch": 0.023295582984906187, | |
| "grad_norm": 3.2534658908843994, | |
| "learning_rate": 1.9539145849119458e-05, | |
| "loss": 0.0329, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.023338722953396753, | |
| "grad_norm": 0.0029180857818573713, | |
| "learning_rate": 1.9538282826365004e-05, | |
| "loss": 0.0007, | |
| "step": 27050 | |
| }, | |
| { | |
| "epoch": 0.023381862921887323, | |
| "grad_norm": 7.010048866271973, | |
| "learning_rate": 1.9537419803610544e-05, | |
| "loss": 0.0473, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.02342500289037789, | |
| "grad_norm": 0.5129420757293701, | |
| "learning_rate": 1.9536556780856088e-05, | |
| "loss": 0.0312, | |
| "step": 27150 | |
| }, | |
| { | |
| "epoch": 0.023468142858868456, | |
| "grad_norm": 0.008801298215985298, | |
| "learning_rate": 1.9535693758101627e-05, | |
| "loss": 0.0025, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.023511282827359023, | |
| "grad_norm": 7.381456001986919e-10, | |
| "learning_rate": 1.953483073534717e-05, | |
| "loss": 0.0399, | |
| "step": 27250 | |
| }, | |
| { | |
| "epoch": 0.02355442279584959, | |
| "grad_norm": 0.015433188527822495, | |
| "learning_rate": 1.953396771259271e-05, | |
| "loss": 0.0248, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.023597562764340156, | |
| "grad_norm": 3.086728572845459, | |
| "learning_rate": 1.9533104689838254e-05, | |
| "loss": 0.0124, | |
| "step": 27350 | |
| }, | |
| { | |
| "epoch": 0.023640702732830723, | |
| "grad_norm": 5.318460255532287e-11, | |
| "learning_rate": 1.9532241667083794e-05, | |
| "loss": 0.0259, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.02368384270132129, | |
| "grad_norm": 0.0015008836053311825, | |
| "learning_rate": 1.9531378644329337e-05, | |
| "loss": 0.0128, | |
| "step": 27450 | |
| }, | |
| { | |
| "epoch": 0.02372698266981186, | |
| "grad_norm": 4.6280136302812025e-05, | |
| "learning_rate": 1.953051562157488e-05, | |
| "loss": 0.0005, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.023770122638302426, | |
| "grad_norm": 4.795760560227791e-06, | |
| "learning_rate": 1.952965259882042e-05, | |
| "loss": 0.0155, | |
| "step": 27550 | |
| }, | |
| { | |
| "epoch": 0.023813262606792993, | |
| "grad_norm": 0.19684414565563202, | |
| "learning_rate": 1.9528789576065964e-05, | |
| "loss": 0.0042, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.02385640257528356, | |
| "grad_norm": 1.0629539559658951e-07, | |
| "learning_rate": 1.9527926553311507e-05, | |
| "loss": 0.0097, | |
| "step": 27650 | |
| }, | |
| { | |
| "epoch": 0.023899542543774126, | |
| "grad_norm": 1.161576043684498e-11, | |
| "learning_rate": 1.9527063530557047e-05, | |
| "loss": 0.0009, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.023942682512264692, | |
| "grad_norm": 1.7004417318666754e-10, | |
| "learning_rate": 1.952620050780259e-05, | |
| "loss": 0.0324, | |
| "step": 27750 | |
| }, | |
| { | |
| "epoch": 0.02398582248075526, | |
| "grad_norm": 3.243289393140003e-05, | |
| "learning_rate": 1.9525337485048134e-05, | |
| "loss": 0.0768, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.024028962449245826, | |
| "grad_norm": 0.0029646342154592276, | |
| "learning_rate": 1.9524474462293674e-05, | |
| "loss": 0.0142, | |
| "step": 27850 | |
| }, | |
| { | |
| "epoch": 0.024072102417736396, | |
| "grad_norm": 0.0012051378143951297, | |
| "learning_rate": 1.9523611439539217e-05, | |
| "loss": 0.0147, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.024115242386226962, | |
| "grad_norm": 1.3464485164149664e-05, | |
| "learning_rate": 1.9522748416784757e-05, | |
| "loss": 0.0023, | |
| "step": 27950 | |
| }, | |
| { | |
| "epoch": 0.02415838235471753, | |
| "grad_norm": 0.0002646016946528107, | |
| "learning_rate": 1.95218853940303e-05, | |
| "loss": 0.0098, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.024201522323208095, | |
| "grad_norm": 6.006689727655612e-05, | |
| "learning_rate": 1.952102237127584e-05, | |
| "loss": 0.0052, | |
| "step": 28050 | |
| }, | |
| { | |
| "epoch": 0.024244662291698662, | |
| "grad_norm": 31.13625717163086, | |
| "learning_rate": 1.9520159348521384e-05, | |
| "loss": 0.0298, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.02428780226018923, | |
| "grad_norm": 0.00010399877646705136, | |
| "learning_rate": 1.9519296325766927e-05, | |
| "loss": 0.0512, | |
| "step": 28150 | |
| }, | |
| { | |
| "epoch": 0.024330942228679795, | |
| "grad_norm": 9.850235755948233e-07, | |
| "learning_rate": 1.951843330301247e-05, | |
| "loss": 0.052, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.02437408219717036, | |
| "grad_norm": 3.698731597978622e-05, | |
| "learning_rate": 1.951757028025801e-05, | |
| "loss": 0.0017, | |
| "step": 28250 | |
| }, | |
| { | |
| "epoch": 0.02441722216566093, | |
| "grad_norm": 0.04309392347931862, | |
| "learning_rate": 1.9516707257503554e-05, | |
| "loss": 0.0385, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.0244603621341515, | |
| "grad_norm": 9.081038115255069e-06, | |
| "learning_rate": 1.9515844234749094e-05, | |
| "loss": 0.0418, | |
| "step": 28350 | |
| }, | |
| { | |
| "epoch": 0.024503502102642065, | |
| "grad_norm": 0.48385998606681824, | |
| "learning_rate": 1.9514981211994637e-05, | |
| "loss": 0.0207, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.02454664207113263, | |
| "grad_norm": 1.9165490527939255e-07, | |
| "learning_rate": 1.951411818924018e-05, | |
| "loss": 0.0206, | |
| "step": 28450 | |
| }, | |
| { | |
| "epoch": 0.024589782039623198, | |
| "grad_norm": 1.4679693776997738e-05, | |
| "learning_rate": 1.951325516648572e-05, | |
| "loss": 0.0346, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.024632922008113765, | |
| "grad_norm": 0.11278124898672104, | |
| "learning_rate": 1.9512392143731264e-05, | |
| "loss": 0.0081, | |
| "step": 28550 | |
| }, | |
| { | |
| "epoch": 0.02467606197660433, | |
| "grad_norm": 9.307966024607595e-07, | |
| "learning_rate": 1.9511529120976804e-05, | |
| "loss": 0.012, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.024719201945094898, | |
| "grad_norm": 0.00027512782253324986, | |
| "learning_rate": 1.9510666098222347e-05, | |
| "loss": 0.0032, | |
| "step": 28650 | |
| }, | |
| { | |
| "epoch": 0.024762341913585468, | |
| "grad_norm": 0.11172260344028473, | |
| "learning_rate": 1.950980307546789e-05, | |
| "loss": 0.0032, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.024805481882076034, | |
| "grad_norm": 2.1106679923832417e-06, | |
| "learning_rate": 1.9508940052713434e-05, | |
| "loss": 0.0339, | |
| "step": 28750 | |
| }, | |
| { | |
| "epoch": 0.0248486218505666, | |
| "grad_norm": 0.028800344094634056, | |
| "learning_rate": 1.9508077029958973e-05, | |
| "loss": 0.0278, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.024891761819057168, | |
| "grad_norm": 1.757417521730531e-06, | |
| "learning_rate": 1.9507214007204517e-05, | |
| "loss": 0.0164, | |
| "step": 28850 | |
| }, | |
| { | |
| "epoch": 0.024934901787547734, | |
| "grad_norm": 4.451398893934311e-08, | |
| "learning_rate": 1.9506350984450057e-05, | |
| "loss": 0.009, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.0249780417560383, | |
| "grad_norm": 4.7023010552038613e-07, | |
| "learning_rate": 1.95054879616956e-05, | |
| "loss": 0.01, | |
| "step": 28950 | |
| }, | |
| { | |
| "epoch": 0.025021181724528867, | |
| "grad_norm": 0.3000449538230896, | |
| "learning_rate": 1.950462493894114e-05, | |
| "loss": 0.0208, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.025064321693019434, | |
| "grad_norm": 2.5534254746162333e-06, | |
| "learning_rate": 1.9503761916186683e-05, | |
| "loss": 0.0113, | |
| "step": 29050 | |
| }, | |
| { | |
| "epoch": 0.025107461661510004, | |
| "grad_norm": 1.051041209620962e-07, | |
| "learning_rate": 1.9502898893432223e-05, | |
| "loss": 0.0286, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.02515060163000057, | |
| "grad_norm": 0.11379561573266983, | |
| "learning_rate": 1.9502035870677767e-05, | |
| "loss": 0.0273, | |
| "step": 29150 | |
| }, | |
| { | |
| "epoch": 0.025193741598491137, | |
| "grad_norm": 4.075488391208637e-09, | |
| "learning_rate": 1.950117284792331e-05, | |
| "loss": 0.001, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.025236881566981704, | |
| "grad_norm": 6.561435283991557e-10, | |
| "learning_rate": 1.9500309825168853e-05, | |
| "loss": 0.0002, | |
| "step": 29250 | |
| }, | |
| { | |
| "epoch": 0.02528002153547227, | |
| "grad_norm": 2.523017644882202, | |
| "learning_rate": 1.9499446802414393e-05, | |
| "loss": 0.0078, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.025323161503962837, | |
| "grad_norm": 1.13604746729834e-05, | |
| "learning_rate": 1.9498583779659937e-05, | |
| "loss": 0.0009, | |
| "step": 29350 | |
| }, | |
| { | |
| "epoch": 0.025366301472453404, | |
| "grad_norm": 0.00017209288489539176, | |
| "learning_rate": 1.949772075690548e-05, | |
| "loss": 0.0157, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.02540944144094397, | |
| "grad_norm": 0.00011601659207371995, | |
| "learning_rate": 1.949685773415102e-05, | |
| "loss": 0.045, | |
| "step": 29450 | |
| }, | |
| { | |
| "epoch": 0.02545258140943454, | |
| "grad_norm": 22.94985580444336, | |
| "learning_rate": 1.9495994711396563e-05, | |
| "loss": 0.0269, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.025495721377925107, | |
| "grad_norm": 2.782198776918321e-11, | |
| "learning_rate": 1.9495131688642103e-05, | |
| "loss": 0.0025, | |
| "step": 29550 | |
| }, | |
| { | |
| "epoch": 0.025538861346415673, | |
| "grad_norm": 2.155955371563323e-05, | |
| "learning_rate": 1.9494268665887646e-05, | |
| "loss": 0.0452, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.02558200131490624, | |
| "grad_norm": 8.12989310361445e-06, | |
| "learning_rate": 1.9493405643133186e-05, | |
| "loss": 0.019, | |
| "step": 29650 | |
| }, | |
| { | |
| "epoch": 0.025625141283396807, | |
| "grad_norm": 0.000956275500357151, | |
| "learning_rate": 1.9492542620378733e-05, | |
| "loss": 0.0122, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.025668281251887373, | |
| "grad_norm": 3.834348838438473e-09, | |
| "learning_rate": 1.9491679597624273e-05, | |
| "loss": 0.0255, | |
| "step": 29750 | |
| }, | |
| { | |
| "epoch": 0.02571142122037794, | |
| "grad_norm": 0.16173326969146729, | |
| "learning_rate": 1.9490816574869816e-05, | |
| "loss": 0.0382, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.025754561188868506, | |
| "grad_norm": 0.0008912076009437442, | |
| "learning_rate": 1.9489953552115356e-05, | |
| "loss": 0.029, | |
| "step": 29850 | |
| }, | |
| { | |
| "epoch": 0.025797701157359073, | |
| "grad_norm": 1.517190213462527e-07, | |
| "learning_rate": 1.94890905293609e-05, | |
| "loss": 0.0413, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.025840841125849643, | |
| "grad_norm": 6.658311946239337e-08, | |
| "learning_rate": 1.948822750660644e-05, | |
| "loss": 0.0142, | |
| "step": 29950 | |
| }, | |
| { | |
| "epoch": 0.02588398109434021, | |
| "grad_norm": 0.0003508856752887368, | |
| "learning_rate": 1.9487364483851983e-05, | |
| "loss": 0.0079, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.025927121062830776, | |
| "grad_norm": 0.026366397738456726, | |
| "learning_rate": 1.9486501461097523e-05, | |
| "loss": 0.0381, | |
| "step": 30050 | |
| }, | |
| { | |
| "epoch": 0.025970261031321343, | |
| "grad_norm": 5.6284894943237305, | |
| "learning_rate": 1.9485638438343066e-05, | |
| "loss": 0.0618, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.02601340099981191, | |
| "grad_norm": 0.00824633240699768, | |
| "learning_rate": 1.948477541558861e-05, | |
| "loss": 0.0173, | |
| "step": 30150 | |
| }, | |
| { | |
| "epoch": 0.026056540968302476, | |
| "grad_norm": 0.0007174229249358177, | |
| "learning_rate": 1.948391239283415e-05, | |
| "loss": 0.0199, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.026099680936793043, | |
| "grad_norm": 0.02488381415605545, | |
| "learning_rate": 1.9483049370079693e-05, | |
| "loss": 0.0312, | |
| "step": 30250 | |
| }, | |
| { | |
| "epoch": 0.02614282090528361, | |
| "grad_norm": 1.9344063997268677, | |
| "learning_rate": 1.9482186347325236e-05, | |
| "loss": 0.0357, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.02618596087377418, | |
| "grad_norm": 3.485973834991455, | |
| "learning_rate": 1.9481323324570776e-05, | |
| "loss": 0.0266, | |
| "step": 30350 | |
| }, | |
| { | |
| "epoch": 0.026229100842264746, | |
| "grad_norm": 6.07471008606808e-07, | |
| "learning_rate": 1.948046030181632e-05, | |
| "loss": 0.0308, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.026272240810755312, | |
| "grad_norm": 7.532801760135044e-08, | |
| "learning_rate": 1.9479597279061863e-05, | |
| "loss": 0.047, | |
| "step": 30450 | |
| }, | |
| { | |
| "epoch": 0.02631538077924588, | |
| "grad_norm": 0.0005202541360631585, | |
| "learning_rate": 1.9478734256307403e-05, | |
| "loss": 0.0239, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.026358520747736446, | |
| "grad_norm": 26.940954208374023, | |
| "learning_rate": 1.9477871233552946e-05, | |
| "loss": 0.0709, | |
| "step": 30550 | |
| }, | |
| { | |
| "epoch": 0.026401660716227012, | |
| "grad_norm": 7.362630470575393e-12, | |
| "learning_rate": 1.9477008210798486e-05, | |
| "loss": 0.0117, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.02644480068471758, | |
| "grad_norm": 1.2002854418824427e-05, | |
| "learning_rate": 1.947614518804403e-05, | |
| "loss": 0.0392, | |
| "step": 30650 | |
| }, | |
| { | |
| "epoch": 0.026487940653208145, | |
| "grad_norm": 0.4743211269378662, | |
| "learning_rate": 1.947528216528957e-05, | |
| "loss": 0.0461, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.026531080621698715, | |
| "grad_norm": 5.520277568393794e-10, | |
| "learning_rate": 1.9474419142535113e-05, | |
| "loss": 0.002, | |
| "step": 30750 | |
| }, | |
| { | |
| "epoch": 0.026574220590189282, | |
| "grad_norm": 5.655643420254819e-08, | |
| "learning_rate": 1.9473556119780656e-05, | |
| "loss": 0.002, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.02661736055867985, | |
| "grad_norm": 3.0585747481381986e-06, | |
| "learning_rate": 1.94726930970262e-05, | |
| "loss": 0.0208, | |
| "step": 30850 | |
| }, | |
| { | |
| "epoch": 0.026660500527170415, | |
| "grad_norm": 0.00038789489190094173, | |
| "learning_rate": 1.947183007427174e-05, | |
| "loss": 0.0221, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.02670364049566098, | |
| "grad_norm": 0.006069442722946405, | |
| "learning_rate": 1.9470967051517283e-05, | |
| "loss": 0.0332, | |
| "step": 30950 | |
| }, | |
| { | |
| "epoch": 0.02674678046415155, | |
| "grad_norm": 2.1992854204455625e-09, | |
| "learning_rate": 1.9470104028762823e-05, | |
| "loss": 0.0133, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.026789920432642115, | |
| "grad_norm": 0.0005120674031786621, | |
| "learning_rate": 1.9469241006008366e-05, | |
| "loss": 0.0549, | |
| "step": 31050 | |
| }, | |
| { | |
| "epoch": 0.02683306040113268, | |
| "grad_norm": 3.589123298297636e-05, | |
| "learning_rate": 1.946837798325391e-05, | |
| "loss": 0.0172, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.02687620036962325, | |
| "grad_norm": 4.615823812059716e-08, | |
| "learning_rate": 1.946751496049945e-05, | |
| "loss": 0.013, | |
| "step": 31150 | |
| }, | |
| { | |
| "epoch": 0.026919340338113818, | |
| "grad_norm": 7.231820475794848e-09, | |
| "learning_rate": 1.9466651937744993e-05, | |
| "loss": 0.0037, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.026962480306604385, | |
| "grad_norm": 5.052131157867734e-09, | |
| "learning_rate": 1.9465788914990532e-05, | |
| "loss": 0.0491, | |
| "step": 31250 | |
| }, | |
| { | |
| "epoch": 0.02700562027509495, | |
| "grad_norm": 0.00010309406206943095, | |
| "learning_rate": 1.9464925892236076e-05, | |
| "loss": 0.0028, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.027048760243585518, | |
| "grad_norm": 5.6031745771178976e-05, | |
| "learning_rate": 1.946406286948162e-05, | |
| "loss": 0.015, | |
| "step": 31350 | |
| }, | |
| { | |
| "epoch": 0.027091900212076084, | |
| "grad_norm": 0.00024476449470967054, | |
| "learning_rate": 1.9463199846727162e-05, | |
| "loss": 0.0154, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.02713504018056665, | |
| "grad_norm": 2.0063467331965512e-07, | |
| "learning_rate": 1.9462336823972702e-05, | |
| "loss": 0.0212, | |
| "step": 31450 | |
| }, | |
| { | |
| "epoch": 0.027178180149057218, | |
| "grad_norm": 6.659844075329602e-05, | |
| "learning_rate": 1.9461473801218246e-05, | |
| "loss": 0.0216, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.027221320117547788, | |
| "grad_norm": 4.053091470268555e-05, | |
| "learning_rate": 1.9460610778463786e-05, | |
| "loss": 0.026, | |
| "step": 31550 | |
| }, | |
| { | |
| "epoch": 0.027264460086038354, | |
| "grad_norm": 2.6744512382492758e-08, | |
| "learning_rate": 1.945974775570933e-05, | |
| "loss": 0.0284, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.02730760005452892, | |
| "grad_norm": 0.1950395703315735, | |
| "learning_rate": 1.945888473295487e-05, | |
| "loss": 0.0064, | |
| "step": 31650 | |
| }, | |
| { | |
| "epoch": 0.027350740023019487, | |
| "grad_norm": 41.71430587768555, | |
| "learning_rate": 1.9458021710200412e-05, | |
| "loss": 0.0379, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.027393879991510054, | |
| "grad_norm": 2.8257717943347416e-08, | |
| "learning_rate": 1.9457158687445952e-05, | |
| "loss": 0.0263, | |
| "step": 31750 | |
| }, | |
| { | |
| "epoch": 0.02743701996000062, | |
| "grad_norm": 0.002763712080195546, | |
| "learning_rate": 1.9456295664691496e-05, | |
| "loss": 0.0189, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.027480159928491187, | |
| "grad_norm": 1.0972726061098115e-09, | |
| "learning_rate": 1.945543264193704e-05, | |
| "loss": 0.0156, | |
| "step": 31850 | |
| }, | |
| { | |
| "epoch": 0.027523299896981754, | |
| "grad_norm": 0.0012834984809160233, | |
| "learning_rate": 1.9454569619182582e-05, | |
| "loss": 0.0076, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.027566439865472324, | |
| "grad_norm": 2.497445628080186e-08, | |
| "learning_rate": 1.9453706596428122e-05, | |
| "loss": 0.0209, | |
| "step": 31950 | |
| }, | |
| { | |
| "epoch": 0.02760957983396289, | |
| "grad_norm": 23.704517364501953, | |
| "learning_rate": 1.9452843573673666e-05, | |
| "loss": 0.0603, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.027652719802453457, | |
| "grad_norm": 0.0009068456711247563, | |
| "learning_rate": 1.945198055091921e-05, | |
| "loss": 0.035, | |
| "step": 32050 | |
| }, | |
| { | |
| "epoch": 0.027695859770944024, | |
| "grad_norm": 5.298162460327148, | |
| "learning_rate": 1.945111752816475e-05, | |
| "loss": 0.0053, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.02773899973943459, | |
| "grad_norm": 0.017380917444825172, | |
| "learning_rate": 1.9450254505410292e-05, | |
| "loss": 0.0471, | |
| "step": 32150 | |
| }, | |
| { | |
| "epoch": 0.027782139707925157, | |
| "grad_norm": 0.02581915073096752, | |
| "learning_rate": 1.9449391482655832e-05, | |
| "loss": 0.0396, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.027825279676415723, | |
| "grad_norm": 1.437704066908907e-09, | |
| "learning_rate": 1.9448528459901375e-05, | |
| "loss": 0.0283, | |
| "step": 32250 | |
| }, | |
| { | |
| "epoch": 0.02786841964490629, | |
| "grad_norm": 1.0882466483508324e-08, | |
| "learning_rate": 1.9447665437146915e-05, | |
| "loss": 0.0066, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.02791155961339686, | |
| "grad_norm": 5.027173122229556e-11, | |
| "learning_rate": 1.9446802414392462e-05, | |
| "loss": 0.0082, | |
| "step": 32350 | |
| }, | |
| { | |
| "epoch": 0.027954699581887427, | |
| "grad_norm": 4.071168899536133, | |
| "learning_rate": 1.9445939391638002e-05, | |
| "loss": 0.0217, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.027997839550377993, | |
| "grad_norm": 0.0017136979149654508, | |
| "learning_rate": 1.9445076368883545e-05, | |
| "loss": 0.0665, | |
| "step": 32450 | |
| }, | |
| { | |
| "epoch": 0.02804097951886856, | |
| "grad_norm": 1.7071112301536573e-09, | |
| "learning_rate": 1.9444213346129085e-05, | |
| "loss": 0.0283, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.028084119487359126, | |
| "grad_norm": 2.8745741897928667e-10, | |
| "learning_rate": 1.944335032337463e-05, | |
| "loss": 0.0165, | |
| "step": 32550 | |
| }, | |
| { | |
| "epoch": 0.028127259455849693, | |
| "grad_norm": 0.06553611904382706, | |
| "learning_rate": 1.944248730062017e-05, | |
| "loss": 0.0039, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.02817039942434026, | |
| "grad_norm": 1.2114237506466452e-05, | |
| "learning_rate": 1.9441624277865712e-05, | |
| "loss": 0.0053, | |
| "step": 32650 | |
| }, | |
| { | |
| "epoch": 0.028213539392830826, | |
| "grad_norm": 5.977819910185644e-06, | |
| "learning_rate": 1.9440761255111252e-05, | |
| "loss": 0.016, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.028256679361321393, | |
| "grad_norm": 0.00414885301142931, | |
| "learning_rate": 1.9439898232356795e-05, | |
| "loss": 0.0064, | |
| "step": 32750 | |
| }, | |
| { | |
| "epoch": 0.028299819329811963, | |
| "grad_norm": 0.001667422242462635, | |
| "learning_rate": 1.943903520960234e-05, | |
| "loss": 0.0013, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.02834295929830253, | |
| "grad_norm": 1.7196412045450415e-06, | |
| "learning_rate": 1.943817218684788e-05, | |
| "loss": 0.0022, | |
| "step": 32850 | |
| }, | |
| { | |
| "epoch": 0.028386099266793096, | |
| "grad_norm": 4.220390792397666e-07, | |
| "learning_rate": 1.9437309164093422e-05, | |
| "loss": 0.0278, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.028429239235283663, | |
| "grad_norm": 8.6249691833018e-09, | |
| "learning_rate": 1.9436446141338965e-05, | |
| "loss": 0.0155, | |
| "step": 32950 | |
| }, | |
| { | |
| "epoch": 0.02847237920377423, | |
| "grad_norm": 21.435453414916992, | |
| "learning_rate": 1.9435583118584505e-05, | |
| "loss": 0.0234, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.028515519172264796, | |
| "grad_norm": 9.135671461990569e-06, | |
| "learning_rate": 1.943472009583005e-05, | |
| "loss": 0.0028, | |
| "step": 33050 | |
| }, | |
| { | |
| "epoch": 0.028558659140755362, | |
| "grad_norm": 1.085790088950489e-07, | |
| "learning_rate": 1.9433857073075592e-05, | |
| "loss": 0.0189, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.02860179910924593, | |
| "grad_norm": 1.0733113288879395, | |
| "learning_rate": 1.9432994050321132e-05, | |
| "loss": 0.0188, | |
| "step": 33150 | |
| }, | |
| { | |
| "epoch": 0.0286449390777365, | |
| "grad_norm": 5.325038046066766e-07, | |
| "learning_rate": 1.9432131027566675e-05, | |
| "loss": 0.0025, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.028688079046227066, | |
| "grad_norm": 0.001730454503558576, | |
| "learning_rate": 1.9431268004812215e-05, | |
| "loss": 0.0429, | |
| "step": 33250 | |
| }, | |
| { | |
| "epoch": 0.028731219014717632, | |
| "grad_norm": 0.03524341806769371, | |
| "learning_rate": 1.943040498205776e-05, | |
| "loss": 0.0147, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.0287743589832082, | |
| "grad_norm": 8.027368769703003e-10, | |
| "learning_rate": 1.9429541959303298e-05, | |
| "loss": 0.0074, | |
| "step": 33350 | |
| }, | |
| { | |
| "epoch": 0.028817498951698765, | |
| "grad_norm": 2.603889299734874e-07, | |
| "learning_rate": 1.9428678936548845e-05, | |
| "loss": 0.0015, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.028860638920189332, | |
| "grad_norm": 12.171298027038574, | |
| "learning_rate": 1.9427815913794385e-05, | |
| "loss": 0.0188, | |
| "step": 33450 | |
| }, | |
| { | |
| "epoch": 0.0289037788886799, | |
| "grad_norm": 3.4058632536471123e-06, | |
| "learning_rate": 1.9426952891039928e-05, | |
| "loss": 0.0529, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.028946918857170465, | |
| "grad_norm": 17.399200439453125, | |
| "learning_rate": 1.9426089868285468e-05, | |
| "loss": 0.0294, | |
| "step": 33550 | |
| }, | |
| { | |
| "epoch": 0.028990058825661035, | |
| "grad_norm": 0.011678768321871758, | |
| "learning_rate": 1.942522684553101e-05, | |
| "loss": 0.0211, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.029033198794151602, | |
| "grad_norm": 2.466938212819514e-06, | |
| "learning_rate": 1.942436382277655e-05, | |
| "loss": 0.03, | |
| "step": 33650 | |
| }, | |
| { | |
| "epoch": 0.02907633876264217, | |
| "grad_norm": 3.6094334986136456e-12, | |
| "learning_rate": 1.9423500800022095e-05, | |
| "loss": 0.0381, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.029119478731132735, | |
| "grad_norm": 0.08116328716278076, | |
| "learning_rate": 1.9422637777267638e-05, | |
| "loss": 0.0016, | |
| "step": 33750 | |
| }, | |
| { | |
| "epoch": 0.0291626186996233, | |
| "grad_norm": 0.2594936788082123, | |
| "learning_rate": 1.9421774754513178e-05, | |
| "loss": 0.0145, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.029205758668113868, | |
| "grad_norm": 1.6326714103342965e-05, | |
| "learning_rate": 1.942091173175872e-05, | |
| "loss": 0.014, | |
| "step": 33850 | |
| }, | |
| { | |
| "epoch": 0.029248898636604435, | |
| "grad_norm": 6.704578368044167e-07, | |
| "learning_rate": 1.942004870900426e-05, | |
| "loss": 0.0138, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.029292038605095, | |
| "grad_norm": 1.600632737464025e-09, | |
| "learning_rate": 1.9419185686249805e-05, | |
| "loss": 0.0044, | |
| "step": 33950 | |
| }, | |
| { | |
| "epoch": 0.02933517857358557, | |
| "grad_norm": 2.9473580070771277e-05, | |
| "learning_rate": 1.9418322663495348e-05, | |
| "loss": 0.0209, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.029378318542076138, | |
| "grad_norm": 0.013792168349027634, | |
| "learning_rate": 1.941745964074089e-05, | |
| "loss": 0.009, | |
| "step": 34050 | |
| }, | |
| { | |
| "epoch": 0.029421458510566705, | |
| "grad_norm": 1.5911604123175493e-07, | |
| "learning_rate": 1.941659661798643e-05, | |
| "loss": 0.0272, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.02946459847905727, | |
| "grad_norm": 0.29515737295150757, | |
| "learning_rate": 1.9415733595231975e-05, | |
| "loss": 0.0595, | |
| "step": 34150 | |
| }, | |
| { | |
| "epoch": 0.029507738447547838, | |
| "grad_norm": 2.744394862475019e-07, | |
| "learning_rate": 1.9414870572477515e-05, | |
| "loss": 0.046, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.029550878416038404, | |
| "grad_norm": 0.028887495398521423, | |
| "learning_rate": 1.9414007549723058e-05, | |
| "loss": 0.0014, | |
| "step": 34250 | |
| }, | |
| { | |
| "epoch": 0.02959401838452897, | |
| "grad_norm": 1.5995985449990258e-05, | |
| "learning_rate": 1.9413144526968598e-05, | |
| "loss": 0.0072, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.029637158353019537, | |
| "grad_norm": 1.774524207576178e-05, | |
| "learning_rate": 1.941228150421414e-05, | |
| "loss": 0.0072, | |
| "step": 34350 | |
| }, | |
| { | |
| "epoch": 0.029680298321510108, | |
| "grad_norm": 3.840292084333896e-09, | |
| "learning_rate": 1.9411418481459685e-05, | |
| "loss": 0.015, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.029723438290000674, | |
| "grad_norm": 4.855828592553735e-06, | |
| "learning_rate": 1.9410555458705225e-05, | |
| "loss": 0.0101, | |
| "step": 34450 | |
| }, | |
| { | |
| "epoch": 0.02976657825849124, | |
| "grad_norm": 5.043638229370117, | |
| "learning_rate": 1.9409692435950768e-05, | |
| "loss": 0.0598, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.029809718226981807, | |
| "grad_norm": 3.365451473058556e-09, | |
| "learning_rate": 1.940882941319631e-05, | |
| "loss": 0.012, | |
| "step": 34550 | |
| }, | |
| { | |
| "epoch": 0.029852858195472374, | |
| "grad_norm": 2.5963392999983625e-06, | |
| "learning_rate": 1.940796639044185e-05, | |
| "loss": 0.0195, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.02989599816396294, | |
| "grad_norm": 0.0003348338359501213, | |
| "learning_rate": 1.9407103367687394e-05, | |
| "loss": 0.0289, | |
| "step": 34650 | |
| }, | |
| { | |
| "epoch": 0.029939138132453507, | |
| "grad_norm": 6.386066436767578, | |
| "learning_rate": 1.9406240344932938e-05, | |
| "loss": 0.0308, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.029982278100944074, | |
| "grad_norm": 0.00012195282033644617, | |
| "learning_rate": 1.9405377322178478e-05, | |
| "loss": 0.0522, | |
| "step": 34750 | |
| }, | |
| { | |
| "epoch": 0.030025418069434644, | |
| "grad_norm": 0.0025203858967870474, | |
| "learning_rate": 1.940451429942402e-05, | |
| "loss": 0.0275, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.03006855803792521, | |
| "grad_norm": 4.238718820381848e-10, | |
| "learning_rate": 1.940365127666956e-05, | |
| "loss": 0.0164, | |
| "step": 34850 | |
| }, | |
| { | |
| "epoch": 0.030111698006415777, | |
| "grad_norm": 1.477847000330712e-08, | |
| "learning_rate": 1.9402788253915104e-05, | |
| "loss": 0.0227, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.030154837974906343, | |
| "grad_norm": 8.416482621953492e-09, | |
| "learning_rate": 1.9401925231160644e-05, | |
| "loss": 0.0379, | |
| "step": 34950 | |
| }, | |
| { | |
| "epoch": 0.03019797794339691, | |
| "grad_norm": 2.9379866646195296e-06, | |
| "learning_rate": 1.940106220840619e-05, | |
| "loss": 0.0449, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.030241117911887477, | |
| "grad_norm": 13.662910461425781, | |
| "learning_rate": 1.940019918565173e-05, | |
| "loss": 0.0245, | |
| "step": 35050 | |
| }, | |
| { | |
| "epoch": 0.030284257880378043, | |
| "grad_norm": 2.694193881325191e-06, | |
| "learning_rate": 1.9399336162897274e-05, | |
| "loss": 0.0231, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.03032739784886861, | |
| "grad_norm": 19.55348014831543, | |
| "learning_rate": 1.9398473140142814e-05, | |
| "loss": 0.0253, | |
| "step": 35150 | |
| }, | |
| { | |
| "epoch": 0.03037053781735918, | |
| "grad_norm": 7.588599970631549e-09, | |
| "learning_rate": 1.9397610117388358e-05, | |
| "loss": 0.026, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.030413677785849746, | |
| "grad_norm": 6.923779882761494e-10, | |
| "learning_rate": 1.9396747094633898e-05, | |
| "loss": 0.008, | |
| "step": 35250 | |
| }, | |
| { | |
| "epoch": 0.030456817754340313, | |
| "grad_norm": 5.178381456971692e-07, | |
| "learning_rate": 1.939588407187944e-05, | |
| "loss": 0.0512, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.03049995772283088, | |
| "grad_norm": 3.179905760930524e-08, | |
| "learning_rate": 1.939502104912498e-05, | |
| "loss": 0.0314, | |
| "step": 35350 | |
| }, | |
| { | |
| "epoch": 0.030543097691321446, | |
| "grad_norm": 0.00010464258957654238, | |
| "learning_rate": 1.9394158026370524e-05, | |
| "loss": 0.0015, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.030586237659812013, | |
| "grad_norm": 11.300006866455078, | |
| "learning_rate": 1.9393295003616067e-05, | |
| "loss": 0.0298, | |
| "step": 35450 | |
| }, | |
| { | |
| "epoch": 0.03062937762830258, | |
| "grad_norm": 1.0112120918392975e-07, | |
| "learning_rate": 1.9392431980861607e-05, | |
| "loss": 0.0235, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.030672517596793146, | |
| "grad_norm": 0.0002930278715211898, | |
| "learning_rate": 1.939156895810715e-05, | |
| "loss": 0.0422, | |
| "step": 35550 | |
| }, | |
| { | |
| "epoch": 0.030715657565283716, | |
| "grad_norm": 3.265949146680214e-07, | |
| "learning_rate": 1.9390705935352694e-05, | |
| "loss": 0.0453, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.030758797533774283, | |
| "grad_norm": 0.01071107853204012, | |
| "learning_rate": 1.9389842912598234e-05, | |
| "loss": 0.0088, | |
| "step": 35650 | |
| }, | |
| { | |
| "epoch": 0.03080193750226485, | |
| "grad_norm": 2.198061288538611e-09, | |
| "learning_rate": 1.9388979889843777e-05, | |
| "loss": 0.0344, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.030845077470755416, | |
| "grad_norm": 2.0178050874619657e-07, | |
| "learning_rate": 1.938811686708932e-05, | |
| "loss": 0.0112, | |
| "step": 35750 | |
| }, | |
| { | |
| "epoch": 0.030888217439245982, | |
| "grad_norm": 0.03751551732420921, | |
| "learning_rate": 1.938725384433486e-05, | |
| "loss": 0.0112, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.03093135740773655, | |
| "grad_norm": 0.00011108023318229243, | |
| "learning_rate": 1.9386390821580404e-05, | |
| "loss": 0.0275, | |
| "step": 35850 | |
| }, | |
| { | |
| "epoch": 0.030974497376227116, | |
| "grad_norm": 1.5553026644354873e-09, | |
| "learning_rate": 1.9385527798825944e-05, | |
| "loss": 0.0118, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.031017637344717682, | |
| "grad_norm": 2.6839693418878596e-06, | |
| "learning_rate": 1.9384664776071487e-05, | |
| "loss": 0.0054, | |
| "step": 35950 | |
| }, | |
| { | |
| "epoch": 0.03106077731320825, | |
| "grad_norm": 2.178272318076324e-08, | |
| "learning_rate": 1.9383801753317027e-05, | |
| "loss": 0.0331, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.03110391728169882, | |
| "grad_norm": 2.3207785204704123e-07, | |
| "learning_rate": 1.9382938730562574e-05, | |
| "loss": 0.0102, | |
| "step": 36050 | |
| }, | |
| { | |
| "epoch": 0.031147057250189385, | |
| "grad_norm": 1.738131345518923e-07, | |
| "learning_rate": 1.9382075707808114e-05, | |
| "loss": 0.0588, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.031190197218679952, | |
| "grad_norm": 0.019147371873259544, | |
| "learning_rate": 1.9381212685053657e-05, | |
| "loss": 0.043, | |
| "step": 36150 | |
| }, | |
| { | |
| "epoch": 0.03123333718717052, | |
| "grad_norm": 0.0022545859683305025, | |
| "learning_rate": 1.9380349662299197e-05, | |
| "loss": 0.0191, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.031276477155661085, | |
| "grad_norm": 0.00014786762767471373, | |
| "learning_rate": 1.937948663954474e-05, | |
| "loss": 0.0016, | |
| "step": 36250 | |
| }, | |
| { | |
| "epoch": 0.031319617124151655, | |
| "grad_norm": 1.8323513018003723e-07, | |
| "learning_rate": 1.937862361679028e-05, | |
| "loss": 0.0007, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.03136275709264222, | |
| "grad_norm": 15.16702651977539, | |
| "learning_rate": 1.9377760594035824e-05, | |
| "loss": 0.0363, | |
| "step": 36350 | |
| }, | |
| { | |
| "epoch": 0.03140589706113279, | |
| "grad_norm": 0.061391185969114304, | |
| "learning_rate": 1.9376897571281367e-05, | |
| "loss": 0.0393, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.03144903702962335, | |
| "grad_norm": 0.0035098083317279816, | |
| "learning_rate": 1.9376034548526907e-05, | |
| "loss": 0.0147, | |
| "step": 36450 | |
| }, | |
| { | |
| "epoch": 0.03149217699811392, | |
| "grad_norm": 0.06623140722513199, | |
| "learning_rate": 1.937517152577245e-05, | |
| "loss": 0.0543, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.031535316966604485, | |
| "grad_norm": 8.011748832359444e-06, | |
| "learning_rate": 1.937430850301799e-05, | |
| "loss": 0.0447, | |
| "step": 36550 | |
| }, | |
| { | |
| "epoch": 0.031578456935095055, | |
| "grad_norm": 2.976227278850274e-06, | |
| "learning_rate": 1.9373445480263534e-05, | |
| "loss": 0.0238, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.031621596903585625, | |
| "grad_norm": 4.54370677971383e-07, | |
| "learning_rate": 1.9372582457509077e-05, | |
| "loss": 0.0282, | |
| "step": 36650 | |
| }, | |
| { | |
| "epoch": 0.03166473687207619, | |
| "grad_norm": 1.2593355247503268e-09, | |
| "learning_rate": 1.937171943475462e-05, | |
| "loss": 0.0475, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.03170787684056676, | |
| "grad_norm": 0.0001775699929567054, | |
| "learning_rate": 1.937085641200016e-05, | |
| "loss": 0.0005, | |
| "step": 36750 | |
| }, | |
| { | |
| "epoch": 0.03175101680905732, | |
| "grad_norm": 1.9041050336454646e-07, | |
| "learning_rate": 1.9369993389245704e-05, | |
| "loss": 0.0008, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.03179415677754789, | |
| "grad_norm": 0.0002166083868360147, | |
| "learning_rate": 1.9369130366491244e-05, | |
| "loss": 0.0064, | |
| "step": 36850 | |
| }, | |
| { | |
| "epoch": 0.031837296746038454, | |
| "grad_norm": 2.4730157921482032e-09, | |
| "learning_rate": 1.9368267343736787e-05, | |
| "loss": 0.0747, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.031880436714529024, | |
| "grad_norm": 6.864386705274228e-06, | |
| "learning_rate": 1.9367404320982327e-05, | |
| "loss": 0.0022, | |
| "step": 36950 | |
| }, | |
| { | |
| "epoch": 0.03192357668301959, | |
| "grad_norm": 2.638907517393818e-06, | |
| "learning_rate": 1.936654129822787e-05, | |
| "loss": 0.0239, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.03196671665151016, | |
| "grad_norm": 8.631070522824302e-05, | |
| "learning_rate": 1.9365678275473413e-05, | |
| "loss": 0.0191, | |
| "step": 37050 | |
| }, | |
| { | |
| "epoch": 0.03200985662000073, | |
| "grad_norm": 14.52698802947998, | |
| "learning_rate": 1.9364815252718953e-05, | |
| "loss": 0.0188, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.03205299658849129, | |
| "grad_norm": 0.07407932728528976, | |
| "learning_rate": 1.9363952229964497e-05, | |
| "loss": 0.0136, | |
| "step": 37150 | |
| }, | |
| { | |
| "epoch": 0.03209613655698186, | |
| "grad_norm": 0.002848062664270401, | |
| "learning_rate": 1.936308920721004e-05, | |
| "loss": 0.0451, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.032139276525472424, | |
| "grad_norm": 2.2414766931433405e-07, | |
| "learning_rate": 1.936222618445558e-05, | |
| "loss": 0.0395, | |
| "step": 37250 | |
| }, | |
| { | |
| "epoch": 0.032182416493962994, | |
| "grad_norm": 5.524349830920983e-07, | |
| "learning_rate": 1.9361363161701123e-05, | |
| "loss": 0.0468, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.03222555646245356, | |
| "grad_norm": 2.2004120182828046e-05, | |
| "learning_rate": 1.9360500138946667e-05, | |
| "loss": 0.0599, | |
| "step": 37350 | |
| }, | |
| { | |
| "epoch": 0.03226869643094413, | |
| "grad_norm": 5.064206831661977e-08, | |
| "learning_rate": 1.9359637116192207e-05, | |
| "loss": 0.0191, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.0323118363994347, | |
| "grad_norm": 5.038096060161479e-05, | |
| "learning_rate": 1.935877409343775e-05, | |
| "loss": 0.0094, | |
| "step": 37450 | |
| }, | |
| { | |
| "epoch": 0.03235497636792526, | |
| "grad_norm": 0.002139901742339134, | |
| "learning_rate": 1.935791107068329e-05, | |
| "loss": 0.0026, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.03239811633641583, | |
| "grad_norm": 0.025793571025133133, | |
| "learning_rate": 1.9357048047928833e-05, | |
| "loss": 0.0503, | |
| "step": 37550 | |
| }, | |
| { | |
| "epoch": 0.03244125630490639, | |
| "grad_norm": 1.497374176979065, | |
| "learning_rate": 1.9356185025174373e-05, | |
| "loss": 0.0239, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.032484396273396964, | |
| "grad_norm": 9.68094241216022e-07, | |
| "learning_rate": 1.935532200241992e-05, | |
| "loss": 0.0362, | |
| "step": 37650 | |
| }, | |
| { | |
| "epoch": 0.03252753624188753, | |
| "grad_norm": 7.437192266479542e-07, | |
| "learning_rate": 1.935445897966546e-05, | |
| "loss": 0.0174, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.0325706762103781, | |
| "grad_norm": 1.591896947594762e-09, | |
| "learning_rate": 1.9353595956911003e-05, | |
| "loss": 0.0253, | |
| "step": 37750 | |
| }, | |
| { | |
| "epoch": 0.03261381617886866, | |
| "grad_norm": 14.039113998413086, | |
| "learning_rate": 1.9352732934156543e-05, | |
| "loss": 0.0201, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.03265695614735923, | |
| "grad_norm": 2.0073053747182712e-05, | |
| "learning_rate": 1.9351869911402087e-05, | |
| "loss": 0.043, | |
| "step": 37850 | |
| }, | |
| { | |
| "epoch": 0.0327000961158498, | |
| "grad_norm": 1.3844499768822516e-08, | |
| "learning_rate": 1.9351006888647626e-05, | |
| "loss": 0.007, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.03274323608434036, | |
| "grad_norm": 0.02289557084441185, | |
| "learning_rate": 1.935014386589317e-05, | |
| "loss": 0.0268, | |
| "step": 37950 | |
| }, | |
| { | |
| "epoch": 0.03278637605283093, | |
| "grad_norm": 2.7390053766729316e-11, | |
| "learning_rate": 1.934928084313871e-05, | |
| "loss": 0.0141, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.032829516021321496, | |
| "grad_norm": 2.0595265937117802e-07, | |
| "learning_rate": 1.9348417820384253e-05, | |
| "loss": 0.1202, | |
| "step": 38050 | |
| }, | |
| { | |
| "epoch": 0.032872655989812066, | |
| "grad_norm": 0.00014018621004652232, | |
| "learning_rate": 1.9347554797629796e-05, | |
| "loss": 0.0277, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.03291579595830263, | |
| "grad_norm": 9.558748570270836e-05, | |
| "learning_rate": 1.9346691774875336e-05, | |
| "loss": 0.0498, | |
| "step": 38150 | |
| }, | |
| { | |
| "epoch": 0.0329589359267932, | |
| "grad_norm": 2.20267253325801e-07, | |
| "learning_rate": 1.934582875212088e-05, | |
| "loss": 0.0357, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.03300207589528377, | |
| "grad_norm": 0.002117832424119115, | |
| "learning_rate": 1.9344965729366423e-05, | |
| "loss": 0.0478, | |
| "step": 38250 | |
| }, | |
| { | |
| "epoch": 0.03304521586377433, | |
| "grad_norm": 0.0015125697245821357, | |
| "learning_rate": 1.9344102706611963e-05, | |
| "loss": 0.0049, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.0330883558322649, | |
| "grad_norm": 0.001929111429490149, | |
| "learning_rate": 1.9343239683857506e-05, | |
| "loss": 0.0321, | |
| "step": 38350 | |
| }, | |
| { | |
| "epoch": 0.033131495800755466, | |
| "grad_norm": 14.052818298339844, | |
| "learning_rate": 1.934237666110305e-05, | |
| "loss": 0.0126, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.033174635769246036, | |
| "grad_norm": 0.04780351743102074, | |
| "learning_rate": 1.934151363834859e-05, | |
| "loss": 0.0145, | |
| "step": 38450 | |
| }, | |
| { | |
| "epoch": 0.0332177757377366, | |
| "grad_norm": 1.625859908926941e-07, | |
| "learning_rate": 1.9340650615594133e-05, | |
| "loss": 0.0006, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.03326091570622717, | |
| "grad_norm": 4.171390173723921e-06, | |
| "learning_rate": 1.9339787592839673e-05, | |
| "loss": 0.0052, | |
| "step": 38550 | |
| }, | |
| { | |
| "epoch": 0.03330405567471773, | |
| "grad_norm": 9.933991532307118e-05, | |
| "learning_rate": 1.9338924570085216e-05, | |
| "loss": 0.0149, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.0333471956432083, | |
| "grad_norm": 5.527433510899016e-10, | |
| "learning_rate": 1.9338061547330756e-05, | |
| "loss": 0.0569, | |
| "step": 38650 | |
| }, | |
| { | |
| "epoch": 0.03339033561169887, | |
| "grad_norm": 1.7711924149566016e-09, | |
| "learning_rate": 1.9337198524576303e-05, | |
| "loss": 0.0089, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.033433475580189435, | |
| "grad_norm": 5.876652497960322e-09, | |
| "learning_rate": 1.9336335501821843e-05, | |
| "loss": 0.0412, | |
| "step": 38750 | |
| }, | |
| { | |
| "epoch": 0.033476615548680005, | |
| "grad_norm": 1.2611899375915527, | |
| "learning_rate": 1.9335472479067386e-05, | |
| "loss": 0.0057, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.03351975551717057, | |
| "grad_norm": 0.00011541438288986683, | |
| "learning_rate": 1.9334609456312926e-05, | |
| "loss": 0.0264, | |
| "step": 38850 | |
| }, | |
| { | |
| "epoch": 0.03356289548566114, | |
| "grad_norm": 0.7902683019638062, | |
| "learning_rate": 1.933374643355847e-05, | |
| "loss": 0.0269, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.0336060354541517, | |
| "grad_norm": 1.6534098904230632e-05, | |
| "learning_rate": 1.933288341080401e-05, | |
| "loss": 0.0041, | |
| "step": 38950 | |
| }, | |
| { | |
| "epoch": 0.03364917542264227, | |
| "grad_norm": 0.029098449274897575, | |
| "learning_rate": 1.9332020388049553e-05, | |
| "loss": 0.0208, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.033692315391132835, | |
| "grad_norm": 0.0004794780688825995, | |
| "learning_rate": 1.9331157365295096e-05, | |
| "loss": 0.0595, | |
| "step": 39050 | |
| }, | |
| { | |
| "epoch": 0.033735455359623405, | |
| "grad_norm": 16.320070266723633, | |
| "learning_rate": 1.9330294342540636e-05, | |
| "loss": 0.0735, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.033778595328113975, | |
| "grad_norm": 3.635158840609165e-09, | |
| "learning_rate": 1.932943131978618e-05, | |
| "loss": 0.0164, | |
| "step": 39150 | |
| }, | |
| { | |
| "epoch": 0.03382173529660454, | |
| "grad_norm": 1.406357796440716e-06, | |
| "learning_rate": 1.932856829703172e-05, | |
| "loss": 0.0237, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.03386487526509511, | |
| "grad_norm": 0.05031180754303932, | |
| "learning_rate": 1.9327705274277263e-05, | |
| "loss": 0.0264, | |
| "step": 39250 | |
| }, | |
| { | |
| "epoch": 0.03390801523358567, | |
| "grad_norm": 0.022205352783203125, | |
| "learning_rate": 1.9326842251522806e-05, | |
| "loss": 0.0076, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.03395115520207624, | |
| "grad_norm": 3.1427214707946405e-05, | |
| "learning_rate": 1.932597922876835e-05, | |
| "loss": 0.0093, | |
| "step": 39350 | |
| }, | |
| { | |
| "epoch": 0.033994295170566804, | |
| "grad_norm": 0.0015017461264505982, | |
| "learning_rate": 1.932511620601389e-05, | |
| "loss": 0.0016, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.034037435139057375, | |
| "grad_norm": 3.3295341483885466e-10, | |
| "learning_rate": 1.9324253183259433e-05, | |
| "loss": 0.0297, | |
| "step": 39450 | |
| }, | |
| { | |
| "epoch": 0.034080575107547945, | |
| "grad_norm": 1.431539747853705e-10, | |
| "learning_rate": 1.9323390160504972e-05, | |
| "loss": 0.008, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.03412371507603851, | |
| "grad_norm": 9.472168188695562e-11, | |
| "learning_rate": 1.9322527137750516e-05, | |
| "loss": 0.0526, | |
| "step": 39550 | |
| }, | |
| { | |
| "epoch": 0.03416685504452908, | |
| "grad_norm": 1.1010347078510563e-09, | |
| "learning_rate": 1.9321664114996056e-05, | |
| "loss": 0.0438, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.03420999501301964, | |
| "grad_norm": 0.0038324242923408747, | |
| "learning_rate": 1.93208010922416e-05, | |
| "loss": 0.0068, | |
| "step": 39650 | |
| }, | |
| { | |
| "epoch": 0.03425313498151021, | |
| "grad_norm": 1.2454121067762003e-10, | |
| "learning_rate": 1.9319938069487142e-05, | |
| "loss": 0.0105, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.034296274950000774, | |
| "grad_norm": 3.0910987103283105e-09, | |
| "learning_rate": 1.9319075046732682e-05, | |
| "loss": 0.003, | |
| "step": 39750 | |
| }, | |
| { | |
| "epoch": 0.034339414918491344, | |
| "grad_norm": 0.20711366832256317, | |
| "learning_rate": 1.9318212023978226e-05, | |
| "loss": 0.0072, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.03438255488698191, | |
| "grad_norm": 0.0013983896933495998, | |
| "learning_rate": 1.931734900122377e-05, | |
| "loss": 0.0357, | |
| "step": 39850 | |
| }, | |
| { | |
| "epoch": 0.03442569485547248, | |
| "grad_norm": 1.195646859741828e-06, | |
| "learning_rate": 1.931648597846931e-05, | |
| "loss": 0.0252, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.03446883482396305, | |
| "grad_norm": 0.0007419702014885843, | |
| "learning_rate": 1.9315622955714852e-05, | |
| "loss": 0.0203, | |
| "step": 39950 | |
| }, | |
| { | |
| "epoch": 0.03451197479245361, | |
| "grad_norm": 1.9399341908865608e-05, | |
| "learning_rate": 1.9314759932960396e-05, | |
| "loss": 0.0273, | |
| "step": 40000 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 1159018, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 2500, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |