| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.46803435666036, | |
| "eval_steps": 500, | |
| "global_step": 900000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 3.8533715074003996e-06, | |
| "grad_norm": 33.24064254760742, | |
| "learning_rate": 5e-05, | |
| "loss": 10.4552, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0038533715074004, | |
| "grad_norm": 2.1120877265930176, | |
| "learning_rate": 4.99445e-05, | |
| "loss": 4.0146, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.0077067430148008, | |
| "grad_norm": 1.7379038333892822, | |
| "learning_rate": 4.9888944444444445e-05, | |
| "loss": 2.9699, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.0115601145222012, | |
| "grad_norm": 1.6360819339752197, | |
| "learning_rate": 4.9833388888888894e-05, | |
| "loss": 2.6552, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.0154134860296016, | |
| "grad_norm": 1.456438660621643, | |
| "learning_rate": 4.9777833333333336e-05, | |
| "loss": 2.4387, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.019266857537002, | |
| "grad_norm": 1.4404393434524536, | |
| "learning_rate": 4.972227777777778e-05, | |
| "loss": 2.288, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.0231202290444024, | |
| "grad_norm": 1.476318359375, | |
| "learning_rate": 4.966672222222222e-05, | |
| "loss": 2.1816, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.026973600551802798, | |
| "grad_norm": 1.4118831157684326, | |
| "learning_rate": 4.961116666666667e-05, | |
| "loss": 2.1048, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.0308269720592032, | |
| "grad_norm": 1.3684428930282593, | |
| "learning_rate": 4.955561111111111e-05, | |
| "loss": 2.0433, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.0346803435666036, | |
| "grad_norm": 1.32038414478302, | |
| "learning_rate": 4.9500055555555555e-05, | |
| "loss": 1.9921, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.038533715074004, | |
| "grad_norm": 1.3200163841247559, | |
| "learning_rate": 4.9444500000000004e-05, | |
| "loss": 1.9502, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.0423870865814044, | |
| "grad_norm": 1.2274246215820312, | |
| "learning_rate": 4.9388944444444446e-05, | |
| "loss": 1.9121, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.0462404580888048, | |
| "grad_norm": 1.2220534086227417, | |
| "learning_rate": 4.9333388888888896e-05, | |
| "loss": 1.8812, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.0500938295962052, | |
| "grad_norm": 1.186949610710144, | |
| "learning_rate": 4.927783333333333e-05, | |
| "loss": 1.8519, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.053947201103605597, | |
| "grad_norm": 1.1999038457870483, | |
| "learning_rate": 4.922227777777778e-05, | |
| "loss": 1.8266, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.057800572611006, | |
| "grad_norm": 1.150551676750183, | |
| "learning_rate": 4.916672222222222e-05, | |
| "loss": 1.8038, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.0616539441184064, | |
| "grad_norm": 1.1635456085205078, | |
| "learning_rate": 4.911116666666667e-05, | |
| "loss": 1.7846, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.0655073156258068, | |
| "grad_norm": 1.1746997833251953, | |
| "learning_rate": 4.9055611111111114e-05, | |
| "loss": 1.7659, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.0693606871332072, | |
| "grad_norm": 1.1516313552856445, | |
| "learning_rate": 4.9000055555555556e-05, | |
| "loss": 1.7487, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.0732140586406076, | |
| "grad_norm": 1.1180574893951416, | |
| "learning_rate": 4.8944500000000005e-05, | |
| "loss": 1.7329, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.077067430148008, | |
| "grad_norm": 1.0941097736358643, | |
| "learning_rate": 4.888894444444445e-05, | |
| "loss": 1.7186, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.0809208016554084, | |
| "grad_norm": 1.0867908000946045, | |
| "learning_rate": 4.883338888888889e-05, | |
| "loss": 1.7059, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.0847741731628088, | |
| "grad_norm": 1.1522151231765747, | |
| "learning_rate": 4.877783333333333e-05, | |
| "loss": 1.6922, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.0886275446702092, | |
| "grad_norm": 1.0744513273239136, | |
| "learning_rate": 4.872227777777778e-05, | |
| "loss": 1.6789, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.0924809161776096, | |
| "grad_norm": 1.0637524127960205, | |
| "learning_rate": 4.8666722222222224e-05, | |
| "loss": 1.6685, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.09633428768501, | |
| "grad_norm": 1.1220489740371704, | |
| "learning_rate": 4.8611166666666666e-05, | |
| "loss": 1.6582, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.1001876591924104, | |
| "grad_norm": 1.0609642267227173, | |
| "learning_rate": 4.8555611111111115e-05, | |
| "loss": 1.6472, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.1040410306998108, | |
| "grad_norm": 1.0400673151016235, | |
| "learning_rate": 4.850005555555556e-05, | |
| "loss": 1.6383, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.10789440220721119, | |
| "grad_norm": 0.982431948184967, | |
| "learning_rate": 4.844450000000001e-05, | |
| "loss": 1.6292, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.1117477737146116, | |
| "grad_norm": 0.9695896506309509, | |
| "learning_rate": 4.838894444444444e-05, | |
| "loss": 1.6205, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.115601145222012, | |
| "grad_norm": 1.0017755031585693, | |
| "learning_rate": 4.833338888888889e-05, | |
| "loss": 1.6121, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.1194545167294124, | |
| "grad_norm": 0.9817500114440918, | |
| "learning_rate": 4.8277833333333334e-05, | |
| "loss": 1.6047, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.1233078882368128, | |
| "grad_norm": 0.9635987877845764, | |
| "learning_rate": 4.822227777777778e-05, | |
| "loss": 1.597, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.1271612597442132, | |
| "grad_norm": 1.0124236345291138, | |
| "learning_rate": 4.8166722222222225e-05, | |
| "loss": 1.5886, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.1310146312516136, | |
| "grad_norm": 0.9595442414283752, | |
| "learning_rate": 4.811116666666667e-05, | |
| "loss": 1.5812, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.134868002759014, | |
| "grad_norm": 0.9507773518562317, | |
| "learning_rate": 4.805561111111112e-05, | |
| "loss": 1.5757, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.1387213742664144, | |
| "grad_norm": 1.0198057889938354, | |
| "learning_rate": 4.800005555555556e-05, | |
| "loss": 1.5693, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.1425747457738148, | |
| "grad_norm": 0.9612113833427429, | |
| "learning_rate": 4.79445e-05, | |
| "loss": 1.5633, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.1464281172812152, | |
| "grad_norm": 0.9176149368286133, | |
| "learning_rate": 4.7888944444444444e-05, | |
| "loss": 1.5563, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.1502814887886156, | |
| "grad_norm": 0.9220606684684753, | |
| "learning_rate": 4.783338888888889e-05, | |
| "loss": 1.551, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.154134860296016, | |
| "grad_norm": 0.9286023378372192, | |
| "learning_rate": 4.7777833333333335e-05, | |
| "loss": 1.5454, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.1579882318034164, | |
| "grad_norm": 0.9021902680397034, | |
| "learning_rate": 4.772227777777778e-05, | |
| "loss": 1.5416, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.1618416033108168, | |
| "grad_norm": 0.961746871471405, | |
| "learning_rate": 4.766672222222223e-05, | |
| "loss": 1.5355, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.1656949748182172, | |
| "grad_norm": 0.8932749629020691, | |
| "learning_rate": 4.761116666666667e-05, | |
| "loss": 1.531, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.1695483463256176, | |
| "grad_norm": 0.8928858041763306, | |
| "learning_rate": 4.755561111111111e-05, | |
| "loss": 1.5252, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.173401717833018, | |
| "grad_norm": 0.9956013560295105, | |
| "learning_rate": 4.7500055555555554e-05, | |
| "loss": 1.5208, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.1772550893404184, | |
| "grad_norm": 0.8892582058906555, | |
| "learning_rate": 4.74445e-05, | |
| "loss": 1.5159, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.1811084608478188, | |
| "grad_norm": 0.9004553556442261, | |
| "learning_rate": 4.7388944444444445e-05, | |
| "loss": 1.5119, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.1849618323552192, | |
| "grad_norm": 0.884730875492096, | |
| "learning_rate": 4.7333388888888894e-05, | |
| "loss": 1.507, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.1888152038626196, | |
| "grad_norm": 0.866369903087616, | |
| "learning_rate": 4.727783333333334e-05, | |
| "loss": 1.5036, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.19266857537002, | |
| "grad_norm": 0.8980478048324585, | |
| "learning_rate": 4.722227777777778e-05, | |
| "loss": 1.4993, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.1965219468774204, | |
| "grad_norm": 0.9032998085021973, | |
| "learning_rate": 4.716672222222223e-05, | |
| "loss": 1.4949, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.2003753183848208, | |
| "grad_norm": 0.8740929961204529, | |
| "learning_rate": 4.711116666666667e-05, | |
| "loss": 1.4919, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.2042286898922212, | |
| "grad_norm": 0.8779985308647156, | |
| "learning_rate": 4.705561111111111e-05, | |
| "loss": 1.4876, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.2080820613996216, | |
| "grad_norm": 0.8598712086677551, | |
| "learning_rate": 4.7000055555555555e-05, | |
| "loss": 1.4847, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.211935432907022, | |
| "grad_norm": 0.8621186017990112, | |
| "learning_rate": 4.6944500000000004e-05, | |
| "loss": 1.4805, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.21578880441442239, | |
| "grad_norm": 0.8625257015228271, | |
| "learning_rate": 4.688894444444445e-05, | |
| "loss": 1.4771, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.2196421759218228, | |
| "grad_norm": 0.83707195520401, | |
| "learning_rate": 4.683338888888889e-05, | |
| "loss": 1.473, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.2234955474292232, | |
| "grad_norm": 0.8472415804862976, | |
| "learning_rate": 4.677783333333334e-05, | |
| "loss": 1.4703, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.2273489189366236, | |
| "grad_norm": 0.8501800298690796, | |
| "learning_rate": 4.672227777777778e-05, | |
| "loss": 1.4677, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.231202290444024, | |
| "grad_norm": 0.8390816450119019, | |
| "learning_rate": 4.666672222222222e-05, | |
| "loss": 1.4632, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.2350556619514244, | |
| "grad_norm": 0.8502111434936523, | |
| "learning_rate": 4.6611166666666665e-05, | |
| "loss": 1.4607, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.2389090334588248, | |
| "grad_norm": 0.8513786196708679, | |
| "learning_rate": 4.6555611111111114e-05, | |
| "loss": 1.4569, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.2427624049662252, | |
| "grad_norm": 0.8665297031402588, | |
| "learning_rate": 4.6500055555555557e-05, | |
| "loss": 1.4541, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.2466157764736256, | |
| "grad_norm": 0.8247693777084351, | |
| "learning_rate": 4.64445e-05, | |
| "loss": 1.4519, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.250469147981026, | |
| "grad_norm": 0.8859081268310547, | |
| "learning_rate": 4.638894444444445e-05, | |
| "loss": 1.4494, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.2543225194884264, | |
| "grad_norm": 0.816459059715271, | |
| "learning_rate": 4.633338888888889e-05, | |
| "loss": 1.4457, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.2581758909958268, | |
| "grad_norm": 0.8117705583572388, | |
| "learning_rate": 4.627783333333334e-05, | |
| "loss": 1.4426, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.2620292625032272, | |
| "grad_norm": 0.8296411633491516, | |
| "learning_rate": 4.6222277777777775e-05, | |
| "loss": 1.4401, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.2658826340106276, | |
| "grad_norm": 0.8232107758522034, | |
| "learning_rate": 4.6166722222222224e-05, | |
| "loss": 1.4377, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.269736005518028, | |
| "grad_norm": 0.7909438014030457, | |
| "learning_rate": 4.6111166666666667e-05, | |
| "loss": 1.4357, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.2735893770254284, | |
| "grad_norm": 0.8350186347961426, | |
| "learning_rate": 4.6055611111111116e-05, | |
| "loss": 1.4328, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.2774427485328288, | |
| "grad_norm": 0.8087278604507446, | |
| "learning_rate": 4.600005555555556e-05, | |
| "loss": 1.4311, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.2812961200402292, | |
| "grad_norm": 0.8941106200218201, | |
| "learning_rate": 4.59445e-05, | |
| "loss": 1.4278, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.2851494915476296, | |
| "grad_norm": 0.8181429505348206, | |
| "learning_rate": 4.588894444444445e-05, | |
| "loss": 1.4262, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.28900286305503, | |
| "grad_norm": 0.8224203586578369, | |
| "learning_rate": 4.583338888888889e-05, | |
| "loss": 1.4239, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.2928562345624304, | |
| "grad_norm": 0.7924049496650696, | |
| "learning_rate": 4.5777833333333334e-05, | |
| "loss": 1.421, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.2967096060698308, | |
| "grad_norm": 0.7774991393089294, | |
| "learning_rate": 4.5722277777777776e-05, | |
| "loss": 1.4189, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.3005629775772312, | |
| "grad_norm": 0.8097211122512817, | |
| "learning_rate": 4.5666722222222226e-05, | |
| "loss": 1.4154, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.3044163490846316, | |
| "grad_norm": 0.7798463702201843, | |
| "learning_rate": 4.5611166666666675e-05, | |
| "loss": 1.4146, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.308269720592032, | |
| "grad_norm": 0.8168286681175232, | |
| "learning_rate": 4.555561111111111e-05, | |
| "loss": 1.4126, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.3121230920994324, | |
| "grad_norm": 0.7852460145950317, | |
| "learning_rate": 4.550005555555556e-05, | |
| "loss": 1.4089, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.3159764636068328, | |
| "grad_norm": 0.779107928276062, | |
| "learning_rate": 4.54445e-05, | |
| "loss": 1.4072, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.3198298351142332, | |
| "grad_norm": 0.7704175710678101, | |
| "learning_rate": 4.538894444444445e-05, | |
| "loss": 1.4056, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.3236832066216336, | |
| "grad_norm": 0.7774575352668762, | |
| "learning_rate": 4.5333388888888886e-05, | |
| "loss": 1.4044, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.327536578129034, | |
| "grad_norm": 0.7762672305107117, | |
| "learning_rate": 4.5277833333333336e-05, | |
| "loss": 1.4018, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.3313899496364344, | |
| "grad_norm": 0.7741659283638, | |
| "learning_rate": 4.522227777777778e-05, | |
| "loss": 1.3992, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.3352433211438348, | |
| "grad_norm": 0.8049792051315308, | |
| "learning_rate": 4.516672222222223e-05, | |
| "loss": 1.3978, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.3390966926512352, | |
| "grad_norm": 0.7718026041984558, | |
| "learning_rate": 4.511116666666667e-05, | |
| "loss": 1.3955, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.3429500641586356, | |
| "grad_norm": 0.7472023367881775, | |
| "learning_rate": 4.505561111111111e-05, | |
| "loss": 1.394, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.346803435666036, | |
| "grad_norm": 0.7536051869392395, | |
| "learning_rate": 4.500005555555556e-05, | |
| "loss": 1.3924, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.3506568071734364, | |
| "grad_norm": 0.774494469165802, | |
| "learning_rate": 4.49445e-05, | |
| "loss": 1.3913, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.3545101786808368, | |
| "grad_norm": 0.7755584120750427, | |
| "learning_rate": 4.4888944444444445e-05, | |
| "loss": 1.3888, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.3583635501882372, | |
| "grad_norm": 0.748931348323822, | |
| "learning_rate": 4.483338888888889e-05, | |
| "loss": 1.3875, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.3622169216956376, | |
| "grad_norm": 0.7655521035194397, | |
| "learning_rate": 4.477783333333334e-05, | |
| "loss": 1.3857, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.366070293203038, | |
| "grad_norm": 0.7640711665153503, | |
| "learning_rate": 4.472227777777778e-05, | |
| "loss": 1.3838, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.3699236647104384, | |
| "grad_norm": 0.7506535649299622, | |
| "learning_rate": 4.466672222222222e-05, | |
| "loss": 1.3816, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.3737770362178388, | |
| "grad_norm": 0.7430715560913086, | |
| "learning_rate": 4.461116666666667e-05, | |
| "loss": 1.3796, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.3776304077252392, | |
| "grad_norm": 0.733686089515686, | |
| "learning_rate": 4.455561111111111e-05, | |
| "loss": 1.3782, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.3814837792326396, | |
| "grad_norm": 0.7562544941902161, | |
| "learning_rate": 4.4500055555555555e-05, | |
| "loss": 1.3782, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.38533715074004, | |
| "grad_norm": 0.7897418141365051, | |
| "learning_rate": 4.44445e-05, | |
| "loss": 1.3758, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.3891905222474404, | |
| "grad_norm": 0.7322382926940918, | |
| "learning_rate": 4.438894444444445e-05, | |
| "loss": 1.3746, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 0.3930438937548408, | |
| "grad_norm": 0.7251117825508118, | |
| "learning_rate": 4.4333388888888896e-05, | |
| "loss": 1.3727, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 0.3968972652622412, | |
| "grad_norm": 0.7524704337120056, | |
| "learning_rate": 4.427783333333334e-05, | |
| "loss": 1.3704, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 0.4007506367696416, | |
| "grad_norm": 0.7829206585884094, | |
| "learning_rate": 4.422227777777778e-05, | |
| "loss": 1.3695, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 0.404604008277042, | |
| "grad_norm": 0.7486142516136169, | |
| "learning_rate": 4.416672222222222e-05, | |
| "loss": 1.3685, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.4084573797844424, | |
| "grad_norm": 0.7729934453964233, | |
| "learning_rate": 4.411116666666667e-05, | |
| "loss": 1.3664, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 0.4123107512918428, | |
| "grad_norm": 0.7546641826629639, | |
| "learning_rate": 4.4055611111111114e-05, | |
| "loss": 1.3652, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 0.4161641227992432, | |
| "grad_norm": 0.7382177710533142, | |
| "learning_rate": 4.400005555555556e-05, | |
| "loss": 1.3644, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 0.4200174943066436, | |
| "grad_norm": 0.7239982485771179, | |
| "learning_rate": 4.39445e-05, | |
| "loss": 1.3622, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 0.423870865814044, | |
| "grad_norm": 0.7373159527778625, | |
| "learning_rate": 4.388894444444445e-05, | |
| "loss": 1.361, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.4277242373214444, | |
| "grad_norm": 0.7275413274765015, | |
| "learning_rate": 4.383338888888889e-05, | |
| "loss": 1.3599, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 0.43157760882884477, | |
| "grad_norm": 0.7439739108085632, | |
| "learning_rate": 4.377783333333333e-05, | |
| "loss": 1.3578, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 0.4354309803362452, | |
| "grad_norm": 0.7235158681869507, | |
| "learning_rate": 4.372227777777778e-05, | |
| "loss": 1.3571, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 0.4392843518436456, | |
| "grad_norm": 0.7328953742980957, | |
| "learning_rate": 4.3666722222222224e-05, | |
| "loss": 1.3551, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 0.443137723351046, | |
| "grad_norm": 0.7176284790039062, | |
| "learning_rate": 4.361116666666667e-05, | |
| "loss": 1.3541, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 0.4469910948584464, | |
| "grad_norm": 0.7536628842353821, | |
| "learning_rate": 4.355561111111111e-05, | |
| "loss": 1.3514, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 0.4508444663658468, | |
| "grad_norm": 0.7354797124862671, | |
| "learning_rate": 4.350005555555556e-05, | |
| "loss": 1.3516, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 0.4546978378732472, | |
| "grad_norm": 0.7122487425804138, | |
| "learning_rate": 4.344450000000001e-05, | |
| "loss": 1.3512, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 0.4585512093806476, | |
| "grad_norm": 0.7279472947120667, | |
| "learning_rate": 4.338894444444444e-05, | |
| "loss": 1.3491, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 0.462404580888048, | |
| "grad_norm": 0.7182700037956238, | |
| "learning_rate": 4.333338888888889e-05, | |
| "loss": 1.3476, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 0.46625795239544837, | |
| "grad_norm": 0.7295483350753784, | |
| "learning_rate": 4.3277833333333334e-05, | |
| "loss": 1.3474, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 0.4701113239028488, | |
| "grad_norm": 0.7142743468284607, | |
| "learning_rate": 4.3222277777777783e-05, | |
| "loss": 1.3453, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 0.4739646954102492, | |
| "grad_norm": 0.7233024835586548, | |
| "learning_rate": 4.316672222222222e-05, | |
| "loss": 1.3441, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 0.4778180669176496, | |
| "grad_norm": 0.7175471186637878, | |
| "learning_rate": 4.311116666666667e-05, | |
| "loss": 1.3435, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 0.48167143842505, | |
| "grad_norm": 0.7073889970779419, | |
| "learning_rate": 4.305561111111111e-05, | |
| "loss": 1.3427, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 0.4855248099324504, | |
| "grad_norm": 0.7192471027374268, | |
| "learning_rate": 4.300005555555556e-05, | |
| "loss": 1.3414, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 0.4893781814398508, | |
| "grad_norm": 0.7008840441703796, | |
| "learning_rate": 4.29445e-05, | |
| "loss": 1.3397, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 0.4932315529472512, | |
| "grad_norm": 0.7169083952903748, | |
| "learning_rate": 4.2888944444444444e-05, | |
| "loss": 1.3386, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 0.4970849244546516, | |
| "grad_norm": 0.7027848958969116, | |
| "learning_rate": 4.2833388888888893e-05, | |
| "loss": 1.3376, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 0.500938295962052, | |
| "grad_norm": 0.7080409526824951, | |
| "learning_rate": 4.2777833333333336e-05, | |
| "loss": 1.3372, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 0.5047916674694524, | |
| "grad_norm": 0.7177674770355225, | |
| "learning_rate": 4.272227777777778e-05, | |
| "loss": 1.3348, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 0.5086450389768528, | |
| "grad_norm": 0.7196437120437622, | |
| "learning_rate": 4.266672222222222e-05, | |
| "loss": 1.3348, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 0.5124984104842532, | |
| "grad_norm": 0.7128574848175049, | |
| "learning_rate": 4.261116666666667e-05, | |
| "loss": 1.3324, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 0.5163517819916535, | |
| "grad_norm": 0.7088640332221985, | |
| "learning_rate": 4.255561111111112e-05, | |
| "loss": 1.3325, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 0.520205153499054, | |
| "grad_norm": 0.7082544565200806, | |
| "learning_rate": 4.2500055555555554e-05, | |
| "loss": 1.3314, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 0.5240585250064544, | |
| "grad_norm": 0.7220800518989563, | |
| "learning_rate": 4.24445e-05, | |
| "loss": 1.3306, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 0.5279118965138548, | |
| "grad_norm": 0.6928138136863708, | |
| "learning_rate": 4.2388944444444446e-05, | |
| "loss": 1.3296, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 0.5317652680212552, | |
| "grad_norm": 0.7121208310127258, | |
| "learning_rate": 4.2333388888888895e-05, | |
| "loss": 1.328, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 0.5356186395286556, | |
| "grad_norm": 0.7346400022506714, | |
| "learning_rate": 4.227783333333333e-05, | |
| "loss": 1.3262, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 0.539472011036056, | |
| "grad_norm": 0.7152061462402344, | |
| "learning_rate": 4.222227777777778e-05, | |
| "loss": 1.3264, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 0.5433253825434564, | |
| "grad_norm": 0.7062528133392334, | |
| "learning_rate": 4.216672222222223e-05, | |
| "loss": 1.3246, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 0.5471787540508568, | |
| "grad_norm": 0.7030431628227234, | |
| "learning_rate": 4.211116666666667e-05, | |
| "loss": 1.3245, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 0.5510321255582572, | |
| "grad_norm": 0.706847071647644, | |
| "learning_rate": 4.205561111111111e-05, | |
| "loss": 1.3226, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 0.5548854970656576, | |
| "grad_norm": null, | |
| "learning_rate": 4.2000055555555556e-05, | |
| "loss": 1.3223, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 0.558738868573058, | |
| "grad_norm": 0.6956000328063965, | |
| "learning_rate": 4.1944500000000005e-05, | |
| "loss": 1.3212, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 0.5625922400804584, | |
| "grad_norm": 0.7229527235031128, | |
| "learning_rate": 4.188894444444445e-05, | |
| "loss": 1.3204, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 0.5664456115878588, | |
| "grad_norm": 0.6998220682144165, | |
| "learning_rate": 4.183338888888889e-05, | |
| "loss": 1.3199, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 0.5702989830952592, | |
| "grad_norm": 0.7195952534675598, | |
| "learning_rate": 4.177783333333333e-05, | |
| "loss": 1.3179, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 0.5741523546026596, | |
| "grad_norm": 0.6813680529594421, | |
| "learning_rate": 4.172227777777778e-05, | |
| "loss": 1.3174, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 0.57800572611006, | |
| "grad_norm": 0.7080066204071045, | |
| "learning_rate": 4.166672222222222e-05, | |
| "loss": 1.3166, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 0.5818590976174604, | |
| "grad_norm": 0.7254391312599182, | |
| "learning_rate": 4.1611166666666666e-05, | |
| "loss": 1.3154, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 0.5857124691248607, | |
| "grad_norm": 0.717185378074646, | |
| "learning_rate": 4.1555611111111115e-05, | |
| "loss": 1.3148, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 0.5895658406322613, | |
| "grad_norm": 0.7029238343238831, | |
| "learning_rate": 4.150005555555556e-05, | |
| "loss": 1.3137, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 0.5934192121396616, | |
| "grad_norm": 0.7082277536392212, | |
| "learning_rate": 4.1444500000000006e-05, | |
| "loss": 1.3132, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 0.597272583647062, | |
| "grad_norm": 0.6892799139022827, | |
| "learning_rate": 4.138894444444444e-05, | |
| "loss": 1.3117, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 0.6011259551544624, | |
| "grad_norm": 0.6990786790847778, | |
| "learning_rate": 4.133338888888889e-05, | |
| "loss": 1.3108, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 0.6049793266618628, | |
| "grad_norm": 0.6933837532997131, | |
| "learning_rate": 4.127783333333334e-05, | |
| "loss": 1.3108, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 0.6088326981692632, | |
| "grad_norm": 0.7177742123603821, | |
| "learning_rate": 4.122227777777778e-05, | |
| "loss": 1.3091, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 0.6126860696766636, | |
| "grad_norm": 0.724977970123291, | |
| "learning_rate": 4.1166722222222225e-05, | |
| "loss": 1.3085, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 0.616539441184064, | |
| "grad_norm": 0.707911491394043, | |
| "learning_rate": 4.111116666666667e-05, | |
| "loss": 1.3084, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 0.6203928126914644, | |
| "grad_norm": 0.7061564326286316, | |
| "learning_rate": 4.1055611111111116e-05, | |
| "loss": 1.3072, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 0.6242461841988648, | |
| "grad_norm": 0.676784873008728, | |
| "learning_rate": 4.100005555555556e-05, | |
| "loss": 1.3061, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 0.6280995557062652, | |
| "grad_norm": 0.6791040897369385, | |
| "learning_rate": 4.09445e-05, | |
| "loss": 1.3058, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 0.6319529272136656, | |
| "grad_norm": 0.6959836483001709, | |
| "learning_rate": 4.088894444444445e-05, | |
| "loss": 1.3049, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 0.635806298721066, | |
| "grad_norm": 0.7067059874534607, | |
| "learning_rate": 4.083338888888889e-05, | |
| "loss": 1.3043, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 0.6396596702284664, | |
| "grad_norm": 0.6933940052986145, | |
| "learning_rate": 4.0777833333333335e-05, | |
| "loss": 1.3019, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 0.6435130417358668, | |
| "grad_norm": 0.7712944149971008, | |
| "learning_rate": 4.072227777777778e-05, | |
| "loss": 1.3022, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 0.6473664132432672, | |
| "grad_norm": 0.6971937417984009, | |
| "learning_rate": 4.0666722222222226e-05, | |
| "loss": 1.3005, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 0.6512197847506676, | |
| "grad_norm": 0.6904628276824951, | |
| "learning_rate": 4.061116666666667e-05, | |
| "loss": 1.2999, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 0.655073156258068, | |
| "grad_norm": 0.6890471577644348, | |
| "learning_rate": 4.055561111111111e-05, | |
| "loss": 1.2996, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 0.6589265277654685, | |
| "grad_norm": 0.6851339340209961, | |
| "learning_rate": 4.050005555555555e-05, | |
| "loss": 1.3, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 0.6627798992728688, | |
| "grad_norm": 0.719916820526123, | |
| "learning_rate": 4.04445e-05, | |
| "loss": 1.2993, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 0.6666332707802692, | |
| "grad_norm": 0.6848444938659668, | |
| "learning_rate": 4.038894444444445e-05, | |
| "loss": 1.2981, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 0.6704866422876696, | |
| "grad_norm": 0.6885384321212769, | |
| "learning_rate": 4.033338888888889e-05, | |
| "loss": 1.2964, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 0.67434001379507, | |
| "grad_norm": 0.7302813529968262, | |
| "learning_rate": 4.0277833333333336e-05, | |
| "loss": 1.2965, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 0.6781933853024704, | |
| "grad_norm": 0.7206672430038452, | |
| "learning_rate": 4.022227777777778e-05, | |
| "loss": 1.2954, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 0.6820467568098708, | |
| "grad_norm": 0.7295191884040833, | |
| "learning_rate": 4.016672222222223e-05, | |
| "loss": 1.2946, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 0.6859001283172712, | |
| "grad_norm": 0.697117269039154, | |
| "learning_rate": 4.011116666666666e-05, | |
| "loss": 1.2947, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 0.6897534998246716, | |
| "grad_norm": 0.6886401176452637, | |
| "learning_rate": 4.005561111111111e-05, | |
| "loss": 1.2932, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 0.693606871332072, | |
| "grad_norm": 0.6862413883209229, | |
| "learning_rate": 4.000005555555556e-05, | |
| "loss": 1.2922, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 0.6974602428394724, | |
| "grad_norm": 0.685055673122406, | |
| "learning_rate": 3.9944500000000004e-05, | |
| "loss": 1.292, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 0.7013136143468728, | |
| "grad_norm": 0.6738216876983643, | |
| "learning_rate": 3.9888944444444446e-05, | |
| "loss": 1.2914, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 0.7051669858542732, | |
| "grad_norm": 0.6774701476097107, | |
| "learning_rate": 3.983338888888889e-05, | |
| "loss": 1.2899, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 0.7090203573616736, | |
| "grad_norm": 0.6937932968139648, | |
| "learning_rate": 3.977783333333334e-05, | |
| "loss": 1.2901, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 0.712873728869074, | |
| "grad_norm": 0.6844605803489685, | |
| "learning_rate": 3.972227777777778e-05, | |
| "loss": 1.2881, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 0.7167271003764744, | |
| "grad_norm": 0.7089695334434509, | |
| "learning_rate": 3.966672222222222e-05, | |
| "loss": 1.2874, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 0.7205804718838748, | |
| "grad_norm": 0.7017620205879211, | |
| "learning_rate": 3.9611166666666664e-05, | |
| "loss": 1.288, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 0.7244338433912751, | |
| "grad_norm": 0.685483455657959, | |
| "learning_rate": 3.9555611111111113e-05, | |
| "loss": 1.2874, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 0.7282872148986756, | |
| "grad_norm": 0.7270601987838745, | |
| "learning_rate": 3.950005555555556e-05, | |
| "loss": 1.286, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 0.732140586406076, | |
| "grad_norm": 0.6981102824211121, | |
| "learning_rate": 3.94445e-05, | |
| "loss": 1.2863, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 0.7359939579134764, | |
| "grad_norm": 0.6715162992477417, | |
| "learning_rate": 3.938894444444445e-05, | |
| "loss": 1.2858, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 0.7398473294208768, | |
| "grad_norm": 0.687854528427124, | |
| "learning_rate": 3.933338888888889e-05, | |
| "loss": 1.2853, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 0.7437007009282772, | |
| "grad_norm": 0.7099502682685852, | |
| "learning_rate": 3.927783333333334e-05, | |
| "loss": 1.2835, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 0.7475540724356776, | |
| "grad_norm": 0.6917000412940979, | |
| "learning_rate": 3.9222277777777774e-05, | |
| "loss": 1.2836, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 0.751407443943078, | |
| "grad_norm": 0.6732981204986572, | |
| "learning_rate": 3.9166722222222223e-05, | |
| "loss": 1.2823, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 0.7552608154504784, | |
| "grad_norm": 0.6816644668579102, | |
| "learning_rate": 3.911116666666667e-05, | |
| "loss": 1.2823, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 0.7591141869578788, | |
| "grad_norm": 0.6732121109962463, | |
| "learning_rate": 3.9055611111111115e-05, | |
| "loss": 1.2814, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 0.7629675584652792, | |
| "grad_norm": 0.6966871619224548, | |
| "learning_rate": 3.900005555555556e-05, | |
| "loss": 1.2799, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 0.7668209299726796, | |
| "grad_norm": 0.6813514232635498, | |
| "learning_rate": 3.89445e-05, | |
| "loss": 1.2796, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 0.77067430148008, | |
| "grad_norm": 0.6650587916374207, | |
| "learning_rate": 3.888894444444445e-05, | |
| "loss": 1.2798, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 0.7745276729874804, | |
| "grad_norm": 0.674834668636322, | |
| "learning_rate": 3.883338888888889e-05, | |
| "loss": 1.2781, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 0.7783810444948808, | |
| "grad_norm": 0.6968523859977722, | |
| "learning_rate": 3.877783333333333e-05, | |
| "loss": 1.2778, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 0.7822344160022812, | |
| "grad_norm": 0.6971069574356079, | |
| "learning_rate": 3.872227777777778e-05, | |
| "loss": 1.2778, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 0.7860877875096816, | |
| "grad_norm": 0.6771474480628967, | |
| "learning_rate": 3.8666722222222225e-05, | |
| "loss": 1.2767, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 0.789941159017082, | |
| "grad_norm": 0.6738539338111877, | |
| "learning_rate": 3.861116666666667e-05, | |
| "loss": 1.2765, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 0.7937945305244823, | |
| "grad_norm": 0.6804343461990356, | |
| "learning_rate": 3.855561111111111e-05, | |
| "loss": 1.2761, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 0.7976479020318828, | |
| "grad_norm": 0.6916020512580872, | |
| "learning_rate": 3.850005555555556e-05, | |
| "loss": 1.2764, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 0.8015012735392832, | |
| "grad_norm": 0.6735371947288513, | |
| "learning_rate": 3.84445e-05, | |
| "loss": 1.2755, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 0.8053546450466836, | |
| "grad_norm": 0.6745339035987854, | |
| "learning_rate": 3.838894444444445e-05, | |
| "loss": 1.2744, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 0.809208016554084, | |
| "grad_norm": 0.6855958700180054, | |
| "learning_rate": 3.8333388888888886e-05, | |
| "loss": 1.2735, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 0.8130613880614844, | |
| "grad_norm": 0.6521448493003845, | |
| "learning_rate": 3.8277833333333335e-05, | |
| "loss": 1.2729, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 0.8169147595688848, | |
| "grad_norm": 0.7009506821632385, | |
| "learning_rate": 3.8222277777777784e-05, | |
| "loss": 1.2722, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 0.8207681310762852, | |
| "grad_norm": 0.7052969336509705, | |
| "learning_rate": 3.8166722222222226e-05, | |
| "loss": 1.2722, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 0.8246215025836856, | |
| "grad_norm": 0.6950345635414124, | |
| "learning_rate": 3.811116666666667e-05, | |
| "loss": 1.2718, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 0.828474874091086, | |
| "grad_norm": 0.6897072196006775, | |
| "learning_rate": 3.805561111111111e-05, | |
| "loss": 1.2708, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 0.8323282455984864, | |
| "grad_norm": 0.6870512962341309, | |
| "learning_rate": 3.800005555555556e-05, | |
| "loss": 1.2706, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 0.8361816171058868, | |
| "grad_norm": 0.6739286780357361, | |
| "learning_rate": 3.79445e-05, | |
| "loss": 1.2695, | |
| "step": 217000 | |
| }, | |
| { | |
| "epoch": 0.8400349886132872, | |
| "grad_norm": 0.691164493560791, | |
| "learning_rate": 3.7888944444444445e-05, | |
| "loss": 1.2696, | |
| "step": 218000 | |
| }, | |
| { | |
| "epoch": 0.8438883601206876, | |
| "grad_norm": 0.6866764426231384, | |
| "learning_rate": 3.7833388888888894e-05, | |
| "loss": 1.2684, | |
| "step": 219000 | |
| }, | |
| { | |
| "epoch": 0.847741731628088, | |
| "grad_norm": 0.6938662528991699, | |
| "learning_rate": 3.7777833333333336e-05, | |
| "loss": 1.268, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 0.8515951031354884, | |
| "grad_norm": 0.7062351107597351, | |
| "learning_rate": 3.772227777777778e-05, | |
| "loss": 1.267, | |
| "step": 221000 | |
| }, | |
| { | |
| "epoch": 0.8554484746428888, | |
| "grad_norm": 0.6679728031158447, | |
| "learning_rate": 3.766672222222222e-05, | |
| "loss": 1.2674, | |
| "step": 222000 | |
| }, | |
| { | |
| "epoch": 0.8593018461502892, | |
| "grad_norm": 0.6871834397315979, | |
| "learning_rate": 3.761116666666667e-05, | |
| "loss": 1.2662, | |
| "step": 223000 | |
| }, | |
| { | |
| "epoch": 0.8631552176576895, | |
| "grad_norm": 0.652167797088623, | |
| "learning_rate": 3.755561111111111e-05, | |
| "loss": 1.267, | |
| "step": 224000 | |
| }, | |
| { | |
| "epoch": 0.86700858916509, | |
| "grad_norm": 0.708121657371521, | |
| "learning_rate": 3.7500055555555555e-05, | |
| "loss": 1.2662, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 0.8708619606724904, | |
| "grad_norm": 0.6835631728172302, | |
| "learning_rate": 3.74445e-05, | |
| "loss": 1.2651, | |
| "step": 226000 | |
| }, | |
| { | |
| "epoch": 0.8747153321798908, | |
| "grad_norm": 0.67769455909729, | |
| "learning_rate": 3.7388944444444446e-05, | |
| "loss": 1.2645, | |
| "step": 227000 | |
| }, | |
| { | |
| "epoch": 0.8785687036872912, | |
| "grad_norm": 0.6746647357940674, | |
| "learning_rate": 3.7333388888888895e-05, | |
| "loss": 1.264, | |
| "step": 228000 | |
| }, | |
| { | |
| "epoch": 0.8824220751946916, | |
| "grad_norm": 0.6745488047599792, | |
| "learning_rate": 3.727783333333333e-05, | |
| "loss": 1.2631, | |
| "step": 229000 | |
| }, | |
| { | |
| "epoch": 0.886275446702092, | |
| "grad_norm": 0.665640115737915, | |
| "learning_rate": 3.722227777777778e-05, | |
| "loss": 1.2628, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 0.8901288182094924, | |
| "grad_norm": 0.6739605069160461, | |
| "learning_rate": 3.716672222222222e-05, | |
| "loss": 1.2627, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 0.8939821897168928, | |
| "grad_norm": 0.7083284258842468, | |
| "learning_rate": 3.711116666666667e-05, | |
| "loss": 1.2612, | |
| "step": 232000 | |
| }, | |
| { | |
| "epoch": 0.8978355612242932, | |
| "grad_norm": 0.6816121935844421, | |
| "learning_rate": 3.7055611111111114e-05, | |
| "loss": 1.2619, | |
| "step": 233000 | |
| }, | |
| { | |
| "epoch": 0.9016889327316936, | |
| "grad_norm": 0.6729110479354858, | |
| "learning_rate": 3.7000055555555556e-05, | |
| "loss": 1.2615, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 0.905542304239094, | |
| "grad_norm": 0.6974055171012878, | |
| "learning_rate": 3.6944500000000005e-05, | |
| "loss": 1.2596, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 0.9093956757464944, | |
| "grad_norm": 0.6852896213531494, | |
| "learning_rate": 3.688894444444445e-05, | |
| "loss": 1.2601, | |
| "step": 236000 | |
| }, | |
| { | |
| "epoch": 0.9132490472538948, | |
| "grad_norm": 0.6774199604988098, | |
| "learning_rate": 3.683338888888889e-05, | |
| "loss": 1.2595, | |
| "step": 237000 | |
| }, | |
| { | |
| "epoch": 0.9171024187612952, | |
| "grad_norm": 0.672041118144989, | |
| "learning_rate": 3.677783333333333e-05, | |
| "loss": 1.2591, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 0.9209557902686956, | |
| "grad_norm": 0.6697712540626526, | |
| "learning_rate": 3.672227777777778e-05, | |
| "loss": 1.2571, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 0.924809161776096, | |
| "grad_norm": 0.6848810911178589, | |
| "learning_rate": 3.6666722222222224e-05, | |
| "loss": 1.2578, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 0.9286625332834963, | |
| "grad_norm": 0.6790698766708374, | |
| "learning_rate": 3.6611166666666666e-05, | |
| "loss": 1.2565, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 0.9325159047908967, | |
| "grad_norm": 0.6708704233169556, | |
| "learning_rate": 3.6555611111111115e-05, | |
| "loss": 1.2574, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 0.9363692762982972, | |
| "grad_norm": 0.6941115260124207, | |
| "learning_rate": 3.650005555555556e-05, | |
| "loss": 1.2562, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 0.9402226478056976, | |
| "grad_norm": 0.6749645471572876, | |
| "learning_rate": 3.6444500000000007e-05, | |
| "loss": 1.2567, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 0.944076019313098, | |
| "grad_norm": 0.655571460723877, | |
| "learning_rate": 3.638894444444444e-05, | |
| "loss": 1.2557, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 0.9479293908204984, | |
| "grad_norm": 0.6796839237213135, | |
| "learning_rate": 3.633338888888889e-05, | |
| "loss": 1.2563, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 0.9517827623278988, | |
| "grad_norm": 0.6742174029350281, | |
| "learning_rate": 3.6277833333333334e-05, | |
| "loss": 1.2548, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 0.9556361338352992, | |
| "grad_norm": 0.6875942349433899, | |
| "learning_rate": 3.622227777777778e-05, | |
| "loss": 1.254, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 0.9594895053426996, | |
| "grad_norm": 0.6719071865081787, | |
| "learning_rate": 3.616672222222222e-05, | |
| "loss": 1.2533, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 0.9633428768501, | |
| "grad_norm": 0.6660245060920715, | |
| "learning_rate": 3.611116666666667e-05, | |
| "loss": 1.2536, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 0.9671962483575004, | |
| "grad_norm": 0.6870962977409363, | |
| "learning_rate": 3.6055611111111117e-05, | |
| "loss": 1.2527, | |
| "step": 251000 | |
| }, | |
| { | |
| "epoch": 0.9710496198649008, | |
| "grad_norm": 0.6905462145805359, | |
| "learning_rate": 3.600005555555556e-05, | |
| "loss": 1.2531, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 0.9749029913723012, | |
| "grad_norm": 0.6646074056625366, | |
| "learning_rate": 3.59445e-05, | |
| "loss": 1.2531, | |
| "step": 253000 | |
| }, | |
| { | |
| "epoch": 0.9787563628797016, | |
| "grad_norm": 0.6946249008178711, | |
| "learning_rate": 3.5888944444444444e-05, | |
| "loss": 1.2515, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 0.982609734387102, | |
| "grad_norm": 0.6882653832435608, | |
| "learning_rate": 3.583338888888889e-05, | |
| "loss": 1.2513, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 0.9864631058945024, | |
| "grad_norm": 0.6676469445228577, | |
| "learning_rate": 3.5777833333333335e-05, | |
| "loss": 1.2507, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 0.9903164774019028, | |
| "grad_norm": 0.6981261968612671, | |
| "learning_rate": 3.572227777777778e-05, | |
| "loss": 1.2504, | |
| "step": 257000 | |
| }, | |
| { | |
| "epoch": 0.9941698489093032, | |
| "grad_norm": 0.6620067358016968, | |
| "learning_rate": 3.5666722222222226e-05, | |
| "loss": 1.2507, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 0.9980232204167035, | |
| "grad_norm": 0.6728119850158691, | |
| "learning_rate": 3.561116666666667e-05, | |
| "loss": 1.249, | |
| "step": 259000 | |
| }, | |
| { | |
| "epoch": 1.001876591924104, | |
| "grad_norm": 0.6715940833091736, | |
| "learning_rate": 3.555561111111112e-05, | |
| "loss": 1.2498, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 1.0057299634315044, | |
| "grad_norm": 0.6744341254234314, | |
| "learning_rate": 3.5500055555555553e-05, | |
| "loss": 1.2488, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 1.0095833349389047, | |
| "grad_norm": 0.6996214985847473, | |
| "learning_rate": 3.54445e-05, | |
| "loss": 1.2484, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 1.0134367064463052, | |
| "grad_norm": 0.6556364893913269, | |
| "learning_rate": 3.5388944444444445e-05, | |
| "loss": 1.248, | |
| "step": 263000 | |
| }, | |
| { | |
| "epoch": 1.0172900779537055, | |
| "grad_norm": 0.692175567150116, | |
| "learning_rate": 3.5333388888888894e-05, | |
| "loss": 1.2466, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 1.021143449461106, | |
| "grad_norm": 0.6721535921096802, | |
| "learning_rate": 3.5277833333333336e-05, | |
| "loss": 1.2461, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 1.0249968209685063, | |
| "grad_norm": 0.6992902159690857, | |
| "learning_rate": 3.522227777777778e-05, | |
| "loss": 1.2457, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 1.0288501924759068, | |
| "grad_norm": 0.6894251108169556, | |
| "learning_rate": 3.516672222222223e-05, | |
| "loss": 1.246, | |
| "step": 267000 | |
| }, | |
| { | |
| "epoch": 1.032703563983307, | |
| "grad_norm": 0.6852269172668457, | |
| "learning_rate": 3.511116666666667e-05, | |
| "loss": 1.2459, | |
| "step": 268000 | |
| }, | |
| { | |
| "epoch": 1.0365569354907076, | |
| "grad_norm": 0.6719028949737549, | |
| "learning_rate": 3.505561111111111e-05, | |
| "loss": 1.2452, | |
| "step": 269000 | |
| }, | |
| { | |
| "epoch": 1.0404103069981079, | |
| "grad_norm": 0.6796379089355469, | |
| "learning_rate": 3.5000055555555555e-05, | |
| "loss": 1.2439, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 1.0442636785055084, | |
| "grad_norm": 0.6743236184120178, | |
| "learning_rate": 3.4944500000000004e-05, | |
| "loss": 1.2441, | |
| "step": 271000 | |
| }, | |
| { | |
| "epoch": 1.048117050012909, | |
| "grad_norm": 0.6704487800598145, | |
| "learning_rate": 3.4888944444444446e-05, | |
| "loss": 1.2438, | |
| "step": 272000 | |
| }, | |
| { | |
| "epoch": 1.0519704215203092, | |
| "grad_norm": 0.6776983141899109, | |
| "learning_rate": 3.483338888888889e-05, | |
| "loss": 1.2443, | |
| "step": 273000 | |
| }, | |
| { | |
| "epoch": 1.0558237930277097, | |
| "grad_norm": 0.6701886057853699, | |
| "learning_rate": 3.477783333333334e-05, | |
| "loss": 1.2428, | |
| "step": 274000 | |
| }, | |
| { | |
| "epoch": 1.05967716453511, | |
| "grad_norm": 0.6820278167724609, | |
| "learning_rate": 3.472227777777778e-05, | |
| "loss": 1.2426, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 1.0635305360425105, | |
| "grad_norm": 0.7262411117553711, | |
| "learning_rate": 3.466672222222222e-05, | |
| "loss": 1.2429, | |
| "step": 276000 | |
| }, | |
| { | |
| "epoch": 1.0673839075499107, | |
| "grad_norm": 0.6728771328926086, | |
| "learning_rate": 3.4611166666666665e-05, | |
| "loss": 1.2421, | |
| "step": 277000 | |
| }, | |
| { | |
| "epoch": 1.0712372790573113, | |
| "grad_norm": 0.66309654712677, | |
| "learning_rate": 3.4555611111111114e-05, | |
| "loss": 1.2416, | |
| "step": 278000 | |
| }, | |
| { | |
| "epoch": 1.0750906505647115, | |
| "grad_norm": 0.6764417886734009, | |
| "learning_rate": 3.4500055555555556e-05, | |
| "loss": 1.2413, | |
| "step": 279000 | |
| }, | |
| { | |
| "epoch": 1.078944022072112, | |
| "grad_norm": 0.6755089163780212, | |
| "learning_rate": 3.44445e-05, | |
| "loss": 1.2411, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 1.0827973935795123, | |
| "grad_norm": 0.669450581073761, | |
| "learning_rate": 3.438894444444445e-05, | |
| "loss": 1.2406, | |
| "step": 281000 | |
| }, | |
| { | |
| "epoch": 1.0866507650869128, | |
| "grad_norm": 0.6609264612197876, | |
| "learning_rate": 3.433338888888889e-05, | |
| "loss": 1.2395, | |
| "step": 282000 | |
| }, | |
| { | |
| "epoch": 1.0905041365943131, | |
| "grad_norm": 0.6697176694869995, | |
| "learning_rate": 3.427783333333334e-05, | |
| "loss": 1.2403, | |
| "step": 283000 | |
| }, | |
| { | |
| "epoch": 1.0943575081017136, | |
| "grad_norm": 0.6523563861846924, | |
| "learning_rate": 3.4222277777777775e-05, | |
| "loss": 1.2392, | |
| "step": 284000 | |
| }, | |
| { | |
| "epoch": 1.098210879609114, | |
| "grad_norm": 0.6608708500862122, | |
| "learning_rate": 3.4166722222222224e-05, | |
| "loss": 1.239, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 1.1020642511165144, | |
| "grad_norm": 0.6721755862236023, | |
| "learning_rate": 3.4111166666666666e-05, | |
| "loss": 1.2394, | |
| "step": 286000 | |
| }, | |
| { | |
| "epoch": 1.105917622623915, | |
| "grad_norm": 0.6699149012565613, | |
| "learning_rate": 3.4055611111111115e-05, | |
| "loss": 1.2389, | |
| "step": 287000 | |
| }, | |
| { | |
| "epoch": 1.1097709941313152, | |
| "grad_norm": 0.6876478791236877, | |
| "learning_rate": 3.400005555555556e-05, | |
| "loss": 1.2384, | |
| "step": 288000 | |
| }, | |
| { | |
| "epoch": 1.1136243656387157, | |
| "grad_norm": 0.6746466755867004, | |
| "learning_rate": 3.39445e-05, | |
| "loss": 1.2372, | |
| "step": 289000 | |
| }, | |
| { | |
| "epoch": 1.117477737146116, | |
| "grad_norm": 0.6752446889877319, | |
| "learning_rate": 3.388894444444445e-05, | |
| "loss": 1.2367, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 1.1213311086535165, | |
| "grad_norm": 0.6689814329147339, | |
| "learning_rate": 3.383338888888889e-05, | |
| "loss": 1.2373, | |
| "step": 291000 | |
| }, | |
| { | |
| "epoch": 1.1251844801609168, | |
| "grad_norm": 0.6524012684822083, | |
| "learning_rate": 3.3777833333333334e-05, | |
| "loss": 1.2364, | |
| "step": 292000 | |
| }, | |
| { | |
| "epoch": 1.1290378516683173, | |
| "grad_norm": 0.6835392713546753, | |
| "learning_rate": 3.3722277777777776e-05, | |
| "loss": 1.2367, | |
| "step": 293000 | |
| }, | |
| { | |
| "epoch": 1.1328912231757176, | |
| "grad_norm": 0.663935124874115, | |
| "learning_rate": 3.3666722222222225e-05, | |
| "loss": 1.2354, | |
| "step": 294000 | |
| }, | |
| { | |
| "epoch": 1.136744594683118, | |
| "grad_norm": 0.681470513343811, | |
| "learning_rate": 3.361116666666667e-05, | |
| "loss": 1.2341, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 1.1405979661905183, | |
| "grad_norm": 0.6814187169075012, | |
| "learning_rate": 3.355561111111111e-05, | |
| "loss": 1.2347, | |
| "step": 296000 | |
| }, | |
| { | |
| "epoch": 1.1444513376979188, | |
| "grad_norm": 0.6692870259284973, | |
| "learning_rate": 3.350005555555556e-05, | |
| "loss": 1.234, | |
| "step": 297000 | |
| }, | |
| { | |
| "epoch": 1.1483047092053191, | |
| "grad_norm": 0.6934278011322021, | |
| "learning_rate": 3.34445e-05, | |
| "loss": 1.2349, | |
| "step": 298000 | |
| }, | |
| { | |
| "epoch": 1.1521580807127196, | |
| "grad_norm": 0.6900179982185364, | |
| "learning_rate": 3.338894444444445e-05, | |
| "loss": 1.234, | |
| "step": 299000 | |
| }, | |
| { | |
| "epoch": 1.15601145222012, | |
| "grad_norm": 0.6840701699256897, | |
| "learning_rate": 3.3333388888888886e-05, | |
| "loss": 1.2332, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 1.1598648237275204, | |
| "grad_norm": 0.7003931403160095, | |
| "learning_rate": 3.3277833333333335e-05, | |
| "loss": 1.2329, | |
| "step": 301000 | |
| }, | |
| { | |
| "epoch": 1.1637181952349207, | |
| "grad_norm": 0.6828613877296448, | |
| "learning_rate": 3.322227777777778e-05, | |
| "loss": 1.232, | |
| "step": 302000 | |
| }, | |
| { | |
| "epoch": 1.1675715667423212, | |
| "grad_norm": 0.7264192700386047, | |
| "learning_rate": 3.316672222222223e-05, | |
| "loss": 1.2322, | |
| "step": 303000 | |
| }, | |
| { | |
| "epoch": 1.1714249382497215, | |
| "grad_norm": 0.6868515014648438, | |
| "learning_rate": 3.311116666666667e-05, | |
| "loss": 1.2322, | |
| "step": 304000 | |
| }, | |
| { | |
| "epoch": 1.175278309757122, | |
| "grad_norm": 0.6805739402770996, | |
| "learning_rate": 3.305561111111111e-05, | |
| "loss": 1.2307, | |
| "step": 305000 | |
| }, | |
| { | |
| "epoch": 1.1791316812645225, | |
| "grad_norm": 0.6556283831596375, | |
| "learning_rate": 3.300005555555556e-05, | |
| "loss": 1.2308, | |
| "step": 306000 | |
| }, | |
| { | |
| "epoch": 1.1829850527719228, | |
| "grad_norm": 0.6635182499885559, | |
| "learning_rate": 3.29445e-05, | |
| "loss": 1.2311, | |
| "step": 307000 | |
| }, | |
| { | |
| "epoch": 1.1868384242793233, | |
| "grad_norm": 0.6593520641326904, | |
| "learning_rate": 3.2888944444444445e-05, | |
| "loss": 1.2307, | |
| "step": 308000 | |
| }, | |
| { | |
| "epoch": 1.1906917957867236, | |
| "grad_norm": 0.6719244122505188, | |
| "learning_rate": 3.283338888888889e-05, | |
| "loss": 1.2302, | |
| "step": 309000 | |
| }, | |
| { | |
| "epoch": 1.194545167294124, | |
| "grad_norm": 0.663469135761261, | |
| "learning_rate": 3.2777833333333337e-05, | |
| "loss": 1.2302, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 1.1983985388015244, | |
| "grad_norm": 0.679842472076416, | |
| "learning_rate": 3.272227777777778e-05, | |
| "loss": 1.23, | |
| "step": 311000 | |
| }, | |
| { | |
| "epoch": 1.2022519103089249, | |
| "grad_norm": 0.6602251529693604, | |
| "learning_rate": 3.266672222222222e-05, | |
| "loss": 1.229, | |
| "step": 312000 | |
| }, | |
| { | |
| "epoch": 1.2061052818163251, | |
| "grad_norm": 0.6897211670875549, | |
| "learning_rate": 3.261116666666667e-05, | |
| "loss": 1.2293, | |
| "step": 313000 | |
| }, | |
| { | |
| "epoch": 1.2099586533237257, | |
| "grad_norm": 0.6772252321243286, | |
| "learning_rate": 3.255561111111111e-05, | |
| "loss": 1.2287, | |
| "step": 314000 | |
| }, | |
| { | |
| "epoch": 1.213812024831126, | |
| "grad_norm": 0.6991803646087646, | |
| "learning_rate": 3.250005555555556e-05, | |
| "loss": 1.2289, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 1.2176653963385264, | |
| "grad_norm": 0.6778867840766907, | |
| "learning_rate": 3.24445e-05, | |
| "loss": 1.2273, | |
| "step": 316000 | |
| }, | |
| { | |
| "epoch": 1.2215187678459267, | |
| "grad_norm": 0.6776384115219116, | |
| "learning_rate": 3.2388944444444447e-05, | |
| "loss": 1.2282, | |
| "step": 317000 | |
| }, | |
| { | |
| "epoch": 1.2253721393533272, | |
| "grad_norm": 0.6960573196411133, | |
| "learning_rate": 3.233338888888889e-05, | |
| "loss": 1.227, | |
| "step": 318000 | |
| }, | |
| { | |
| "epoch": 1.2292255108607275, | |
| "grad_norm": 0.682949423789978, | |
| "learning_rate": 3.227783333333334e-05, | |
| "loss": 1.2268, | |
| "step": 319000 | |
| }, | |
| { | |
| "epoch": 1.233078882368128, | |
| "grad_norm": 0.6904979348182678, | |
| "learning_rate": 3.222227777777778e-05, | |
| "loss": 1.2273, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 1.2369322538755283, | |
| "grad_norm": 0.6834551692008972, | |
| "learning_rate": 3.216672222222222e-05, | |
| "loss": 1.2265, | |
| "step": 321000 | |
| }, | |
| { | |
| "epoch": 1.2407856253829288, | |
| "grad_norm": 0.6686312556266785, | |
| "learning_rate": 3.211116666666667e-05, | |
| "loss": 1.2258, | |
| "step": 322000 | |
| }, | |
| { | |
| "epoch": 1.244638996890329, | |
| "grad_norm": 0.6807515025138855, | |
| "learning_rate": 3.2055611111111114e-05, | |
| "loss": 1.2264, | |
| "step": 323000 | |
| }, | |
| { | |
| "epoch": 1.2484923683977296, | |
| "grad_norm": 0.6831598877906799, | |
| "learning_rate": 3.2000055555555556e-05, | |
| "loss": 1.2254, | |
| "step": 324000 | |
| }, | |
| { | |
| "epoch": 1.25234573990513, | |
| "grad_norm": 0.6734605431556702, | |
| "learning_rate": 3.19445e-05, | |
| "loss": 1.2257, | |
| "step": 325000 | |
| }, | |
| { | |
| "epoch": 1.2561991114125304, | |
| "grad_norm": 0.7259578704833984, | |
| "learning_rate": 3.188894444444445e-05, | |
| "loss": 1.2246, | |
| "step": 326000 | |
| }, | |
| { | |
| "epoch": 1.2600524829199307, | |
| "grad_norm": 0.6729893684387207, | |
| "learning_rate": 3.183338888888889e-05, | |
| "loss": 1.2247, | |
| "step": 327000 | |
| }, | |
| { | |
| "epoch": 1.2639058544273312, | |
| "grad_norm": 0.6584126353263855, | |
| "learning_rate": 3.177783333333333e-05, | |
| "loss": 1.2233, | |
| "step": 328000 | |
| }, | |
| { | |
| "epoch": 1.2677592259347317, | |
| "grad_norm": 0.6399083733558655, | |
| "learning_rate": 3.172227777777778e-05, | |
| "loss": 1.224, | |
| "step": 329000 | |
| }, | |
| { | |
| "epoch": 1.271612597442132, | |
| "grad_norm": 0.6504297852516174, | |
| "learning_rate": 3.1666722222222224e-05, | |
| "loss": 1.2235, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 1.2754659689495325, | |
| "grad_norm": 0.6758235692977905, | |
| "learning_rate": 3.1611166666666666e-05, | |
| "loss": 1.2233, | |
| "step": 331000 | |
| }, | |
| { | |
| "epoch": 1.2793193404569327, | |
| "grad_norm": 0.6571764945983887, | |
| "learning_rate": 3.155561111111111e-05, | |
| "loss": 1.2225, | |
| "step": 332000 | |
| }, | |
| { | |
| "epoch": 1.2831727119643332, | |
| "grad_norm": 0.6841081976890564, | |
| "learning_rate": 3.150005555555556e-05, | |
| "loss": 1.2234, | |
| "step": 333000 | |
| }, | |
| { | |
| "epoch": 1.2870260834717335, | |
| "grad_norm": 0.6634018421173096, | |
| "learning_rate": 3.14445e-05, | |
| "loss": 1.2223, | |
| "step": 334000 | |
| }, | |
| { | |
| "epoch": 1.290879454979134, | |
| "grad_norm": 0.693499743938446, | |
| "learning_rate": 3.138894444444444e-05, | |
| "loss": 1.223, | |
| "step": 335000 | |
| }, | |
| { | |
| "epoch": 1.2947328264865343, | |
| "grad_norm": 0.6660721898078918, | |
| "learning_rate": 3.133338888888889e-05, | |
| "loss": 1.2216, | |
| "step": 336000 | |
| }, | |
| { | |
| "epoch": 1.2985861979939348, | |
| "grad_norm": 0.6657261252403259, | |
| "learning_rate": 3.1277833333333334e-05, | |
| "loss": 1.2209, | |
| "step": 337000 | |
| }, | |
| { | |
| "epoch": 1.302439569501335, | |
| "grad_norm": 0.6683467626571655, | |
| "learning_rate": 3.122227777777778e-05, | |
| "loss": 1.2214, | |
| "step": 338000 | |
| }, | |
| { | |
| "epoch": 1.3062929410087356, | |
| "grad_norm": 0.6729023456573486, | |
| "learning_rate": 3.1166722222222225e-05, | |
| "loss": 1.2207, | |
| "step": 339000 | |
| }, | |
| { | |
| "epoch": 1.3101463125161361, | |
| "grad_norm": 0.6579126715660095, | |
| "learning_rate": 3.111116666666667e-05, | |
| "loss": 1.2208, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 1.3139996840235364, | |
| "grad_norm": 0.7024135589599609, | |
| "learning_rate": 3.105561111111111e-05, | |
| "loss": 1.2197, | |
| "step": 341000 | |
| }, | |
| { | |
| "epoch": 1.3178530555309367, | |
| "grad_norm": 0.6785723567008972, | |
| "learning_rate": 3.100005555555556e-05, | |
| "loss": 1.2197, | |
| "step": 342000 | |
| }, | |
| { | |
| "epoch": 1.3217064270383372, | |
| "grad_norm": 0.6838181018829346, | |
| "learning_rate": 3.09445e-05, | |
| "loss": 1.2196, | |
| "step": 343000 | |
| }, | |
| { | |
| "epoch": 1.3255597985457377, | |
| "grad_norm": 0.663948655128479, | |
| "learning_rate": 3.0888944444444444e-05, | |
| "loss": 1.2201, | |
| "step": 344000 | |
| }, | |
| { | |
| "epoch": 1.329413170053138, | |
| "grad_norm": 0.6804963946342468, | |
| "learning_rate": 3.083338888888889e-05, | |
| "loss": 1.2183, | |
| "step": 345000 | |
| }, | |
| { | |
| "epoch": 1.3332665415605385, | |
| "grad_norm": 0.6698565483093262, | |
| "learning_rate": 3.0777833333333335e-05, | |
| "loss": 1.2195, | |
| "step": 346000 | |
| }, | |
| { | |
| "epoch": 1.3371199130679388, | |
| "grad_norm": 0.66984623670578, | |
| "learning_rate": 3.072227777777778e-05, | |
| "loss": 1.2177, | |
| "step": 347000 | |
| }, | |
| { | |
| "epoch": 1.3409732845753393, | |
| "grad_norm": 0.6739431023597717, | |
| "learning_rate": 3.066672222222222e-05, | |
| "loss": 1.2187, | |
| "step": 348000 | |
| }, | |
| { | |
| "epoch": 1.3448266560827395, | |
| "grad_norm": 0.6695194244384766, | |
| "learning_rate": 3.061116666666667e-05, | |
| "loss": 1.2172, | |
| "step": 349000 | |
| }, | |
| { | |
| "epoch": 1.34868002759014, | |
| "grad_norm": 0.669137179851532, | |
| "learning_rate": 3.055561111111111e-05, | |
| "loss": 1.218, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 1.3525333990975403, | |
| "grad_norm": 0.6813784241676331, | |
| "learning_rate": 3.0500055555555557e-05, | |
| "loss": 1.218, | |
| "step": 351000 | |
| }, | |
| { | |
| "epoch": 1.3563867706049408, | |
| "grad_norm": 0.6733546853065491, | |
| "learning_rate": 3.0444500000000003e-05, | |
| "loss": 1.2168, | |
| "step": 352000 | |
| }, | |
| { | |
| "epoch": 1.3602401421123411, | |
| "grad_norm": 0.6998477578163147, | |
| "learning_rate": 3.0388944444444445e-05, | |
| "loss": 1.2172, | |
| "step": 353000 | |
| }, | |
| { | |
| "epoch": 1.3640935136197416, | |
| "grad_norm": 0.6859351396560669, | |
| "learning_rate": 3.033338888888889e-05, | |
| "loss": 1.2162, | |
| "step": 354000 | |
| }, | |
| { | |
| "epoch": 1.3679468851271421, | |
| "grad_norm": 0.6735222339630127, | |
| "learning_rate": 3.0277833333333333e-05, | |
| "loss": 1.2165, | |
| "step": 355000 | |
| }, | |
| { | |
| "epoch": 1.3718002566345424, | |
| "grad_norm": 0.6935626864433289, | |
| "learning_rate": 3.022227777777778e-05, | |
| "loss": 1.2164, | |
| "step": 356000 | |
| }, | |
| { | |
| "epoch": 1.3756536281419427, | |
| "grad_norm": 0.695367693901062, | |
| "learning_rate": 3.016672222222222e-05, | |
| "loss": 1.2157, | |
| "step": 357000 | |
| }, | |
| { | |
| "epoch": 1.3795069996493432, | |
| "grad_norm": 0.6997620463371277, | |
| "learning_rate": 3.0111166666666667e-05, | |
| "loss": 1.2151, | |
| "step": 358000 | |
| }, | |
| { | |
| "epoch": 1.3833603711567437, | |
| "grad_norm": 0.6904904246330261, | |
| "learning_rate": 3.0055611111111116e-05, | |
| "loss": 1.2156, | |
| "step": 359000 | |
| }, | |
| { | |
| "epoch": 1.387213742664144, | |
| "grad_norm": 0.6987177729606628, | |
| "learning_rate": 3.0000055555555555e-05, | |
| "loss": 1.2155, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 1.3910671141715443, | |
| "grad_norm": 0.6865441203117371, | |
| "learning_rate": 2.9944500000000004e-05, | |
| "loss": 1.2139, | |
| "step": 361000 | |
| }, | |
| { | |
| "epoch": 1.3949204856789448, | |
| "grad_norm": 0.6597044467926025, | |
| "learning_rate": 2.9888944444444443e-05, | |
| "loss": 1.2149, | |
| "step": 362000 | |
| }, | |
| { | |
| "epoch": 1.3987738571863453, | |
| "grad_norm": 0.6782757639884949, | |
| "learning_rate": 2.9833388888888892e-05, | |
| "loss": 1.2141, | |
| "step": 363000 | |
| }, | |
| { | |
| "epoch": 1.4026272286937456, | |
| "grad_norm": 0.6776473522186279, | |
| "learning_rate": 2.977783333333333e-05, | |
| "loss": 1.2139, | |
| "step": 364000 | |
| }, | |
| { | |
| "epoch": 1.406480600201146, | |
| "grad_norm": 0.6748208403587341, | |
| "learning_rate": 2.972227777777778e-05, | |
| "loss": 1.2126, | |
| "step": 365000 | |
| }, | |
| { | |
| "epoch": 1.4103339717085464, | |
| "grad_norm": 0.6814528107643127, | |
| "learning_rate": 2.9666722222222226e-05, | |
| "loss": 1.2133, | |
| "step": 366000 | |
| }, | |
| { | |
| "epoch": 1.4141873432159469, | |
| "grad_norm": 0.6742803454399109, | |
| "learning_rate": 2.961116666666667e-05, | |
| "loss": 1.2131, | |
| "step": 367000 | |
| }, | |
| { | |
| "epoch": 1.4180407147233471, | |
| "grad_norm": 0.6685371398925781, | |
| "learning_rate": 2.9555611111111114e-05, | |
| "loss": 1.2132, | |
| "step": 368000 | |
| }, | |
| { | |
| "epoch": 1.4218940862307476, | |
| "grad_norm": 0.6587190628051758, | |
| "learning_rate": 2.9500055555555557e-05, | |
| "loss": 1.2126, | |
| "step": 369000 | |
| }, | |
| { | |
| "epoch": 1.425747457738148, | |
| "grad_norm": 0.7063204050064087, | |
| "learning_rate": 2.9444500000000002e-05, | |
| "loss": 1.2133, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 1.4296008292455484, | |
| "grad_norm": 0.6783314943313599, | |
| "learning_rate": 2.9388944444444445e-05, | |
| "loss": 1.2114, | |
| "step": 371000 | |
| }, | |
| { | |
| "epoch": 1.4334542007529487, | |
| "grad_norm": 0.6613739132881165, | |
| "learning_rate": 2.933338888888889e-05, | |
| "loss": 1.2109, | |
| "step": 372000 | |
| }, | |
| { | |
| "epoch": 1.4373075722603492, | |
| "grad_norm": 0.7002771496772766, | |
| "learning_rate": 2.9277833333333333e-05, | |
| "loss": 1.2115, | |
| "step": 373000 | |
| }, | |
| { | |
| "epoch": 1.4411609437677497, | |
| "grad_norm": 0.69558185338974, | |
| "learning_rate": 2.922227777777778e-05, | |
| "loss": 1.2112, | |
| "step": 374000 | |
| }, | |
| { | |
| "epoch": 1.44501431527515, | |
| "grad_norm": 0.6645965576171875, | |
| "learning_rate": 2.9166722222222224e-05, | |
| "loss": 1.2103, | |
| "step": 375000 | |
| }, | |
| { | |
| "epoch": 1.4488676867825503, | |
| "grad_norm": 0.6624684929847717, | |
| "learning_rate": 2.9111166666666667e-05, | |
| "loss": 1.2096, | |
| "step": 376000 | |
| }, | |
| { | |
| "epoch": 1.4527210582899508, | |
| "grad_norm": 0.6573096513748169, | |
| "learning_rate": 2.9055611111111112e-05, | |
| "loss": 1.2102, | |
| "step": 377000 | |
| }, | |
| { | |
| "epoch": 1.4565744297973513, | |
| "grad_norm": 0.6568763852119446, | |
| "learning_rate": 2.9000055555555555e-05, | |
| "loss": 1.2109, | |
| "step": 378000 | |
| }, | |
| { | |
| "epoch": 1.4604278013047516, | |
| "grad_norm": 0.6698375940322876, | |
| "learning_rate": 2.8944500000000004e-05, | |
| "loss": 1.2102, | |
| "step": 379000 | |
| }, | |
| { | |
| "epoch": 1.4642811728121519, | |
| "grad_norm": 0.6893269419670105, | |
| "learning_rate": 2.8888944444444443e-05, | |
| "loss": 1.2096, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 1.4681345443195524, | |
| "grad_norm": 0.6947731375694275, | |
| "learning_rate": 2.8833388888888892e-05, | |
| "loss": 1.2089, | |
| "step": 381000 | |
| }, | |
| { | |
| "epoch": 1.4719879158269529, | |
| "grad_norm": 0.6623468399047852, | |
| "learning_rate": 2.8777833333333338e-05, | |
| "loss": 1.2093, | |
| "step": 382000 | |
| }, | |
| { | |
| "epoch": 1.4758412873343532, | |
| "grad_norm": 0.6745172739028931, | |
| "learning_rate": 2.872227777777778e-05, | |
| "loss": 1.2092, | |
| "step": 383000 | |
| }, | |
| { | |
| "epoch": 1.4796946588417537, | |
| "grad_norm": 0.6809899806976318, | |
| "learning_rate": 2.8666722222222226e-05, | |
| "loss": 1.2083, | |
| "step": 384000 | |
| }, | |
| { | |
| "epoch": 1.483548030349154, | |
| "grad_norm": 0.6865934729576111, | |
| "learning_rate": 2.8611166666666668e-05, | |
| "loss": 1.2088, | |
| "step": 385000 | |
| }, | |
| { | |
| "epoch": 1.4874014018565545, | |
| "grad_norm": 0.6749284863471985, | |
| "learning_rate": 2.8555611111111114e-05, | |
| "loss": 1.2085, | |
| "step": 386000 | |
| }, | |
| { | |
| "epoch": 1.4912547733639547, | |
| "grad_norm": 0.6682766079902649, | |
| "learning_rate": 2.8500055555555556e-05, | |
| "loss": 1.2075, | |
| "step": 387000 | |
| }, | |
| { | |
| "epoch": 1.4951081448713552, | |
| "grad_norm": 0.7050167322158813, | |
| "learning_rate": 2.8444500000000002e-05, | |
| "loss": 1.208, | |
| "step": 388000 | |
| }, | |
| { | |
| "epoch": 1.4989615163787555, | |
| "grad_norm": 0.6618677377700806, | |
| "learning_rate": 2.8388944444444448e-05, | |
| "loss": 1.2073, | |
| "step": 389000 | |
| }, | |
| { | |
| "epoch": 1.502814887886156, | |
| "grad_norm": 0.6752711534500122, | |
| "learning_rate": 2.833338888888889e-05, | |
| "loss": 1.2071, | |
| "step": 390000 | |
| }, | |
| { | |
| "epoch": 1.5066682593935563, | |
| "grad_norm": 0.6783143877983093, | |
| "learning_rate": 2.8277833333333336e-05, | |
| "loss": 1.207, | |
| "step": 391000 | |
| }, | |
| { | |
| "epoch": 1.5105216309009568, | |
| "grad_norm": 0.6858145594596863, | |
| "learning_rate": 2.8222277777777778e-05, | |
| "loss": 1.206, | |
| "step": 392000 | |
| }, | |
| { | |
| "epoch": 1.5143750024083573, | |
| "grad_norm": 0.6630164384841919, | |
| "learning_rate": 2.8166722222222224e-05, | |
| "loss": 1.2066, | |
| "step": 393000 | |
| }, | |
| { | |
| "epoch": 1.5182283739157576, | |
| "grad_norm": 0.6869551539421082, | |
| "learning_rate": 2.8111166666666666e-05, | |
| "loss": 1.2065, | |
| "step": 394000 | |
| }, | |
| { | |
| "epoch": 1.5220817454231579, | |
| "grad_norm": 0.6730819344520569, | |
| "learning_rate": 2.8055611111111112e-05, | |
| "loss": 1.2053, | |
| "step": 395000 | |
| }, | |
| { | |
| "epoch": 1.5259351169305584, | |
| "grad_norm": 0.6799289584159851, | |
| "learning_rate": 2.8000055555555554e-05, | |
| "loss": 1.2053, | |
| "step": 396000 | |
| }, | |
| { | |
| "epoch": 1.529788488437959, | |
| "grad_norm": 0.6745020747184753, | |
| "learning_rate": 2.79445e-05, | |
| "loss": 1.2053, | |
| "step": 397000 | |
| }, | |
| { | |
| "epoch": 1.5336418599453592, | |
| "grad_norm": 0.6658075451850891, | |
| "learning_rate": 2.788894444444445e-05, | |
| "loss": 1.2051, | |
| "step": 398000 | |
| }, | |
| { | |
| "epoch": 1.5374952314527595, | |
| "grad_norm": 0.685326874256134, | |
| "learning_rate": 2.7833388888888888e-05, | |
| "loss": 1.2047, | |
| "step": 399000 | |
| }, | |
| { | |
| "epoch": 1.54134860296016, | |
| "grad_norm": 0.6764355301856995, | |
| "learning_rate": 2.7777833333333337e-05, | |
| "loss": 1.2051, | |
| "step": 400000 | |
| }, | |
| { | |
| "epoch": 1.5452019744675605, | |
| "grad_norm": 0.6723695993423462, | |
| "learning_rate": 2.7722277777777776e-05, | |
| "loss": 1.2047, | |
| "step": 401000 | |
| }, | |
| { | |
| "epoch": 1.5490553459749608, | |
| "grad_norm": 0.6721011996269226, | |
| "learning_rate": 2.7666722222222225e-05, | |
| "loss": 1.2035, | |
| "step": 402000 | |
| }, | |
| { | |
| "epoch": 1.552908717482361, | |
| "grad_norm": 0.6737053394317627, | |
| "learning_rate": 2.7611166666666664e-05, | |
| "loss": 1.2047, | |
| "step": 403000 | |
| }, | |
| { | |
| "epoch": 1.5567620889897615, | |
| "grad_norm": 0.6559922695159912, | |
| "learning_rate": 2.7555611111111113e-05, | |
| "loss": 1.2037, | |
| "step": 404000 | |
| }, | |
| { | |
| "epoch": 1.560615460497162, | |
| "grad_norm": 0.6571487188339233, | |
| "learning_rate": 2.750005555555556e-05, | |
| "loss": 1.2034, | |
| "step": 405000 | |
| }, | |
| { | |
| "epoch": 1.5644688320045623, | |
| "grad_norm": 0.6738882660865784, | |
| "learning_rate": 2.74445e-05, | |
| "loss": 1.203, | |
| "step": 406000 | |
| }, | |
| { | |
| "epoch": 1.5683222035119628, | |
| "grad_norm": 0.6922580599784851, | |
| "learning_rate": 2.7388944444444447e-05, | |
| "loss": 1.2028, | |
| "step": 407000 | |
| }, | |
| { | |
| "epoch": 1.5721755750193633, | |
| "grad_norm": 0.6696702837944031, | |
| "learning_rate": 2.733338888888889e-05, | |
| "loss": 1.2021, | |
| "step": 408000 | |
| }, | |
| { | |
| "epoch": 1.5760289465267636, | |
| "grad_norm": 0.688118577003479, | |
| "learning_rate": 2.7277833333333335e-05, | |
| "loss": 1.203, | |
| "step": 409000 | |
| }, | |
| { | |
| "epoch": 1.579882318034164, | |
| "grad_norm": 0.6660063862800598, | |
| "learning_rate": 2.7222277777777777e-05, | |
| "loss": 1.2018, | |
| "step": 410000 | |
| }, | |
| { | |
| "epoch": 1.5837356895415644, | |
| "grad_norm": 0.7178686857223511, | |
| "learning_rate": 2.7166722222222223e-05, | |
| "loss": 1.2018, | |
| "step": 411000 | |
| }, | |
| { | |
| "epoch": 1.587589061048965, | |
| "grad_norm": 0.7126618027687073, | |
| "learning_rate": 2.7111166666666665e-05, | |
| "loss": 1.2019, | |
| "step": 412000 | |
| }, | |
| { | |
| "epoch": 1.5914424325563652, | |
| "grad_norm": 0.7018870711326599, | |
| "learning_rate": 2.705561111111111e-05, | |
| "loss": 1.2018, | |
| "step": 413000 | |
| }, | |
| { | |
| "epoch": 1.5952958040637655, | |
| "grad_norm": 0.6731059551239014, | |
| "learning_rate": 2.700005555555556e-05, | |
| "loss": 1.2012, | |
| "step": 414000 | |
| }, | |
| { | |
| "epoch": 1.599149175571166, | |
| "grad_norm": 0.6750038862228394, | |
| "learning_rate": 2.69445e-05, | |
| "loss": 1.1995, | |
| "step": 415000 | |
| }, | |
| { | |
| "epoch": 1.6030025470785665, | |
| "grad_norm": 0.661834180355072, | |
| "learning_rate": 2.688894444444445e-05, | |
| "loss": 1.1999, | |
| "step": 416000 | |
| }, | |
| { | |
| "epoch": 1.6068559185859668, | |
| "grad_norm": 0.6862068176269531, | |
| "learning_rate": 2.6833388888888887e-05, | |
| "loss": 1.2, | |
| "step": 417000 | |
| }, | |
| { | |
| "epoch": 1.610709290093367, | |
| "grad_norm": 0.6672124862670898, | |
| "learning_rate": 2.6777833333333336e-05, | |
| "loss": 1.1995, | |
| "step": 418000 | |
| }, | |
| { | |
| "epoch": 1.6145626616007676, | |
| "grad_norm": 0.6680454015731812, | |
| "learning_rate": 2.6722277777777775e-05, | |
| "loss": 1.2003, | |
| "step": 419000 | |
| }, | |
| { | |
| "epoch": 1.618416033108168, | |
| "grad_norm": 0.6725075244903564, | |
| "learning_rate": 2.6666722222222225e-05, | |
| "loss": 1.2002, | |
| "step": 420000 | |
| }, | |
| { | |
| "epoch": 1.6222694046155683, | |
| "grad_norm": 0.6725237369537354, | |
| "learning_rate": 2.661116666666667e-05, | |
| "loss": 1.1989, | |
| "step": 421000 | |
| }, | |
| { | |
| "epoch": 1.6261227761229686, | |
| "grad_norm": 0.6713998317718506, | |
| "learning_rate": 2.6555611111111113e-05, | |
| "loss": 1.1992, | |
| "step": 422000 | |
| }, | |
| { | |
| "epoch": 1.6299761476303694, | |
| "grad_norm": 0.6976920366287231, | |
| "learning_rate": 2.650005555555556e-05, | |
| "loss": 1.1996, | |
| "step": 423000 | |
| }, | |
| { | |
| "epoch": 1.6338295191377696, | |
| "grad_norm": 0.6603657007217407, | |
| "learning_rate": 2.64445e-05, | |
| "loss": 1.1988, | |
| "step": 424000 | |
| }, | |
| { | |
| "epoch": 1.63768289064517, | |
| "grad_norm": 0.6840860843658447, | |
| "learning_rate": 2.6388944444444446e-05, | |
| "loss": 1.1986, | |
| "step": 425000 | |
| }, | |
| { | |
| "epoch": 1.6415362621525704, | |
| "grad_norm": 0.6827540397644043, | |
| "learning_rate": 2.633338888888889e-05, | |
| "loss": 1.1985, | |
| "step": 426000 | |
| }, | |
| { | |
| "epoch": 1.645389633659971, | |
| "grad_norm": 0.6934226155281067, | |
| "learning_rate": 2.6277833333333334e-05, | |
| "loss": 1.1986, | |
| "step": 427000 | |
| }, | |
| { | |
| "epoch": 1.6492430051673712, | |
| "grad_norm": 0.6878825426101685, | |
| "learning_rate": 2.622227777777778e-05, | |
| "loss": 1.1977, | |
| "step": 428000 | |
| }, | |
| { | |
| "epoch": 1.6530963766747715, | |
| "grad_norm": 0.6928458213806152, | |
| "learning_rate": 2.6166722222222223e-05, | |
| "loss": 1.1979, | |
| "step": 429000 | |
| }, | |
| { | |
| "epoch": 1.656949748182172, | |
| "grad_norm": 0.6635681986808777, | |
| "learning_rate": 2.6111166666666668e-05, | |
| "loss": 1.1974, | |
| "step": 430000 | |
| }, | |
| { | |
| "epoch": 1.6608031196895725, | |
| "grad_norm": 0.753787636756897, | |
| "learning_rate": 2.605561111111111e-05, | |
| "loss": 1.1975, | |
| "step": 431000 | |
| }, | |
| { | |
| "epoch": 1.6646564911969728, | |
| "grad_norm": 0.6675045490264893, | |
| "learning_rate": 2.600005555555556e-05, | |
| "loss": 1.1972, | |
| "step": 432000 | |
| }, | |
| { | |
| "epoch": 1.668509862704373, | |
| "grad_norm": 0.6703343987464905, | |
| "learning_rate": 2.59445e-05, | |
| "loss": 1.1969, | |
| "step": 433000 | |
| }, | |
| { | |
| "epoch": 1.6723632342117736, | |
| "grad_norm": 0.6867698431015015, | |
| "learning_rate": 2.5888944444444448e-05, | |
| "loss": 1.1966, | |
| "step": 434000 | |
| }, | |
| { | |
| "epoch": 1.676216605719174, | |
| "grad_norm": 0.6581012606620789, | |
| "learning_rate": 2.5833388888888887e-05, | |
| "loss": 1.1966, | |
| "step": 435000 | |
| }, | |
| { | |
| "epoch": 1.6800699772265744, | |
| "grad_norm": 0.7066845297813416, | |
| "learning_rate": 2.5777833333333336e-05, | |
| "loss": 1.197, | |
| "step": 436000 | |
| }, | |
| { | |
| "epoch": 1.6839233487339746, | |
| "grad_norm": 0.7115961313247681, | |
| "learning_rate": 2.572227777777778e-05, | |
| "loss": 1.196, | |
| "step": 437000 | |
| }, | |
| { | |
| "epoch": 1.6877767202413752, | |
| "grad_norm": 0.7107385993003845, | |
| "learning_rate": 2.5666722222222224e-05, | |
| "loss": 1.1959, | |
| "step": 438000 | |
| }, | |
| { | |
| "epoch": 1.6916300917487757, | |
| "grad_norm": 0.6925193667411804, | |
| "learning_rate": 2.561116666666667e-05, | |
| "loss": 1.1956, | |
| "step": 439000 | |
| }, | |
| { | |
| "epoch": 1.695483463256176, | |
| "grad_norm": 0.6661742329597473, | |
| "learning_rate": 2.5555611111111112e-05, | |
| "loss": 1.1951, | |
| "step": 440000 | |
| }, | |
| { | |
| "epoch": 1.6993368347635764, | |
| "grad_norm": 0.6755145788192749, | |
| "learning_rate": 2.5500055555555558e-05, | |
| "loss": 1.1951, | |
| "step": 441000 | |
| }, | |
| { | |
| "epoch": 1.703190206270977, | |
| "grad_norm": 0.7076007127761841, | |
| "learning_rate": 2.54445e-05, | |
| "loss": 1.1953, | |
| "step": 442000 | |
| }, | |
| { | |
| "epoch": 1.7070435777783772, | |
| "grad_norm": 0.6796631813049316, | |
| "learning_rate": 2.5388944444444446e-05, | |
| "loss": 1.1945, | |
| "step": 443000 | |
| }, | |
| { | |
| "epoch": 1.7108969492857775, | |
| "grad_norm": 0.6803722381591797, | |
| "learning_rate": 2.533338888888889e-05, | |
| "loss": 1.1947, | |
| "step": 444000 | |
| }, | |
| { | |
| "epoch": 1.714750320793178, | |
| "grad_norm": 0.6730965971946716, | |
| "learning_rate": 2.5277833333333334e-05, | |
| "loss": 1.1942, | |
| "step": 445000 | |
| }, | |
| { | |
| "epoch": 1.7186036923005785, | |
| "grad_norm": 0.6800934672355652, | |
| "learning_rate": 2.522227777777778e-05, | |
| "loss": 1.1941, | |
| "step": 446000 | |
| }, | |
| { | |
| "epoch": 1.7224570638079788, | |
| "grad_norm": 0.6878598928451538, | |
| "learning_rate": 2.5166722222222222e-05, | |
| "loss": 1.1941, | |
| "step": 447000 | |
| }, | |
| { | |
| "epoch": 1.726310435315379, | |
| "grad_norm": 0.6674512624740601, | |
| "learning_rate": 2.5111166666666668e-05, | |
| "loss": 1.1935, | |
| "step": 448000 | |
| }, | |
| { | |
| "epoch": 1.7301638068227796, | |
| "grad_norm": 0.6966185569763184, | |
| "learning_rate": 2.505561111111111e-05, | |
| "loss": 1.1936, | |
| "step": 449000 | |
| }, | |
| { | |
| "epoch": 1.73401717833018, | |
| "grad_norm": 0.708171546459198, | |
| "learning_rate": 2.5000055555555556e-05, | |
| "loss": 1.1932, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 1.7378705498375804, | |
| "grad_norm": 0.6620480418205261, | |
| "learning_rate": 2.49445e-05, | |
| "loss": 1.1929, | |
| "step": 451000 | |
| }, | |
| { | |
| "epoch": 1.7417239213449807, | |
| "grad_norm": 0.6783220767974854, | |
| "learning_rate": 2.4888944444444444e-05, | |
| "loss": 1.1934, | |
| "step": 452000 | |
| }, | |
| { | |
| "epoch": 1.7455772928523812, | |
| "grad_norm": 0.7003952264785767, | |
| "learning_rate": 2.483338888888889e-05, | |
| "loss": 1.1926, | |
| "step": 453000 | |
| }, | |
| { | |
| "epoch": 1.7494306643597817, | |
| "grad_norm": 0.6729221343994141, | |
| "learning_rate": 2.4777833333333332e-05, | |
| "loss": 1.1925, | |
| "step": 454000 | |
| }, | |
| { | |
| "epoch": 1.753284035867182, | |
| "grad_norm": 0.7178687453269958, | |
| "learning_rate": 2.472227777777778e-05, | |
| "loss": 1.193, | |
| "step": 455000 | |
| }, | |
| { | |
| "epoch": 1.7571374073745822, | |
| "grad_norm": 0.6935052275657654, | |
| "learning_rate": 2.4666722222222223e-05, | |
| "loss": 1.1921, | |
| "step": 456000 | |
| }, | |
| { | |
| "epoch": 1.7609907788819827, | |
| "grad_norm": 0.6983472108840942, | |
| "learning_rate": 2.461116666666667e-05, | |
| "loss": 1.1922, | |
| "step": 457000 | |
| }, | |
| { | |
| "epoch": 1.7648441503893832, | |
| "grad_norm": 0.6847233176231384, | |
| "learning_rate": 2.455561111111111e-05, | |
| "loss": 1.192, | |
| "step": 458000 | |
| }, | |
| { | |
| "epoch": 1.7686975218967835, | |
| "grad_norm": 0.6784983277320862, | |
| "learning_rate": 2.4500055555555557e-05, | |
| "loss": 1.1914, | |
| "step": 459000 | |
| }, | |
| { | |
| "epoch": 1.772550893404184, | |
| "grad_norm": 0.6867578625679016, | |
| "learning_rate": 2.44445e-05, | |
| "loss": 1.1916, | |
| "step": 460000 | |
| }, | |
| { | |
| "epoch": 1.7764042649115845, | |
| "grad_norm": 0.681760311126709, | |
| "learning_rate": 2.4388944444444445e-05, | |
| "loss": 1.1909, | |
| "step": 461000 | |
| }, | |
| { | |
| "epoch": 1.7802576364189848, | |
| "grad_norm": 0.6820582151412964, | |
| "learning_rate": 2.4333388888888888e-05, | |
| "loss": 1.1913, | |
| "step": 462000 | |
| }, | |
| { | |
| "epoch": 1.784111007926385, | |
| "grad_norm": 0.6804444193840027, | |
| "learning_rate": 2.4277833333333337e-05, | |
| "loss": 1.1905, | |
| "step": 463000 | |
| }, | |
| { | |
| "epoch": 1.7879643794337856, | |
| "grad_norm": 0.6828032732009888, | |
| "learning_rate": 2.422227777777778e-05, | |
| "loss": 1.1906, | |
| "step": 464000 | |
| }, | |
| { | |
| "epoch": 1.7918177509411861, | |
| "grad_norm": 0.675101101398468, | |
| "learning_rate": 2.4166722222222225e-05, | |
| "loss": 1.1902, | |
| "step": 465000 | |
| }, | |
| { | |
| "epoch": 1.7956711224485864, | |
| "grad_norm": 0.6670296788215637, | |
| "learning_rate": 2.4111166666666667e-05, | |
| "loss": 1.1902, | |
| "step": 466000 | |
| }, | |
| { | |
| "epoch": 1.7995244939559867, | |
| "grad_norm": 0.730213463306427, | |
| "learning_rate": 2.4055611111111113e-05, | |
| "loss": 1.1903, | |
| "step": 467000 | |
| }, | |
| { | |
| "epoch": 1.8033778654633872, | |
| "grad_norm": 0.728732705116272, | |
| "learning_rate": 2.4000055555555555e-05, | |
| "loss": 1.1898, | |
| "step": 468000 | |
| }, | |
| { | |
| "epoch": 1.8072312369707877, | |
| "grad_norm": 0.7038969993591309, | |
| "learning_rate": 2.39445e-05, | |
| "loss": 1.1901, | |
| "step": 469000 | |
| }, | |
| { | |
| "epoch": 1.811084608478188, | |
| "grad_norm": 0.7080554366111755, | |
| "learning_rate": 2.3888944444444443e-05, | |
| "loss": 1.1902, | |
| "step": 470000 | |
| }, | |
| { | |
| "epoch": 1.8149379799855883, | |
| "grad_norm": 0.6914920806884766, | |
| "learning_rate": 2.3833388888888892e-05, | |
| "loss": 1.1893, | |
| "step": 471000 | |
| }, | |
| { | |
| "epoch": 1.8187913514929888, | |
| "grad_norm": 0.6858305931091309, | |
| "learning_rate": 2.3777833333333335e-05, | |
| "loss": 1.1886, | |
| "step": 472000 | |
| }, | |
| { | |
| "epoch": 1.8226447230003893, | |
| "grad_norm": 0.7036804556846619, | |
| "learning_rate": 2.372227777777778e-05, | |
| "loss": 1.1891, | |
| "step": 473000 | |
| }, | |
| { | |
| "epoch": 1.8264980945077895, | |
| "grad_norm": 0.7008316516876221, | |
| "learning_rate": 2.3666722222222223e-05, | |
| "loss": 1.1883, | |
| "step": 474000 | |
| }, | |
| { | |
| "epoch": 1.8303514660151898, | |
| "grad_norm": 0.7048190236091614, | |
| "learning_rate": 2.361116666666667e-05, | |
| "loss": 1.189, | |
| "step": 475000 | |
| }, | |
| { | |
| "epoch": 1.8342048375225903, | |
| "grad_norm": 0.7033438086509705, | |
| "learning_rate": 2.355561111111111e-05, | |
| "loss": 1.1871, | |
| "step": 476000 | |
| }, | |
| { | |
| "epoch": 1.8380582090299908, | |
| "grad_norm": 0.7042247653007507, | |
| "learning_rate": 2.3500055555555557e-05, | |
| "loss": 1.1878, | |
| "step": 477000 | |
| }, | |
| { | |
| "epoch": 1.8419115805373911, | |
| "grad_norm": 0.7107093334197998, | |
| "learning_rate": 2.34445e-05, | |
| "loss": 1.1877, | |
| "step": 478000 | |
| }, | |
| { | |
| "epoch": 1.8457649520447916, | |
| "grad_norm": 0.6645656228065491, | |
| "learning_rate": 2.3388944444444448e-05, | |
| "loss": 1.1872, | |
| "step": 479000 | |
| }, | |
| { | |
| "epoch": 1.8496183235521921, | |
| "grad_norm": 0.676702618598938, | |
| "learning_rate": 2.333338888888889e-05, | |
| "loss": 1.1867, | |
| "step": 480000 | |
| }, | |
| { | |
| "epoch": 1.8534716950595924, | |
| "grad_norm": 0.7022708654403687, | |
| "learning_rate": 2.3277833333333336e-05, | |
| "loss": 1.1878, | |
| "step": 481000 | |
| }, | |
| { | |
| "epoch": 1.8573250665669927, | |
| "grad_norm": 0.6802497506141663, | |
| "learning_rate": 2.322227777777778e-05, | |
| "loss": 1.1866, | |
| "step": 482000 | |
| }, | |
| { | |
| "epoch": 1.8611784380743932, | |
| "grad_norm": 0.6969447135925293, | |
| "learning_rate": 2.3166722222222224e-05, | |
| "loss": 1.187, | |
| "step": 483000 | |
| }, | |
| { | |
| "epoch": 1.8650318095817937, | |
| "grad_norm": 0.6957907676696777, | |
| "learning_rate": 2.3111166666666666e-05, | |
| "loss": 1.1869, | |
| "step": 484000 | |
| }, | |
| { | |
| "epoch": 1.868885181089194, | |
| "grad_norm": 0.70342618227005, | |
| "learning_rate": 2.3055611111111112e-05, | |
| "loss": 1.1861, | |
| "step": 485000 | |
| }, | |
| { | |
| "epoch": 1.8727385525965943, | |
| "grad_norm": 0.6961056590080261, | |
| "learning_rate": 2.3000055555555558e-05, | |
| "loss": 1.1852, | |
| "step": 486000 | |
| }, | |
| { | |
| "epoch": 1.8765919241039948, | |
| "grad_norm": 0.6739860773086548, | |
| "learning_rate": 2.2944500000000004e-05, | |
| "loss": 1.1856, | |
| "step": 487000 | |
| }, | |
| { | |
| "epoch": 1.8804452956113953, | |
| "grad_norm": 0.7163240909576416, | |
| "learning_rate": 2.2888944444444446e-05, | |
| "loss": 1.1852, | |
| "step": 488000 | |
| }, | |
| { | |
| "epoch": 1.8842986671187956, | |
| "grad_norm": 0.6964929103851318, | |
| "learning_rate": 2.2833388888888892e-05, | |
| "loss": 1.1853, | |
| "step": 489000 | |
| }, | |
| { | |
| "epoch": 1.8881520386261958, | |
| "grad_norm": 0.6857479214668274, | |
| "learning_rate": 2.2777833333333334e-05, | |
| "loss": 1.1858, | |
| "step": 490000 | |
| }, | |
| { | |
| "epoch": 1.8920054101335964, | |
| "grad_norm": 0.6846323013305664, | |
| "learning_rate": 2.272227777777778e-05, | |
| "loss": 1.185, | |
| "step": 491000 | |
| }, | |
| { | |
| "epoch": 1.8958587816409969, | |
| "grad_norm": 0.6962786316871643, | |
| "learning_rate": 2.2666722222222222e-05, | |
| "loss": 1.1845, | |
| "step": 492000 | |
| }, | |
| { | |
| "epoch": 1.8997121531483971, | |
| "grad_norm": 0.7047570943832397, | |
| "learning_rate": 2.2611166666666668e-05, | |
| "loss": 1.185, | |
| "step": 493000 | |
| }, | |
| { | |
| "epoch": 1.9035655246557974, | |
| "grad_norm": 0.699938952922821, | |
| "learning_rate": 2.2555611111111114e-05, | |
| "loss": 1.1845, | |
| "step": 494000 | |
| }, | |
| { | |
| "epoch": 1.9074188961631982, | |
| "grad_norm": 0.6817954182624817, | |
| "learning_rate": 2.2500055555555556e-05, | |
| "loss": 1.1844, | |
| "step": 495000 | |
| }, | |
| { | |
| "epoch": 1.9112722676705984, | |
| "grad_norm": 0.7055638432502747, | |
| "learning_rate": 2.2444500000000002e-05, | |
| "loss": 1.1848, | |
| "step": 496000 | |
| }, | |
| { | |
| "epoch": 1.9151256391779987, | |
| "grad_norm": 0.688340425491333, | |
| "learning_rate": 2.2388944444444444e-05, | |
| "loss": 1.1842, | |
| "step": 497000 | |
| }, | |
| { | |
| "epoch": 1.9189790106853992, | |
| "grad_norm": 0.667679488658905, | |
| "learning_rate": 2.233338888888889e-05, | |
| "loss": 1.1832, | |
| "step": 498000 | |
| }, | |
| { | |
| "epoch": 1.9228323821927997, | |
| "grad_norm": 0.7045871019363403, | |
| "learning_rate": 2.2277833333333335e-05, | |
| "loss": 1.1823, | |
| "step": 499000 | |
| }, | |
| { | |
| "epoch": 1.9266857537002, | |
| "grad_norm": 0.7178110480308533, | |
| "learning_rate": 2.2222277777777778e-05, | |
| "loss": 1.1834, | |
| "step": 500000 | |
| }, | |
| { | |
| "epoch": 1.9305391252076003, | |
| "grad_norm": 0.7002114057540894, | |
| "learning_rate": 2.2166722222222224e-05, | |
| "loss": 1.1842, | |
| "step": 501000 | |
| }, | |
| { | |
| "epoch": 1.9343924967150008, | |
| "grad_norm": 0.6976704001426697, | |
| "learning_rate": 2.211116666666667e-05, | |
| "loss": 1.1827, | |
| "step": 502000 | |
| }, | |
| { | |
| "epoch": 1.9382458682224013, | |
| "grad_norm": 0.6923725008964539, | |
| "learning_rate": 2.205561111111111e-05, | |
| "loss": 1.1828, | |
| "step": 503000 | |
| }, | |
| { | |
| "epoch": 1.9420992397298016, | |
| "grad_norm": 0.6811366081237793, | |
| "learning_rate": 2.2000055555555557e-05, | |
| "loss": 1.1831, | |
| "step": 504000 | |
| }, | |
| { | |
| "epoch": 1.9459526112372019, | |
| "grad_norm": 0.6838697195053101, | |
| "learning_rate": 2.19445e-05, | |
| "loss": 1.1835, | |
| "step": 505000 | |
| }, | |
| { | |
| "epoch": 1.9498059827446024, | |
| "grad_norm": 0.7210490703582764, | |
| "learning_rate": 2.1888944444444445e-05, | |
| "loss": 1.1827, | |
| "step": 506000 | |
| }, | |
| { | |
| "epoch": 1.9536593542520029, | |
| "grad_norm": 0.7091962695121765, | |
| "learning_rate": 2.1833388888888888e-05, | |
| "loss": 1.1826, | |
| "step": 507000 | |
| }, | |
| { | |
| "epoch": 1.9575127257594032, | |
| "grad_norm": 0.7018596529960632, | |
| "learning_rate": 2.1777833333333334e-05, | |
| "loss": 1.1817, | |
| "step": 508000 | |
| }, | |
| { | |
| "epoch": 1.9613660972668034, | |
| "grad_norm": 0.6678944826126099, | |
| "learning_rate": 2.1722277777777776e-05, | |
| "loss": 1.1828, | |
| "step": 509000 | |
| }, | |
| { | |
| "epoch": 1.965219468774204, | |
| "grad_norm": 0.7078109979629517, | |
| "learning_rate": 2.1666722222222225e-05, | |
| "loss": 1.1817, | |
| "step": 510000 | |
| }, | |
| { | |
| "epoch": 1.9690728402816045, | |
| "grad_norm": 0.7082613110542297, | |
| "learning_rate": 2.1611166666666667e-05, | |
| "loss": 1.1816, | |
| "step": 511000 | |
| }, | |
| { | |
| "epoch": 1.9729262117890047, | |
| "grad_norm": 0.692307710647583, | |
| "learning_rate": 2.1555611111111113e-05, | |
| "loss": 1.1818, | |
| "step": 512000 | |
| }, | |
| { | |
| "epoch": 1.9767795832964052, | |
| "grad_norm": 0.6972088813781738, | |
| "learning_rate": 2.1500055555555555e-05, | |
| "loss": 1.1817, | |
| "step": 513000 | |
| }, | |
| { | |
| "epoch": 1.9806329548038057, | |
| "grad_norm": 0.7020753622055054, | |
| "learning_rate": 2.14445e-05, | |
| "loss": 1.1815, | |
| "step": 514000 | |
| }, | |
| { | |
| "epoch": 1.984486326311206, | |
| "grad_norm": 0.7120492458343506, | |
| "learning_rate": 2.1388944444444443e-05, | |
| "loss": 1.1814, | |
| "step": 515000 | |
| }, | |
| { | |
| "epoch": 1.9883396978186063, | |
| "grad_norm": 0.6771794557571411, | |
| "learning_rate": 2.133338888888889e-05, | |
| "loss": 1.1813, | |
| "step": 516000 | |
| }, | |
| { | |
| "epoch": 1.9921930693260068, | |
| "grad_norm": 0.6771290898323059, | |
| "learning_rate": 2.1277833333333335e-05, | |
| "loss": 1.1799, | |
| "step": 517000 | |
| }, | |
| { | |
| "epoch": 1.9960464408334073, | |
| "grad_norm": 0.694908082485199, | |
| "learning_rate": 2.122227777777778e-05, | |
| "loss": 1.1807, | |
| "step": 518000 | |
| }, | |
| { | |
| "epoch": 1.9998998123408076, | |
| "grad_norm": 0.6758216023445129, | |
| "learning_rate": 2.1166722222222223e-05, | |
| "loss": 1.1808, | |
| "step": 519000 | |
| }, | |
| { | |
| "epoch": 2.003753183848208, | |
| "grad_norm": 0.6828746795654297, | |
| "learning_rate": 2.111116666666667e-05, | |
| "loss": 1.1799, | |
| "step": 520000 | |
| }, | |
| { | |
| "epoch": 2.007606555355608, | |
| "grad_norm": 0.7079645991325378, | |
| "learning_rate": 2.105561111111111e-05, | |
| "loss": 1.1788, | |
| "step": 521000 | |
| }, | |
| { | |
| "epoch": 2.011459926863009, | |
| "grad_norm": 0.7006625533103943, | |
| "learning_rate": 2.1000055555555557e-05, | |
| "loss": 1.1796, | |
| "step": 522000 | |
| }, | |
| { | |
| "epoch": 2.015313298370409, | |
| "grad_norm": 0.7358622550964355, | |
| "learning_rate": 2.09445e-05, | |
| "loss": 1.1794, | |
| "step": 523000 | |
| }, | |
| { | |
| "epoch": 2.0191666698778095, | |
| "grad_norm": 0.7054480910301208, | |
| "learning_rate": 2.0888944444444445e-05, | |
| "loss": 1.1797, | |
| "step": 524000 | |
| }, | |
| { | |
| "epoch": 2.02302004138521, | |
| "grad_norm": 0.6997362971305847, | |
| "learning_rate": 2.083338888888889e-05, | |
| "loss": 1.1794, | |
| "step": 525000 | |
| }, | |
| { | |
| "epoch": 2.0268734128926105, | |
| "grad_norm": 0.7154229879379272, | |
| "learning_rate": 2.0777833333333336e-05, | |
| "loss": 1.1795, | |
| "step": 526000 | |
| }, | |
| { | |
| "epoch": 2.0307267844000108, | |
| "grad_norm": 0.7037490010261536, | |
| "learning_rate": 2.072227777777778e-05, | |
| "loss": 1.177, | |
| "step": 527000 | |
| }, | |
| { | |
| "epoch": 2.034580155907411, | |
| "grad_norm": 0.6936154365539551, | |
| "learning_rate": 2.0666722222222224e-05, | |
| "loss": 1.1782, | |
| "step": 528000 | |
| }, | |
| { | |
| "epoch": 2.0384335274148118, | |
| "grad_norm": 0.6885814070701599, | |
| "learning_rate": 2.0611166666666667e-05, | |
| "loss": 1.1787, | |
| "step": 529000 | |
| }, | |
| { | |
| "epoch": 2.042286898922212, | |
| "grad_norm": 0.7303010821342468, | |
| "learning_rate": 2.0555611111111112e-05, | |
| "loss": 1.1779, | |
| "step": 530000 | |
| }, | |
| { | |
| "epoch": 2.0461402704296123, | |
| "grad_norm": 0.7179313898086548, | |
| "learning_rate": 2.0500055555555555e-05, | |
| "loss": 1.1774, | |
| "step": 531000 | |
| }, | |
| { | |
| "epoch": 2.0499936419370126, | |
| "grad_norm": 0.70022052526474, | |
| "learning_rate": 2.04445e-05, | |
| "loss": 1.1781, | |
| "step": 532000 | |
| }, | |
| { | |
| "epoch": 2.0538470134444133, | |
| "grad_norm": 0.6912038326263428, | |
| "learning_rate": 2.0388944444444446e-05, | |
| "loss": 1.1772, | |
| "step": 533000 | |
| }, | |
| { | |
| "epoch": 2.0577003849518136, | |
| "grad_norm": 0.6987645030021667, | |
| "learning_rate": 2.0333388888888892e-05, | |
| "loss": 1.1774, | |
| "step": 534000 | |
| }, | |
| { | |
| "epoch": 2.061553756459214, | |
| "grad_norm": 0.7104570269584656, | |
| "learning_rate": 2.0277833333333334e-05, | |
| "loss": 1.1772, | |
| "step": 535000 | |
| }, | |
| { | |
| "epoch": 2.065407127966614, | |
| "grad_norm": 0.6777941584587097, | |
| "learning_rate": 2.022227777777778e-05, | |
| "loss": 1.1771, | |
| "step": 536000 | |
| }, | |
| { | |
| "epoch": 2.069260499474015, | |
| "grad_norm": 0.7318848967552185, | |
| "learning_rate": 2.0166722222222222e-05, | |
| "loss": 1.1767, | |
| "step": 537000 | |
| }, | |
| { | |
| "epoch": 2.073113870981415, | |
| "grad_norm": 0.6985452175140381, | |
| "learning_rate": 2.0111166666666668e-05, | |
| "loss": 1.1777, | |
| "step": 538000 | |
| }, | |
| { | |
| "epoch": 2.0769672424888155, | |
| "grad_norm": 0.6963249444961548, | |
| "learning_rate": 2.005561111111111e-05, | |
| "loss": 1.1771, | |
| "step": 539000 | |
| }, | |
| { | |
| "epoch": 2.0808206139962158, | |
| "grad_norm": 0.7212308049201965, | |
| "learning_rate": 2.0000055555555556e-05, | |
| "loss": 1.1763, | |
| "step": 540000 | |
| }, | |
| { | |
| "epoch": 2.0846739855036165, | |
| "grad_norm": 0.7275625467300415, | |
| "learning_rate": 1.9944500000000002e-05, | |
| "loss": 1.1766, | |
| "step": 541000 | |
| }, | |
| { | |
| "epoch": 2.0885273570110168, | |
| "grad_norm": 0.6992737054824829, | |
| "learning_rate": 1.9888944444444448e-05, | |
| "loss": 1.1768, | |
| "step": 542000 | |
| }, | |
| { | |
| "epoch": 2.092380728518417, | |
| "grad_norm": 0.736659049987793, | |
| "learning_rate": 1.983338888888889e-05, | |
| "loss": 1.1757, | |
| "step": 543000 | |
| }, | |
| { | |
| "epoch": 2.096234100025818, | |
| "grad_norm": 0.6867293119430542, | |
| "learning_rate": 1.9777833333333336e-05, | |
| "loss": 1.1771, | |
| "step": 544000 | |
| }, | |
| { | |
| "epoch": 2.100087471533218, | |
| "grad_norm": 0.7108346819877625, | |
| "learning_rate": 1.9722277777777778e-05, | |
| "loss": 1.1763, | |
| "step": 545000 | |
| }, | |
| { | |
| "epoch": 2.1039408430406183, | |
| "grad_norm": 0.7174147367477417, | |
| "learning_rate": 1.9666722222222224e-05, | |
| "loss": 1.1741, | |
| "step": 546000 | |
| }, | |
| { | |
| "epoch": 2.1077942145480186, | |
| "grad_norm": 0.6843900680541992, | |
| "learning_rate": 1.9611166666666666e-05, | |
| "loss": 1.1761, | |
| "step": 547000 | |
| }, | |
| { | |
| "epoch": 2.1116475860554194, | |
| "grad_norm": 0.7024357318878174, | |
| "learning_rate": 1.9555611111111112e-05, | |
| "loss": 1.1747, | |
| "step": 548000 | |
| }, | |
| { | |
| "epoch": 2.1155009575628196, | |
| "grad_norm": 0.7079586386680603, | |
| "learning_rate": 1.9500055555555558e-05, | |
| "loss": 1.1756, | |
| "step": 549000 | |
| }, | |
| { | |
| "epoch": 2.11935432907022, | |
| "grad_norm": 0.7204769253730774, | |
| "learning_rate": 1.94445e-05, | |
| "loss": 1.1751, | |
| "step": 550000 | |
| }, | |
| { | |
| "epoch": 2.12320770057762, | |
| "grad_norm": 0.6855219006538391, | |
| "learning_rate": 1.9388944444444446e-05, | |
| "loss": 1.1743, | |
| "step": 551000 | |
| }, | |
| { | |
| "epoch": 2.127061072085021, | |
| "grad_norm": 0.7142133712768555, | |
| "learning_rate": 1.933338888888889e-05, | |
| "loss": 1.1749, | |
| "step": 552000 | |
| }, | |
| { | |
| "epoch": 2.130914443592421, | |
| "grad_norm": 0.716454803943634, | |
| "learning_rate": 1.9277833333333334e-05, | |
| "loss": 1.1742, | |
| "step": 553000 | |
| }, | |
| { | |
| "epoch": 2.1347678150998215, | |
| "grad_norm": 0.734761118888855, | |
| "learning_rate": 1.922227777777778e-05, | |
| "loss": 1.175, | |
| "step": 554000 | |
| }, | |
| { | |
| "epoch": 2.138621186607222, | |
| "grad_norm": 0.7150977849960327, | |
| "learning_rate": 1.9166722222222222e-05, | |
| "loss": 1.1741, | |
| "step": 555000 | |
| }, | |
| { | |
| "epoch": 2.1424745581146225, | |
| "grad_norm": 0.7086408734321594, | |
| "learning_rate": 1.9111166666666668e-05, | |
| "loss": 1.1733, | |
| "step": 556000 | |
| }, | |
| { | |
| "epoch": 2.146327929622023, | |
| "grad_norm": 0.689511775970459, | |
| "learning_rate": 1.9055611111111113e-05, | |
| "loss": 1.1736, | |
| "step": 557000 | |
| }, | |
| { | |
| "epoch": 2.150181301129423, | |
| "grad_norm": 0.6908608078956604, | |
| "learning_rate": 1.9000055555555556e-05, | |
| "loss": 1.1734, | |
| "step": 558000 | |
| }, | |
| { | |
| "epoch": 2.154034672636824, | |
| "grad_norm": 0.7414750456809998, | |
| "learning_rate": 1.89445e-05, | |
| "loss": 1.1735, | |
| "step": 559000 | |
| }, | |
| { | |
| "epoch": 2.157888044144224, | |
| "grad_norm": 0.6861109137535095, | |
| "learning_rate": 1.8888944444444444e-05, | |
| "loss": 1.1739, | |
| "step": 560000 | |
| }, | |
| { | |
| "epoch": 2.1617414156516244, | |
| "grad_norm": 0.6974884271621704, | |
| "learning_rate": 1.883338888888889e-05, | |
| "loss": 1.1732, | |
| "step": 561000 | |
| }, | |
| { | |
| "epoch": 2.1655947871590246, | |
| "grad_norm": 0.7133603692054749, | |
| "learning_rate": 1.8777833333333332e-05, | |
| "loss": 1.1729, | |
| "step": 562000 | |
| }, | |
| { | |
| "epoch": 2.1694481586664254, | |
| "grad_norm": 0.7137395143508911, | |
| "learning_rate": 1.8722277777777777e-05, | |
| "loss": 1.1733, | |
| "step": 563000 | |
| }, | |
| { | |
| "epoch": 2.1733015301738257, | |
| "grad_norm": 0.724099338054657, | |
| "learning_rate": 1.8666722222222223e-05, | |
| "loss": 1.1725, | |
| "step": 564000 | |
| }, | |
| { | |
| "epoch": 2.177154901681226, | |
| "grad_norm": 0.7194118499755859, | |
| "learning_rate": 1.861116666666667e-05, | |
| "loss": 1.1725, | |
| "step": 565000 | |
| }, | |
| { | |
| "epoch": 2.1810082731886262, | |
| "grad_norm": 0.7033548355102539, | |
| "learning_rate": 1.855561111111111e-05, | |
| "loss": 1.172, | |
| "step": 566000 | |
| }, | |
| { | |
| "epoch": 2.184861644696027, | |
| "grad_norm": 0.6971196532249451, | |
| "learning_rate": 1.8500055555555557e-05, | |
| "loss": 1.1714, | |
| "step": 567000 | |
| }, | |
| { | |
| "epoch": 2.1887150162034272, | |
| "grad_norm": 0.7256486415863037, | |
| "learning_rate": 1.84445e-05, | |
| "loss": 1.1723, | |
| "step": 568000 | |
| }, | |
| { | |
| "epoch": 2.1925683877108275, | |
| "grad_norm": 0.6980853080749512, | |
| "learning_rate": 1.8388944444444445e-05, | |
| "loss": 1.1714, | |
| "step": 569000 | |
| }, | |
| { | |
| "epoch": 2.196421759218228, | |
| "grad_norm": 0.7225170731544495, | |
| "learning_rate": 1.8333388888888887e-05, | |
| "loss": 1.1721, | |
| "step": 570000 | |
| }, | |
| { | |
| "epoch": 2.2002751307256285, | |
| "grad_norm": 0.7157464623451233, | |
| "learning_rate": 1.8277833333333333e-05, | |
| "loss": 1.1718, | |
| "step": 571000 | |
| }, | |
| { | |
| "epoch": 2.204128502233029, | |
| "grad_norm": 0.6965727806091309, | |
| "learning_rate": 1.822227777777778e-05, | |
| "loss": 1.1719, | |
| "step": 572000 | |
| }, | |
| { | |
| "epoch": 2.207981873740429, | |
| "grad_norm": 0.6944066882133484, | |
| "learning_rate": 1.8166722222222225e-05, | |
| "loss": 1.1716, | |
| "step": 573000 | |
| }, | |
| { | |
| "epoch": 2.21183524524783, | |
| "grad_norm": 0.709082841873169, | |
| "learning_rate": 1.8111166666666667e-05, | |
| "loss": 1.1719, | |
| "step": 574000 | |
| }, | |
| { | |
| "epoch": 2.21568861675523, | |
| "grad_norm": 0.7087362408638, | |
| "learning_rate": 1.8055611111111113e-05, | |
| "loss": 1.1707, | |
| "step": 575000 | |
| }, | |
| { | |
| "epoch": 2.2195419882626304, | |
| "grad_norm": 0.7397356033325195, | |
| "learning_rate": 1.8000055555555555e-05, | |
| "loss": 1.1713, | |
| "step": 576000 | |
| }, | |
| { | |
| "epoch": 2.2233953597700307, | |
| "grad_norm": 0.7147518396377563, | |
| "learning_rate": 1.79445e-05, | |
| "loss": 1.1705, | |
| "step": 577000 | |
| }, | |
| { | |
| "epoch": 2.2272487312774314, | |
| "grad_norm": 0.7075289487838745, | |
| "learning_rate": 1.7888944444444443e-05, | |
| "loss": 1.1707, | |
| "step": 578000 | |
| }, | |
| { | |
| "epoch": 2.2311021027848317, | |
| "grad_norm": 0.7047394514083862, | |
| "learning_rate": 1.783338888888889e-05, | |
| "loss": 1.1696, | |
| "step": 579000 | |
| }, | |
| { | |
| "epoch": 2.234955474292232, | |
| "grad_norm": 0.7000110149383545, | |
| "learning_rate": 1.7777833333333335e-05, | |
| "loss": 1.1711, | |
| "step": 580000 | |
| }, | |
| { | |
| "epoch": 2.2388088457996322, | |
| "grad_norm": 0.7090974450111389, | |
| "learning_rate": 1.772227777777778e-05, | |
| "loss": 1.1701, | |
| "step": 581000 | |
| }, | |
| { | |
| "epoch": 2.242662217307033, | |
| "grad_norm": 0.7133679389953613, | |
| "learning_rate": 1.7666722222222223e-05, | |
| "loss": 1.1701, | |
| "step": 582000 | |
| }, | |
| { | |
| "epoch": 2.2465155888144333, | |
| "grad_norm": 0.69938063621521, | |
| "learning_rate": 1.761116666666667e-05, | |
| "loss": 1.1692, | |
| "step": 583000 | |
| }, | |
| { | |
| "epoch": 2.2503689603218335, | |
| "grad_norm": 0.7067489624023438, | |
| "learning_rate": 1.755561111111111e-05, | |
| "loss": 1.1703, | |
| "step": 584000 | |
| }, | |
| { | |
| "epoch": 2.254222331829234, | |
| "grad_norm": 0.7081299424171448, | |
| "learning_rate": 1.7500055555555556e-05, | |
| "loss": 1.1689, | |
| "step": 585000 | |
| }, | |
| { | |
| "epoch": 2.2580757033366345, | |
| "grad_norm": 0.71224445104599, | |
| "learning_rate": 1.74445e-05, | |
| "loss": 1.169, | |
| "step": 586000 | |
| }, | |
| { | |
| "epoch": 2.261929074844035, | |
| "grad_norm": 0.7073362469673157, | |
| "learning_rate": 1.7388944444444448e-05, | |
| "loss": 1.1691, | |
| "step": 587000 | |
| }, | |
| { | |
| "epoch": 2.265782446351435, | |
| "grad_norm": 0.7043183445930481, | |
| "learning_rate": 1.733338888888889e-05, | |
| "loss": 1.1703, | |
| "step": 588000 | |
| }, | |
| { | |
| "epoch": 2.269635817858836, | |
| "grad_norm": 0.6992381811141968, | |
| "learning_rate": 1.7277833333333336e-05, | |
| "loss": 1.1689, | |
| "step": 589000 | |
| }, | |
| { | |
| "epoch": 2.273489189366236, | |
| "grad_norm": 0.6977965235710144, | |
| "learning_rate": 1.7222277777777778e-05, | |
| "loss": 1.1687, | |
| "step": 590000 | |
| }, | |
| { | |
| "epoch": 2.2773425608736364, | |
| "grad_norm": 0.7121700644493103, | |
| "learning_rate": 1.7166722222222224e-05, | |
| "loss": 1.168, | |
| "step": 591000 | |
| }, | |
| { | |
| "epoch": 2.2811959323810367, | |
| "grad_norm": 0.7015835642814636, | |
| "learning_rate": 1.7111166666666666e-05, | |
| "loss": 1.1687, | |
| "step": 592000 | |
| }, | |
| { | |
| "epoch": 2.285049303888437, | |
| "grad_norm": 0.6966621279716492, | |
| "learning_rate": 1.7055611111111112e-05, | |
| "loss": 1.1675, | |
| "step": 593000 | |
| }, | |
| { | |
| "epoch": 2.2889026753958377, | |
| "grad_norm": 0.7022546529769897, | |
| "learning_rate": 1.7000055555555554e-05, | |
| "loss": 1.1685, | |
| "step": 594000 | |
| }, | |
| { | |
| "epoch": 2.292756046903238, | |
| "grad_norm": 0.7172012329101562, | |
| "learning_rate": 1.6944500000000004e-05, | |
| "loss": 1.1675, | |
| "step": 595000 | |
| }, | |
| { | |
| "epoch": 2.2966094184106383, | |
| "grad_norm": 0.7011358141899109, | |
| "learning_rate": 1.6888944444444446e-05, | |
| "loss": 1.1673, | |
| "step": 596000 | |
| }, | |
| { | |
| "epoch": 2.300462789918039, | |
| "grad_norm": 0.684775710105896, | |
| "learning_rate": 1.683338888888889e-05, | |
| "loss": 1.1682, | |
| "step": 597000 | |
| }, | |
| { | |
| "epoch": 2.3043161614254393, | |
| "grad_norm": 0.7131794095039368, | |
| "learning_rate": 1.6777833333333334e-05, | |
| "loss": 1.1677, | |
| "step": 598000 | |
| }, | |
| { | |
| "epoch": 2.3081695329328396, | |
| "grad_norm": 0.7285271883010864, | |
| "learning_rate": 1.672227777777778e-05, | |
| "loss": 1.1672, | |
| "step": 599000 | |
| }, | |
| { | |
| "epoch": 2.31202290444024, | |
| "grad_norm": 0.7372791171073914, | |
| "learning_rate": 1.6666722222222222e-05, | |
| "loss": 1.1671, | |
| "step": 600000 | |
| }, | |
| { | |
| "epoch": 2.3158762759476406, | |
| "grad_norm": 0.7125265002250671, | |
| "learning_rate": 1.6611166666666668e-05, | |
| "loss": 1.1678, | |
| "step": 601000 | |
| }, | |
| { | |
| "epoch": 2.319729647455041, | |
| "grad_norm": 0.7177942395210266, | |
| "learning_rate": 1.655561111111111e-05, | |
| "loss": 1.1663, | |
| "step": 602000 | |
| }, | |
| { | |
| "epoch": 2.323583018962441, | |
| "grad_norm": 0.7199053168296814, | |
| "learning_rate": 1.6500055555555556e-05, | |
| "loss": 1.1658, | |
| "step": 603000 | |
| }, | |
| { | |
| "epoch": 2.3274363904698414, | |
| "grad_norm": 0.7532017827033997, | |
| "learning_rate": 1.64445e-05, | |
| "loss": 1.1664, | |
| "step": 604000 | |
| }, | |
| { | |
| "epoch": 2.331289761977242, | |
| "grad_norm": 0.7073883414268494, | |
| "learning_rate": 1.6388944444444447e-05, | |
| "loss": 1.1658, | |
| "step": 605000 | |
| }, | |
| { | |
| "epoch": 2.3351431334846424, | |
| "grad_norm": 0.748778223991394, | |
| "learning_rate": 1.633338888888889e-05, | |
| "loss": 1.1666, | |
| "step": 606000 | |
| }, | |
| { | |
| "epoch": 2.3389965049920427, | |
| "grad_norm": 0.722709596157074, | |
| "learning_rate": 1.6277833333333335e-05, | |
| "loss": 1.1663, | |
| "step": 607000 | |
| }, | |
| { | |
| "epoch": 2.342849876499443, | |
| "grad_norm": 0.7432075142860413, | |
| "learning_rate": 1.6222277777777778e-05, | |
| "loss": 1.1664, | |
| "step": 608000 | |
| }, | |
| { | |
| "epoch": 2.3467032480068437, | |
| "grad_norm": 0.7249975204467773, | |
| "learning_rate": 1.6166722222222223e-05, | |
| "loss": 1.1662, | |
| "step": 609000 | |
| }, | |
| { | |
| "epoch": 2.350556619514244, | |
| "grad_norm": 0.7158792614936829, | |
| "learning_rate": 1.6111166666666666e-05, | |
| "loss": 1.1652, | |
| "step": 610000 | |
| }, | |
| { | |
| "epoch": 2.3544099910216443, | |
| "grad_norm": 0.703758180141449, | |
| "learning_rate": 1.605561111111111e-05, | |
| "loss": 1.1647, | |
| "step": 611000 | |
| }, | |
| { | |
| "epoch": 2.358263362529045, | |
| "grad_norm": 0.7096312046051025, | |
| "learning_rate": 1.6000055555555557e-05, | |
| "loss": 1.1649, | |
| "step": 612000 | |
| }, | |
| { | |
| "epoch": 2.3621167340364453, | |
| "grad_norm": 0.7235798239707947, | |
| "learning_rate": 1.59445e-05, | |
| "loss": 1.166, | |
| "step": 613000 | |
| }, | |
| { | |
| "epoch": 2.3659701055438456, | |
| "grad_norm": 0.7283411026000977, | |
| "learning_rate": 1.5888944444444445e-05, | |
| "loss": 1.1656, | |
| "step": 614000 | |
| }, | |
| { | |
| "epoch": 2.369823477051246, | |
| "grad_norm": 0.6973391175270081, | |
| "learning_rate": 1.5833388888888888e-05, | |
| "loss": 1.1662, | |
| "step": 615000 | |
| }, | |
| { | |
| "epoch": 2.3736768485586466, | |
| "grad_norm": 0.6956027150154114, | |
| "learning_rate": 1.5777833333333333e-05, | |
| "loss": 1.1642, | |
| "step": 616000 | |
| }, | |
| { | |
| "epoch": 2.377530220066047, | |
| "grad_norm": 0.7057496309280396, | |
| "learning_rate": 1.572227777777778e-05, | |
| "loss": 1.165, | |
| "step": 617000 | |
| }, | |
| { | |
| "epoch": 2.381383591573447, | |
| "grad_norm": 0.7290093302726746, | |
| "learning_rate": 1.5666722222222225e-05, | |
| "loss": 1.1643, | |
| "step": 618000 | |
| }, | |
| { | |
| "epoch": 2.3852369630808474, | |
| "grad_norm": 0.7349111437797546, | |
| "learning_rate": 1.5611166666666667e-05, | |
| "loss": 1.1639, | |
| "step": 619000 | |
| }, | |
| { | |
| "epoch": 2.389090334588248, | |
| "grad_norm": 0.6929183006286621, | |
| "learning_rate": 1.5555611111111113e-05, | |
| "loss": 1.1647, | |
| "step": 620000 | |
| }, | |
| { | |
| "epoch": 2.3929437060956484, | |
| "grad_norm": 0.7300617694854736, | |
| "learning_rate": 1.5500055555555555e-05, | |
| "loss": 1.1643, | |
| "step": 621000 | |
| }, | |
| { | |
| "epoch": 2.3967970776030487, | |
| "grad_norm": 0.7160629034042358, | |
| "learning_rate": 1.54445e-05, | |
| "loss": 1.1639, | |
| "step": 622000 | |
| }, | |
| { | |
| "epoch": 2.400650449110449, | |
| "grad_norm": 0.7036823034286499, | |
| "learning_rate": 1.5388944444444443e-05, | |
| "loss": 1.1641, | |
| "step": 623000 | |
| }, | |
| { | |
| "epoch": 2.4045038206178497, | |
| "grad_norm": 0.7350702285766602, | |
| "learning_rate": 1.533338888888889e-05, | |
| "loss": 1.1636, | |
| "step": 624000 | |
| }, | |
| { | |
| "epoch": 2.40835719212525, | |
| "grad_norm": 0.7381563186645508, | |
| "learning_rate": 1.527783333333333e-05, | |
| "loss": 1.1649, | |
| "step": 625000 | |
| }, | |
| { | |
| "epoch": 2.4122105636326503, | |
| "grad_norm": 0.7308299541473389, | |
| "learning_rate": 1.522227777777778e-05, | |
| "loss": 1.1638, | |
| "step": 626000 | |
| }, | |
| { | |
| "epoch": 2.416063935140051, | |
| "grad_norm": 0.7187788486480713, | |
| "learning_rate": 1.5166722222222225e-05, | |
| "loss": 1.1637, | |
| "step": 627000 | |
| }, | |
| { | |
| "epoch": 2.4199173066474513, | |
| "grad_norm": 0.7224980592727661, | |
| "learning_rate": 1.5111166666666669e-05, | |
| "loss": 1.1623, | |
| "step": 628000 | |
| }, | |
| { | |
| "epoch": 2.4237706781548516, | |
| "grad_norm": 0.7195943593978882, | |
| "learning_rate": 1.5055611111111113e-05, | |
| "loss": 1.1635, | |
| "step": 629000 | |
| }, | |
| { | |
| "epoch": 2.427624049662252, | |
| "grad_norm": 0.7227275371551514, | |
| "learning_rate": 1.5000055555555557e-05, | |
| "loss": 1.1625, | |
| "step": 630000 | |
| }, | |
| { | |
| "epoch": 2.431477421169652, | |
| "grad_norm": 0.7128928899765015, | |
| "learning_rate": 1.49445e-05, | |
| "loss": 1.1627, | |
| "step": 631000 | |
| }, | |
| { | |
| "epoch": 2.435330792677053, | |
| "grad_norm": 0.6905311346054077, | |
| "learning_rate": 1.4888944444444445e-05, | |
| "loss": 1.1627, | |
| "step": 632000 | |
| }, | |
| { | |
| "epoch": 2.439184164184453, | |
| "grad_norm": 0.7287958860397339, | |
| "learning_rate": 1.4833388888888889e-05, | |
| "loss": 1.1635, | |
| "step": 633000 | |
| }, | |
| { | |
| "epoch": 2.4430375356918534, | |
| "grad_norm": 0.7288080453872681, | |
| "learning_rate": 1.4777833333333334e-05, | |
| "loss": 1.1615, | |
| "step": 634000 | |
| }, | |
| { | |
| "epoch": 2.446890907199254, | |
| "grad_norm": 0.7119095921516418, | |
| "learning_rate": 1.4722277777777778e-05, | |
| "loss": 1.1626, | |
| "step": 635000 | |
| }, | |
| { | |
| "epoch": 2.4507442787066545, | |
| "grad_norm": 0.7402783036231995, | |
| "learning_rate": 1.4666722222222223e-05, | |
| "loss": 1.1625, | |
| "step": 636000 | |
| }, | |
| { | |
| "epoch": 2.4545976502140547, | |
| "grad_norm": 0.7251101732254028, | |
| "learning_rate": 1.4611166666666668e-05, | |
| "loss": 1.162, | |
| "step": 637000 | |
| }, | |
| { | |
| "epoch": 2.458451021721455, | |
| "grad_norm": 0.7154144644737244, | |
| "learning_rate": 1.4555611111111112e-05, | |
| "loss": 1.1629, | |
| "step": 638000 | |
| }, | |
| { | |
| "epoch": 2.4623043932288557, | |
| "grad_norm": 0.7547957301139832, | |
| "learning_rate": 1.4500055555555556e-05, | |
| "loss": 1.1618, | |
| "step": 639000 | |
| }, | |
| { | |
| "epoch": 2.466157764736256, | |
| "grad_norm": 0.7051396369934082, | |
| "learning_rate": 1.44445e-05, | |
| "loss": 1.1625, | |
| "step": 640000 | |
| }, | |
| { | |
| "epoch": 2.4700111362436563, | |
| "grad_norm": 0.7302169799804688, | |
| "learning_rate": 1.4388944444444444e-05, | |
| "loss": 1.1619, | |
| "step": 641000 | |
| }, | |
| { | |
| "epoch": 2.4738645077510566, | |
| "grad_norm": 0.7116556167602539, | |
| "learning_rate": 1.433338888888889e-05, | |
| "loss": 1.1615, | |
| "step": 642000 | |
| }, | |
| { | |
| "epoch": 2.4777178792584573, | |
| "grad_norm": 0.6997031569480896, | |
| "learning_rate": 1.4277833333333334e-05, | |
| "loss": 1.1609, | |
| "step": 643000 | |
| }, | |
| { | |
| "epoch": 2.4815712507658576, | |
| "grad_norm": 0.7330045700073242, | |
| "learning_rate": 1.4222277777777778e-05, | |
| "loss": 1.1608, | |
| "step": 644000 | |
| }, | |
| { | |
| "epoch": 2.485424622273258, | |
| "grad_norm": 0.7158441543579102, | |
| "learning_rate": 1.4166722222222222e-05, | |
| "loss": 1.1612, | |
| "step": 645000 | |
| }, | |
| { | |
| "epoch": 2.489277993780658, | |
| "grad_norm": 0.7308253049850464, | |
| "learning_rate": 1.4111166666666666e-05, | |
| "loss": 1.1608, | |
| "step": 646000 | |
| }, | |
| { | |
| "epoch": 2.493131365288059, | |
| "grad_norm": 0.746481716632843, | |
| "learning_rate": 1.405561111111111e-05, | |
| "loss": 1.1614, | |
| "step": 647000 | |
| }, | |
| { | |
| "epoch": 2.496984736795459, | |
| "grad_norm": 0.740993320941925, | |
| "learning_rate": 1.4000055555555554e-05, | |
| "loss": 1.1613, | |
| "step": 648000 | |
| }, | |
| { | |
| "epoch": 2.5008381083028595, | |
| "grad_norm": 0.7341724038124084, | |
| "learning_rate": 1.3944500000000002e-05, | |
| "loss": 1.1603, | |
| "step": 649000 | |
| }, | |
| { | |
| "epoch": 2.50469147981026, | |
| "grad_norm": 0.7422592639923096, | |
| "learning_rate": 1.3888944444444446e-05, | |
| "loss": 1.1598, | |
| "step": 650000 | |
| }, | |
| { | |
| "epoch": 2.5085448513176605, | |
| "grad_norm": 0.7050088047981262, | |
| "learning_rate": 1.383338888888889e-05, | |
| "loss": 1.1599, | |
| "step": 651000 | |
| }, | |
| { | |
| "epoch": 2.5123982228250608, | |
| "grad_norm": 0.7259443998336792, | |
| "learning_rate": 1.3777833333333334e-05, | |
| "loss": 1.1595, | |
| "step": 652000 | |
| }, | |
| { | |
| "epoch": 2.516251594332461, | |
| "grad_norm": 0.7026506066322327, | |
| "learning_rate": 1.3722277777777778e-05, | |
| "loss": 1.16, | |
| "step": 653000 | |
| }, | |
| { | |
| "epoch": 2.5201049658398613, | |
| "grad_norm": 0.7095285058021545, | |
| "learning_rate": 1.3666722222222222e-05, | |
| "loss": 1.1597, | |
| "step": 654000 | |
| }, | |
| { | |
| "epoch": 2.523958337347262, | |
| "grad_norm": 0.7308704853057861, | |
| "learning_rate": 1.3611166666666666e-05, | |
| "loss": 1.1602, | |
| "step": 655000 | |
| }, | |
| { | |
| "epoch": 2.5278117088546623, | |
| "grad_norm": 0.7496655583381653, | |
| "learning_rate": 1.355561111111111e-05, | |
| "loss": 1.1587, | |
| "step": 656000 | |
| }, | |
| { | |
| "epoch": 2.531665080362063, | |
| "grad_norm": 0.7287865877151489, | |
| "learning_rate": 1.3500055555555557e-05, | |
| "loss": 1.1595, | |
| "step": 657000 | |
| }, | |
| { | |
| "epoch": 2.5355184518694633, | |
| "grad_norm": 0.740280032157898, | |
| "learning_rate": 1.3444500000000001e-05, | |
| "loss": 1.1586, | |
| "step": 658000 | |
| }, | |
| { | |
| "epoch": 2.5393718233768636, | |
| "grad_norm": 0.7381448149681091, | |
| "learning_rate": 1.3388944444444446e-05, | |
| "loss": 1.1599, | |
| "step": 659000 | |
| }, | |
| { | |
| "epoch": 2.543225194884264, | |
| "grad_norm": 0.7290709614753723, | |
| "learning_rate": 1.333338888888889e-05, | |
| "loss": 1.1589, | |
| "step": 660000 | |
| }, | |
| { | |
| "epoch": 2.547078566391664, | |
| "grad_norm": 0.7309290766716003, | |
| "learning_rate": 1.3277833333333334e-05, | |
| "loss": 1.1583, | |
| "step": 661000 | |
| }, | |
| { | |
| "epoch": 2.550931937899065, | |
| "grad_norm": 0.7135186195373535, | |
| "learning_rate": 1.3222277777777778e-05, | |
| "loss": 1.1586, | |
| "step": 662000 | |
| }, | |
| { | |
| "epoch": 2.554785309406465, | |
| "grad_norm": 0.7129361033439636, | |
| "learning_rate": 1.3166722222222222e-05, | |
| "loss": 1.1583, | |
| "step": 663000 | |
| }, | |
| { | |
| "epoch": 2.5586386809138655, | |
| "grad_norm": 0.7506452798843384, | |
| "learning_rate": 1.3111166666666666e-05, | |
| "loss": 1.159, | |
| "step": 664000 | |
| }, | |
| { | |
| "epoch": 2.562492052421266, | |
| "grad_norm": 0.7159491181373596, | |
| "learning_rate": 1.3055611111111113e-05, | |
| "loss": 1.158, | |
| "step": 665000 | |
| }, | |
| { | |
| "epoch": 2.5663454239286665, | |
| "grad_norm": 0.7607939839363098, | |
| "learning_rate": 1.3000055555555557e-05, | |
| "loss": 1.1584, | |
| "step": 666000 | |
| }, | |
| { | |
| "epoch": 2.5701987954360668, | |
| "grad_norm": 0.737775444984436, | |
| "learning_rate": 1.2944500000000001e-05, | |
| "loss": 1.1576, | |
| "step": 667000 | |
| }, | |
| { | |
| "epoch": 2.574052166943467, | |
| "grad_norm": 0.7362163662910461, | |
| "learning_rate": 1.2888944444444445e-05, | |
| "loss": 1.158, | |
| "step": 668000 | |
| }, | |
| { | |
| "epoch": 2.5779055384508673, | |
| "grad_norm": 0.7213948369026184, | |
| "learning_rate": 1.283338888888889e-05, | |
| "loss": 1.1582, | |
| "step": 669000 | |
| }, | |
| { | |
| "epoch": 2.581758909958268, | |
| "grad_norm": 0.7307848334312439, | |
| "learning_rate": 1.2777833333333333e-05, | |
| "loss": 1.158, | |
| "step": 670000 | |
| }, | |
| { | |
| "epoch": 2.5856122814656683, | |
| "grad_norm": 0.7397758960723877, | |
| "learning_rate": 1.2722277777777777e-05, | |
| "loss": 1.1583, | |
| "step": 671000 | |
| }, | |
| { | |
| "epoch": 2.5894656529730686, | |
| "grad_norm": 0.730469822883606, | |
| "learning_rate": 1.2666722222222221e-05, | |
| "loss": 1.1574, | |
| "step": 672000 | |
| }, | |
| { | |
| "epoch": 2.5933190244804694, | |
| "grad_norm": 0.7288331985473633, | |
| "learning_rate": 1.2611166666666669e-05, | |
| "loss": 1.1568, | |
| "step": 673000 | |
| }, | |
| { | |
| "epoch": 2.5971723959878696, | |
| "grad_norm": 0.7394465208053589, | |
| "learning_rate": 1.2555611111111113e-05, | |
| "loss": 1.1573, | |
| "step": 674000 | |
| }, | |
| { | |
| "epoch": 2.60102576749527, | |
| "grad_norm": 0.7209343314170837, | |
| "learning_rate": 1.2500055555555557e-05, | |
| "loss": 1.1566, | |
| "step": 675000 | |
| }, | |
| { | |
| "epoch": 2.60487913900267, | |
| "grad_norm": 0.7466188669204712, | |
| "learning_rate": 1.2444500000000001e-05, | |
| "loss": 1.1567, | |
| "step": 676000 | |
| }, | |
| { | |
| "epoch": 2.608732510510071, | |
| "grad_norm": 0.7297884821891785, | |
| "learning_rate": 1.2388944444444445e-05, | |
| "loss": 1.1562, | |
| "step": 677000 | |
| }, | |
| { | |
| "epoch": 2.612585882017471, | |
| "grad_norm": 0.7409054040908813, | |
| "learning_rate": 1.233338888888889e-05, | |
| "loss": 1.1563, | |
| "step": 678000 | |
| }, | |
| { | |
| "epoch": 2.6164392535248715, | |
| "grad_norm": 0.7495500445365906, | |
| "learning_rate": 1.2277833333333335e-05, | |
| "loss": 1.1568, | |
| "step": 679000 | |
| }, | |
| { | |
| "epoch": 2.6202926250322722, | |
| "grad_norm": 0.7482118606567383, | |
| "learning_rate": 1.2222277777777779e-05, | |
| "loss": 1.1559, | |
| "step": 680000 | |
| }, | |
| { | |
| "epoch": 2.6241459965396725, | |
| "grad_norm": 0.7128192782402039, | |
| "learning_rate": 1.2166722222222223e-05, | |
| "loss": 1.1564, | |
| "step": 681000 | |
| }, | |
| { | |
| "epoch": 2.627999368047073, | |
| "grad_norm": 0.7315691709518433, | |
| "learning_rate": 1.2111166666666668e-05, | |
| "loss": 1.1555, | |
| "step": 682000 | |
| }, | |
| { | |
| "epoch": 2.631852739554473, | |
| "grad_norm": 0.7355465292930603, | |
| "learning_rate": 1.2055611111111113e-05, | |
| "loss": 1.1555, | |
| "step": 683000 | |
| }, | |
| { | |
| "epoch": 2.6357061110618734, | |
| "grad_norm": 0.7146631479263306, | |
| "learning_rate": 1.2000055555555557e-05, | |
| "loss": 1.1553, | |
| "step": 684000 | |
| }, | |
| { | |
| "epoch": 2.639559482569274, | |
| "grad_norm": 0.7412270903587341, | |
| "learning_rate": 1.19445e-05, | |
| "loss": 1.1554, | |
| "step": 685000 | |
| }, | |
| { | |
| "epoch": 2.6434128540766744, | |
| "grad_norm": 0.7790284752845764, | |
| "learning_rate": 1.1888944444444446e-05, | |
| "loss": 1.1557, | |
| "step": 686000 | |
| }, | |
| { | |
| "epoch": 2.6472662255840747, | |
| "grad_norm": 0.730482280254364, | |
| "learning_rate": 1.183338888888889e-05, | |
| "loss": 1.1553, | |
| "step": 687000 | |
| }, | |
| { | |
| "epoch": 2.6511195970914754, | |
| "grad_norm": 0.7502373456954956, | |
| "learning_rate": 1.1777833333333334e-05, | |
| "loss": 1.1556, | |
| "step": 688000 | |
| }, | |
| { | |
| "epoch": 2.6549729685988757, | |
| "grad_norm": 0.7493919730186462, | |
| "learning_rate": 1.1722277777777778e-05, | |
| "loss": 1.1549, | |
| "step": 689000 | |
| }, | |
| { | |
| "epoch": 2.658826340106276, | |
| "grad_norm": 0.7542137503623962, | |
| "learning_rate": 1.1666722222222224e-05, | |
| "loss": 1.1555, | |
| "step": 690000 | |
| }, | |
| { | |
| "epoch": 2.6626797116136762, | |
| "grad_norm": 0.7587451338768005, | |
| "learning_rate": 1.1611166666666668e-05, | |
| "loss": 1.1552, | |
| "step": 691000 | |
| }, | |
| { | |
| "epoch": 2.666533083121077, | |
| "grad_norm": 0.7310791611671448, | |
| "learning_rate": 1.1555611111111112e-05, | |
| "loss": 1.1546, | |
| "step": 692000 | |
| }, | |
| { | |
| "epoch": 2.6703864546284772, | |
| "grad_norm": 0.7424149513244629, | |
| "learning_rate": 1.1500055555555556e-05, | |
| "loss": 1.1546, | |
| "step": 693000 | |
| }, | |
| { | |
| "epoch": 2.6742398261358775, | |
| "grad_norm": 0.7413462996482849, | |
| "learning_rate": 1.14445e-05, | |
| "loss": 1.1544, | |
| "step": 694000 | |
| }, | |
| { | |
| "epoch": 2.6780931976432782, | |
| "grad_norm": 0.76901775598526, | |
| "learning_rate": 1.1388944444444444e-05, | |
| "loss": 1.1544, | |
| "step": 695000 | |
| }, | |
| { | |
| "epoch": 2.6819465691506785, | |
| "grad_norm": 0.736726701259613, | |
| "learning_rate": 1.133338888888889e-05, | |
| "loss": 1.154, | |
| "step": 696000 | |
| }, | |
| { | |
| "epoch": 2.685799940658079, | |
| "grad_norm": 0.7254749536514282, | |
| "learning_rate": 1.1277833333333334e-05, | |
| "loss": 1.1545, | |
| "step": 697000 | |
| }, | |
| { | |
| "epoch": 2.689653312165479, | |
| "grad_norm": 0.7280982136726379, | |
| "learning_rate": 1.1222277777777778e-05, | |
| "loss": 1.1545, | |
| "step": 698000 | |
| }, | |
| { | |
| "epoch": 2.6935066836728794, | |
| "grad_norm": 0.722722053527832, | |
| "learning_rate": 1.1166722222222222e-05, | |
| "loss": 1.1538, | |
| "step": 699000 | |
| }, | |
| { | |
| "epoch": 2.69736005518028, | |
| "grad_norm": 0.7584505081176758, | |
| "learning_rate": 1.1111166666666666e-05, | |
| "loss": 1.1535, | |
| "step": 700000 | |
| }, | |
| { | |
| "epoch": 2.7012134266876804, | |
| "grad_norm": 0.7559113502502441, | |
| "learning_rate": 1.105561111111111e-05, | |
| "loss": 1.1539, | |
| "step": 701000 | |
| }, | |
| { | |
| "epoch": 2.7050667981950807, | |
| "grad_norm": 0.735378086566925, | |
| "learning_rate": 1.1000055555555556e-05, | |
| "loss": 1.1538, | |
| "step": 702000 | |
| }, | |
| { | |
| "epoch": 2.7089201697024814, | |
| "grad_norm": 0.7334083318710327, | |
| "learning_rate": 1.09445e-05, | |
| "loss": 1.1538, | |
| "step": 703000 | |
| }, | |
| { | |
| "epoch": 2.7127735412098817, | |
| "grad_norm": 0.7132292985916138, | |
| "learning_rate": 1.0888944444444444e-05, | |
| "loss": 1.1529, | |
| "step": 704000 | |
| }, | |
| { | |
| "epoch": 2.716626912717282, | |
| "grad_norm": 0.7504042983055115, | |
| "learning_rate": 1.0833388888888888e-05, | |
| "loss": 1.1522, | |
| "step": 705000 | |
| }, | |
| { | |
| "epoch": 2.7204802842246822, | |
| "grad_norm": 0.7536928057670593, | |
| "learning_rate": 1.0777833333333334e-05, | |
| "loss": 1.1532, | |
| "step": 706000 | |
| }, | |
| { | |
| "epoch": 2.7243336557320825, | |
| "grad_norm": 0.7525560259819031, | |
| "learning_rate": 1.0722277777777778e-05, | |
| "loss": 1.1531, | |
| "step": 707000 | |
| }, | |
| { | |
| "epoch": 2.7281870272394833, | |
| "grad_norm": 0.7648515701293945, | |
| "learning_rate": 1.0666722222222222e-05, | |
| "loss": 1.1537, | |
| "step": 708000 | |
| }, | |
| { | |
| "epoch": 2.7320403987468835, | |
| "grad_norm": 0.7191064953804016, | |
| "learning_rate": 1.0611166666666668e-05, | |
| "loss": 1.1532, | |
| "step": 709000 | |
| }, | |
| { | |
| "epoch": 2.7358937702542843, | |
| "grad_norm": 0.7417696714401245, | |
| "learning_rate": 1.0555611111111112e-05, | |
| "loss": 1.1536, | |
| "step": 710000 | |
| }, | |
| { | |
| "epoch": 2.7397471417616845, | |
| "grad_norm": 0.7305838465690613, | |
| "learning_rate": 1.0500055555555556e-05, | |
| "loss": 1.153, | |
| "step": 711000 | |
| }, | |
| { | |
| "epoch": 2.743600513269085, | |
| "grad_norm": 0.7491075992584229, | |
| "learning_rate": 1.04445e-05, | |
| "loss": 1.1526, | |
| "step": 712000 | |
| }, | |
| { | |
| "epoch": 2.747453884776485, | |
| "grad_norm": 0.7386889457702637, | |
| "learning_rate": 1.0388944444444445e-05, | |
| "loss": 1.1523, | |
| "step": 713000 | |
| }, | |
| { | |
| "epoch": 2.7513072562838854, | |
| "grad_norm": 0.7188745737075806, | |
| "learning_rate": 1.033338888888889e-05, | |
| "loss": 1.1524, | |
| "step": 714000 | |
| }, | |
| { | |
| "epoch": 2.755160627791286, | |
| "grad_norm": 0.7587524652481079, | |
| "learning_rate": 1.0277833333333333e-05, | |
| "loss": 1.1523, | |
| "step": 715000 | |
| }, | |
| { | |
| "epoch": 2.7590139992986864, | |
| "grad_norm": 0.74712073802948, | |
| "learning_rate": 1.0222277777777778e-05, | |
| "loss": 1.1523, | |
| "step": 716000 | |
| }, | |
| { | |
| "epoch": 2.7628673708060867, | |
| "grad_norm": 0.700933039188385, | |
| "learning_rate": 1.0166722222222223e-05, | |
| "loss": 1.1517, | |
| "step": 717000 | |
| }, | |
| { | |
| "epoch": 2.7667207423134874, | |
| "grad_norm": 0.7546241879463196, | |
| "learning_rate": 1.0111166666666667e-05, | |
| "loss": 1.1515, | |
| "step": 718000 | |
| }, | |
| { | |
| "epoch": 2.7705741138208877, | |
| "grad_norm": 0.7323986291885376, | |
| "learning_rate": 1.0055611111111111e-05, | |
| "loss": 1.1522, | |
| "step": 719000 | |
| }, | |
| { | |
| "epoch": 2.774427485328288, | |
| "grad_norm": 0.7574964761734009, | |
| "learning_rate": 1.0000055555555555e-05, | |
| "loss": 1.1516, | |
| "step": 720000 | |
| }, | |
| { | |
| "epoch": 2.7782808568356883, | |
| "grad_norm": 0.7652174234390259, | |
| "learning_rate": 9.944500000000001e-06, | |
| "loss": 1.1512, | |
| "step": 721000 | |
| }, | |
| { | |
| "epoch": 2.7821342283430885, | |
| "grad_norm": 0.7431154251098633, | |
| "learning_rate": 9.888944444444445e-06, | |
| "loss": 1.1515, | |
| "step": 722000 | |
| }, | |
| { | |
| "epoch": 2.7859875998504893, | |
| "grad_norm": 0.7359040975570679, | |
| "learning_rate": 9.83338888888889e-06, | |
| "loss": 1.1507, | |
| "step": 723000 | |
| }, | |
| { | |
| "epoch": 2.7898409713578896, | |
| "grad_norm": 0.7209459543228149, | |
| "learning_rate": 9.777833333333333e-06, | |
| "loss": 1.1515, | |
| "step": 724000 | |
| }, | |
| { | |
| "epoch": 2.79369434286529, | |
| "grad_norm": 0.7616684436798096, | |
| "learning_rate": 9.722277777777779e-06, | |
| "loss": 1.1509, | |
| "step": 725000 | |
| }, | |
| { | |
| "epoch": 2.7975477143726906, | |
| "grad_norm": 0.7581999897956848, | |
| "learning_rate": 9.666722222222223e-06, | |
| "loss": 1.1512, | |
| "step": 726000 | |
| }, | |
| { | |
| "epoch": 2.801401085880091, | |
| "grad_norm": 0.7473781704902649, | |
| "learning_rate": 9.611166666666667e-06, | |
| "loss": 1.1516, | |
| "step": 727000 | |
| }, | |
| { | |
| "epoch": 2.805254457387491, | |
| "grad_norm": 0.7421666383743286, | |
| "learning_rate": 9.555611111111111e-06, | |
| "loss": 1.1503, | |
| "step": 728000 | |
| }, | |
| { | |
| "epoch": 2.8091078288948914, | |
| "grad_norm": 0.7370326519012451, | |
| "learning_rate": 9.500055555555557e-06, | |
| "loss": 1.1512, | |
| "step": 729000 | |
| }, | |
| { | |
| "epoch": 2.812961200402292, | |
| "grad_norm": 0.7482893466949463, | |
| "learning_rate": 9.4445e-06, | |
| "loss": 1.1502, | |
| "step": 730000 | |
| }, | |
| { | |
| "epoch": 2.8168145719096924, | |
| "grad_norm": 0.7539274096488953, | |
| "learning_rate": 9.388944444444445e-06, | |
| "loss": 1.1499, | |
| "step": 731000 | |
| }, | |
| { | |
| "epoch": 2.8206679434170927, | |
| "grad_norm": 0.7609211802482605, | |
| "learning_rate": 9.333388888888889e-06, | |
| "loss": 1.1502, | |
| "step": 732000 | |
| }, | |
| { | |
| "epoch": 2.8245213149244934, | |
| "grad_norm": 0.7571685314178467, | |
| "learning_rate": 9.277833333333335e-06, | |
| "loss": 1.1501, | |
| "step": 733000 | |
| }, | |
| { | |
| "epoch": 2.8283746864318937, | |
| "grad_norm": 0.7442651987075806, | |
| "learning_rate": 9.222277777777779e-06, | |
| "loss": 1.1505, | |
| "step": 734000 | |
| }, | |
| { | |
| "epoch": 2.832228057939294, | |
| "grad_norm": 0.7343342304229736, | |
| "learning_rate": 9.166722222222223e-06, | |
| "loss": 1.1502, | |
| "step": 735000 | |
| }, | |
| { | |
| "epoch": 2.8360814294466943, | |
| "grad_norm": 0.7149516940116882, | |
| "learning_rate": 9.111166666666667e-06, | |
| "loss": 1.1498, | |
| "step": 736000 | |
| }, | |
| { | |
| "epoch": 2.8399348009540946, | |
| "grad_norm": 0.7458763718605042, | |
| "learning_rate": 9.055611111111112e-06, | |
| "loss": 1.1494, | |
| "step": 737000 | |
| }, | |
| { | |
| "epoch": 2.8437881724614953, | |
| "grad_norm": 0.7562174797058105, | |
| "learning_rate": 9.000055555555556e-06, | |
| "loss": 1.1497, | |
| "step": 738000 | |
| }, | |
| { | |
| "epoch": 2.8476415439688956, | |
| "grad_norm": 0.7606706619262695, | |
| "learning_rate": 8.9445e-06, | |
| "loss": 1.1494, | |
| "step": 739000 | |
| }, | |
| { | |
| "epoch": 2.851494915476296, | |
| "grad_norm": 0.7609912753105164, | |
| "learning_rate": 8.888944444444445e-06, | |
| "loss": 1.149, | |
| "step": 740000 | |
| }, | |
| { | |
| "epoch": 2.8553482869836966, | |
| "grad_norm": 0.7673037052154541, | |
| "learning_rate": 8.83338888888889e-06, | |
| "loss": 1.1499, | |
| "step": 741000 | |
| }, | |
| { | |
| "epoch": 2.859201658491097, | |
| "grad_norm": 0.7546699643135071, | |
| "learning_rate": 8.777833333333334e-06, | |
| "loss": 1.1491, | |
| "step": 742000 | |
| }, | |
| { | |
| "epoch": 2.863055029998497, | |
| "grad_norm": 0.7696357369422913, | |
| "learning_rate": 8.722277777777778e-06, | |
| "loss": 1.1494, | |
| "step": 743000 | |
| }, | |
| { | |
| "epoch": 2.8669084015058974, | |
| "grad_norm": 0.7652831673622131, | |
| "learning_rate": 8.666722222222224e-06, | |
| "loss": 1.1492, | |
| "step": 744000 | |
| }, | |
| { | |
| "epoch": 2.8707617730132977, | |
| "grad_norm": 0.7520629167556763, | |
| "learning_rate": 8.611166666666668e-06, | |
| "loss": 1.1485, | |
| "step": 745000 | |
| }, | |
| { | |
| "epoch": 2.8746151445206984, | |
| "grad_norm": 0.7434529066085815, | |
| "learning_rate": 8.555611111111112e-06, | |
| "loss": 1.1483, | |
| "step": 746000 | |
| }, | |
| { | |
| "epoch": 2.8784685160280987, | |
| "grad_norm": 0.7456247210502625, | |
| "learning_rate": 8.500055555555556e-06, | |
| "loss": 1.1486, | |
| "step": 747000 | |
| }, | |
| { | |
| "epoch": 2.8823218875354994, | |
| "grad_norm": 0.7586479187011719, | |
| "learning_rate": 8.4445e-06, | |
| "loss": 1.1478, | |
| "step": 748000 | |
| }, | |
| { | |
| "epoch": 2.8861752590428997, | |
| "grad_norm": 0.7417891025543213, | |
| "learning_rate": 8.388944444444446e-06, | |
| "loss": 1.1473, | |
| "step": 749000 | |
| }, | |
| { | |
| "epoch": 2.8900286305503, | |
| "grad_norm": 0.7475762367248535, | |
| "learning_rate": 8.33338888888889e-06, | |
| "loss": 1.1484, | |
| "step": 750000 | |
| }, | |
| { | |
| "epoch": 2.8938820020577003, | |
| "grad_norm": 0.7549064755439758, | |
| "learning_rate": 8.277833333333334e-06, | |
| "loss": 1.1483, | |
| "step": 751000 | |
| }, | |
| { | |
| "epoch": 2.8977353735651006, | |
| "grad_norm": 0.7571744322776794, | |
| "learning_rate": 8.222277777777778e-06, | |
| "loss": 1.148, | |
| "step": 752000 | |
| }, | |
| { | |
| "epoch": 2.9015887450725013, | |
| "grad_norm": 0.7129827737808228, | |
| "learning_rate": 8.166722222222222e-06, | |
| "loss": 1.1486, | |
| "step": 753000 | |
| }, | |
| { | |
| "epoch": 2.9054421165799016, | |
| "grad_norm": 0.7594377994537354, | |
| "learning_rate": 8.111166666666666e-06, | |
| "loss": 1.1478, | |
| "step": 754000 | |
| }, | |
| { | |
| "epoch": 2.909295488087302, | |
| "grad_norm": 0.7634985446929932, | |
| "learning_rate": 8.05561111111111e-06, | |
| "loss": 1.1473, | |
| "step": 755000 | |
| }, | |
| { | |
| "epoch": 2.9131488595947026, | |
| "grad_norm": 0.7703734636306763, | |
| "learning_rate": 8.000055555555556e-06, | |
| "loss": 1.148, | |
| "step": 756000 | |
| }, | |
| { | |
| "epoch": 2.917002231102103, | |
| "grad_norm": 0.7532411813735962, | |
| "learning_rate": 7.9445e-06, | |
| "loss": 1.1475, | |
| "step": 757000 | |
| }, | |
| { | |
| "epoch": 2.920855602609503, | |
| "grad_norm": 0.7445677518844604, | |
| "learning_rate": 7.888944444444444e-06, | |
| "loss": 1.1462, | |
| "step": 758000 | |
| }, | |
| { | |
| "epoch": 2.9247089741169034, | |
| "grad_norm": 0.7641280293464661, | |
| "learning_rate": 7.833388888888888e-06, | |
| "loss": 1.1463, | |
| "step": 759000 | |
| }, | |
| { | |
| "epoch": 2.9285623456243037, | |
| "grad_norm": 0.7298674583435059, | |
| "learning_rate": 7.777833333333334e-06, | |
| "loss": 1.1475, | |
| "step": 760000 | |
| }, | |
| { | |
| "epoch": 2.9324157171317045, | |
| "grad_norm": 0.7470799684524536, | |
| "learning_rate": 7.722277777777778e-06, | |
| "loss": 1.1478, | |
| "step": 761000 | |
| }, | |
| { | |
| "epoch": 2.9362690886391047, | |
| "grad_norm": 0.7401430010795593, | |
| "learning_rate": 7.666722222222222e-06, | |
| "loss": 1.1471, | |
| "step": 762000 | |
| }, | |
| { | |
| "epoch": 2.940122460146505, | |
| "grad_norm": 0.7350935935974121, | |
| "learning_rate": 7.611166666666667e-06, | |
| "loss": 1.1461, | |
| "step": 763000 | |
| }, | |
| { | |
| "epoch": 2.9439758316539058, | |
| "grad_norm": 0.7517477869987488, | |
| "learning_rate": 7.5556111111111115e-06, | |
| "loss": 1.1474, | |
| "step": 764000 | |
| }, | |
| { | |
| "epoch": 2.947829203161306, | |
| "grad_norm": 0.7496416568756104, | |
| "learning_rate": 7.500055555555556e-06, | |
| "loss": 1.1469, | |
| "step": 765000 | |
| }, | |
| { | |
| "epoch": 2.9516825746687063, | |
| "grad_norm": 0.7540420889854431, | |
| "learning_rate": 7.4445000000000005e-06, | |
| "loss": 1.1468, | |
| "step": 766000 | |
| }, | |
| { | |
| "epoch": 2.9555359461761066, | |
| "grad_norm": 0.7611315846443176, | |
| "learning_rate": 7.3889444444444445e-06, | |
| "loss": 1.1471, | |
| "step": 767000 | |
| }, | |
| { | |
| "epoch": 2.9593893176835073, | |
| "grad_norm": 0.7721447944641113, | |
| "learning_rate": 7.333388888888889e-06, | |
| "loss": 1.1462, | |
| "step": 768000 | |
| }, | |
| { | |
| "epoch": 2.9632426891909076, | |
| "grad_norm": 0.7783055305480957, | |
| "learning_rate": 7.277833333333333e-06, | |
| "loss": 1.1464, | |
| "step": 769000 | |
| }, | |
| { | |
| "epoch": 2.967096060698308, | |
| "grad_norm": 0.7445676326751709, | |
| "learning_rate": 7.2222777777777775e-06, | |
| "loss": 1.146, | |
| "step": 770000 | |
| }, | |
| { | |
| "epoch": 2.9709494322057086, | |
| "grad_norm": 0.7596333622932434, | |
| "learning_rate": 7.1667222222222215e-06, | |
| "loss": 1.1466, | |
| "step": 771000 | |
| }, | |
| { | |
| "epoch": 2.974802803713109, | |
| "grad_norm": 0.7583540081977844, | |
| "learning_rate": 7.111166666666667e-06, | |
| "loss": 1.1466, | |
| "step": 772000 | |
| }, | |
| { | |
| "epoch": 2.978656175220509, | |
| "grad_norm": 0.7816259860992432, | |
| "learning_rate": 7.055611111111111e-06, | |
| "loss": 1.1461, | |
| "step": 773000 | |
| }, | |
| { | |
| "epoch": 2.9825095467279095, | |
| "grad_norm": 0.757046639919281, | |
| "learning_rate": 7.000055555555555e-06, | |
| "loss": 1.146, | |
| "step": 774000 | |
| }, | |
| { | |
| "epoch": 2.9863629182353097, | |
| "grad_norm": 0.7732102870941162, | |
| "learning_rate": 6.944500000000001e-06, | |
| "loss": 1.1456, | |
| "step": 775000 | |
| }, | |
| { | |
| "epoch": 2.9902162897427105, | |
| "grad_norm": 0.7478325963020325, | |
| "learning_rate": 6.888944444444445e-06, | |
| "loss": 1.1448, | |
| "step": 776000 | |
| }, | |
| { | |
| "epoch": 2.9940696612501108, | |
| "grad_norm": 0.7565175294876099, | |
| "learning_rate": 6.833388888888889e-06, | |
| "loss": 1.1461, | |
| "step": 777000 | |
| }, | |
| { | |
| "epoch": 2.997923032757511, | |
| "grad_norm": 0.7752691507339478, | |
| "learning_rate": 6.777833333333333e-06, | |
| "loss": 1.1451, | |
| "step": 778000 | |
| }, | |
| { | |
| "epoch": 3.0017764042649118, | |
| "grad_norm": 0.7705357670783997, | |
| "learning_rate": 6.722277777777779e-06, | |
| "loss": 1.1455, | |
| "step": 779000 | |
| }, | |
| { | |
| "epoch": 3.005629775772312, | |
| "grad_norm": 0.729576587677002, | |
| "learning_rate": 6.666722222222223e-06, | |
| "loss": 1.145, | |
| "step": 780000 | |
| }, | |
| { | |
| "epoch": 3.0094831472797123, | |
| "grad_norm": 0.7434535026550293, | |
| "learning_rate": 6.611166666666667e-06, | |
| "loss": 1.1451, | |
| "step": 781000 | |
| }, | |
| { | |
| "epoch": 3.0133365187871126, | |
| "grad_norm": 0.7545701861381531, | |
| "learning_rate": 6.555611111111111e-06, | |
| "loss": 1.1449, | |
| "step": 782000 | |
| }, | |
| { | |
| "epoch": 3.0171898902945133, | |
| "grad_norm": 0.7772207856178284, | |
| "learning_rate": 6.500055555555557e-06, | |
| "loss": 1.1456, | |
| "step": 783000 | |
| }, | |
| { | |
| "epoch": 3.0210432618019136, | |
| "grad_norm": 0.7683879137039185, | |
| "learning_rate": 6.444500000000001e-06, | |
| "loss": 1.1446, | |
| "step": 784000 | |
| }, | |
| { | |
| "epoch": 3.024896633309314, | |
| "grad_norm": 0.8129323720932007, | |
| "learning_rate": 6.388944444444445e-06, | |
| "loss": 1.1453, | |
| "step": 785000 | |
| }, | |
| { | |
| "epoch": 3.028750004816714, | |
| "grad_norm": 0.7712565660476685, | |
| "learning_rate": 6.333388888888889e-06, | |
| "loss": 1.1438, | |
| "step": 786000 | |
| }, | |
| { | |
| "epoch": 3.032603376324115, | |
| "grad_norm": 0.7380220293998718, | |
| "learning_rate": 6.2778333333333345e-06, | |
| "loss": 1.1445, | |
| "step": 787000 | |
| }, | |
| { | |
| "epoch": 3.036456747831515, | |
| "grad_norm": 0.7839773297309875, | |
| "learning_rate": 6.2222777777777786e-06, | |
| "loss": 1.1445, | |
| "step": 788000 | |
| }, | |
| { | |
| "epoch": 3.0403101193389155, | |
| "grad_norm": 0.7741373181343079, | |
| "learning_rate": 6.166722222222223e-06, | |
| "loss": 1.144, | |
| "step": 789000 | |
| }, | |
| { | |
| "epoch": 3.0441634908463158, | |
| "grad_norm": 0.7754467129707336, | |
| "learning_rate": 6.1111666666666675e-06, | |
| "loss": 1.1442, | |
| "step": 790000 | |
| }, | |
| { | |
| "epoch": 3.0480168623537165, | |
| "grad_norm": 0.7377423644065857, | |
| "learning_rate": 6.0556111111111115e-06, | |
| "loss": 1.1448, | |
| "step": 791000 | |
| }, | |
| { | |
| "epoch": 3.0518702338611168, | |
| "grad_norm": 0.7510783672332764, | |
| "learning_rate": 6.000055555555556e-06, | |
| "loss": 1.1441, | |
| "step": 792000 | |
| }, | |
| { | |
| "epoch": 3.055723605368517, | |
| "grad_norm": 0.7558544278144836, | |
| "learning_rate": 5.9445000000000004e-06, | |
| "loss": 1.1439, | |
| "step": 793000 | |
| }, | |
| { | |
| "epoch": 3.059576976875918, | |
| "grad_norm": 0.763209342956543, | |
| "learning_rate": 5.8889444444444445e-06, | |
| "loss": 1.143, | |
| "step": 794000 | |
| }, | |
| { | |
| "epoch": 3.063430348383318, | |
| "grad_norm": 0.7716565132141113, | |
| "learning_rate": 5.833388888888889e-06, | |
| "loss": 1.1439, | |
| "step": 795000 | |
| }, | |
| { | |
| "epoch": 3.0672837198907184, | |
| "grad_norm": 0.8171836137771606, | |
| "learning_rate": 5.777833333333333e-06, | |
| "loss": 1.1431, | |
| "step": 796000 | |
| }, | |
| { | |
| "epoch": 3.0711370913981186, | |
| "grad_norm": 0.7793330550193787, | |
| "learning_rate": 5.722277777777777e-06, | |
| "loss": 1.1435, | |
| "step": 797000 | |
| }, | |
| { | |
| "epoch": 3.0749904629055194, | |
| "grad_norm": 0.7747001051902771, | |
| "learning_rate": 5.666722222222222e-06, | |
| "loss": 1.1438, | |
| "step": 798000 | |
| }, | |
| { | |
| "epoch": 3.0788438344129196, | |
| "grad_norm": 0.7851794958114624, | |
| "learning_rate": 5.611166666666666e-06, | |
| "loss": 1.1431, | |
| "step": 799000 | |
| }, | |
| { | |
| "epoch": 3.08269720592032, | |
| "grad_norm": 0.7496747970581055, | |
| "learning_rate": 5.555611111111111e-06, | |
| "loss": 1.143, | |
| "step": 800000 | |
| }, | |
| { | |
| "epoch": 3.08655057742772, | |
| "grad_norm": 0.7908812761306763, | |
| "learning_rate": 5.500055555555555e-06, | |
| "loss": 1.1435, | |
| "step": 801000 | |
| }, | |
| { | |
| "epoch": 3.090403948935121, | |
| "grad_norm": 0.7809085249900818, | |
| "learning_rate": 5.4445e-06, | |
| "loss": 1.1435, | |
| "step": 802000 | |
| }, | |
| { | |
| "epoch": 3.094257320442521, | |
| "grad_norm": 0.7660035490989685, | |
| "learning_rate": 5.388944444444444e-06, | |
| "loss": 1.1421, | |
| "step": 803000 | |
| }, | |
| { | |
| "epoch": 3.0981106919499215, | |
| "grad_norm": 0.7446494102478027, | |
| "learning_rate": 5.333388888888889e-06, | |
| "loss": 1.1431, | |
| "step": 804000 | |
| }, | |
| { | |
| "epoch": 3.101964063457322, | |
| "grad_norm": 0.7707033753395081, | |
| "learning_rate": 5.277833333333333e-06, | |
| "loss": 1.143, | |
| "step": 805000 | |
| }, | |
| { | |
| "epoch": 3.1058174349647225, | |
| "grad_norm": 0.7882303595542908, | |
| "learning_rate": 5.222277777777778e-06, | |
| "loss": 1.1425, | |
| "step": 806000 | |
| }, | |
| { | |
| "epoch": 3.109670806472123, | |
| "grad_norm": 0.7790716290473938, | |
| "learning_rate": 5.166722222222223e-06, | |
| "loss": 1.1435, | |
| "step": 807000 | |
| }, | |
| { | |
| "epoch": 3.113524177979523, | |
| "grad_norm": 0.7655811905860901, | |
| "learning_rate": 5.111166666666667e-06, | |
| "loss": 1.1428, | |
| "step": 808000 | |
| }, | |
| { | |
| "epoch": 3.117377549486924, | |
| "grad_norm": 0.7627564072608948, | |
| "learning_rate": 5.055611111111112e-06, | |
| "loss": 1.1431, | |
| "step": 809000 | |
| }, | |
| { | |
| "epoch": 3.121230920994324, | |
| "grad_norm": 0.7675108909606934, | |
| "learning_rate": 5.000055555555556e-06, | |
| "loss": 1.1417, | |
| "step": 810000 | |
| }, | |
| { | |
| "epoch": 3.1250842925017244, | |
| "grad_norm": 0.7635100483894348, | |
| "learning_rate": 4.944500000000001e-06, | |
| "loss": 1.142, | |
| "step": 811000 | |
| }, | |
| { | |
| "epoch": 3.1289376640091247, | |
| "grad_norm": 0.774726927280426, | |
| "learning_rate": 4.888944444444445e-06, | |
| "loss": 1.142, | |
| "step": 812000 | |
| }, | |
| { | |
| "epoch": 3.1327910355165254, | |
| "grad_norm": 0.7784895300865173, | |
| "learning_rate": 4.83338888888889e-06, | |
| "loss": 1.1416, | |
| "step": 813000 | |
| }, | |
| { | |
| "epoch": 3.1366444070239257, | |
| "grad_norm": 0.7654526233673096, | |
| "learning_rate": 4.777833333333334e-06, | |
| "loss": 1.1418, | |
| "step": 814000 | |
| }, | |
| { | |
| "epoch": 3.140497778531326, | |
| "grad_norm": 0.7536936402320862, | |
| "learning_rate": 4.7222777777777785e-06, | |
| "loss": 1.1416, | |
| "step": 815000 | |
| }, | |
| { | |
| "epoch": 3.1443511500387262, | |
| "grad_norm": 0.7567889094352722, | |
| "learning_rate": 4.6667222222222226e-06, | |
| "loss": 1.1407, | |
| "step": 816000 | |
| }, | |
| { | |
| "epoch": 3.148204521546127, | |
| "grad_norm": 0.7922675609588623, | |
| "learning_rate": 4.6111666666666674e-06, | |
| "loss": 1.141, | |
| "step": 817000 | |
| }, | |
| { | |
| "epoch": 3.1520578930535272, | |
| "grad_norm": 0.7856247425079346, | |
| "learning_rate": 4.5556111111111115e-06, | |
| "loss": 1.1418, | |
| "step": 818000 | |
| }, | |
| { | |
| "epoch": 3.1559112645609275, | |
| "grad_norm": 0.7786199450492859, | |
| "learning_rate": 4.500055555555556e-06, | |
| "loss": 1.1411, | |
| "step": 819000 | |
| }, | |
| { | |
| "epoch": 3.159764636068328, | |
| "grad_norm": 0.7758617997169495, | |
| "learning_rate": 4.4445e-06, | |
| "loss": 1.1415, | |
| "step": 820000 | |
| }, | |
| { | |
| "epoch": 3.1636180075757285, | |
| "grad_norm": 0.769377589225769, | |
| "learning_rate": 4.3889444444444444e-06, | |
| "loss": 1.1415, | |
| "step": 821000 | |
| }, | |
| { | |
| "epoch": 3.167471379083129, | |
| "grad_norm": 0.752196729183197, | |
| "learning_rate": 4.333388888888889e-06, | |
| "loss": 1.1409, | |
| "step": 822000 | |
| }, | |
| { | |
| "epoch": 3.171324750590529, | |
| "grad_norm": 0.7932141423225403, | |
| "learning_rate": 4.277833333333333e-06, | |
| "loss": 1.1409, | |
| "step": 823000 | |
| }, | |
| { | |
| "epoch": 3.1751781220979294, | |
| "grad_norm": 0.7658106684684753, | |
| "learning_rate": 4.222277777777777e-06, | |
| "loss": 1.1411, | |
| "step": 824000 | |
| }, | |
| { | |
| "epoch": 3.17903149360533, | |
| "grad_norm": 0.7765457630157471, | |
| "learning_rate": 4.166722222222222e-06, | |
| "loss": 1.1412, | |
| "step": 825000 | |
| }, | |
| { | |
| "epoch": 3.1828848651127304, | |
| "grad_norm": 0.7698619365692139, | |
| "learning_rate": 4.111166666666666e-06, | |
| "loss": 1.1416, | |
| "step": 826000 | |
| }, | |
| { | |
| "epoch": 3.1867382366201307, | |
| "grad_norm": 0.7630689144134521, | |
| "learning_rate": 4.055611111111111e-06, | |
| "loss": 1.1412, | |
| "step": 827000 | |
| }, | |
| { | |
| "epoch": 3.190591608127531, | |
| "grad_norm": 0.7751487493515015, | |
| "learning_rate": 4.000055555555555e-06, | |
| "loss": 1.1405, | |
| "step": 828000 | |
| }, | |
| { | |
| "epoch": 3.1944449796349317, | |
| "grad_norm": 0.7852933406829834, | |
| "learning_rate": 3.9445e-06, | |
| "loss": 1.1416, | |
| "step": 829000 | |
| }, | |
| { | |
| "epoch": 3.198298351142332, | |
| "grad_norm": 0.7691949605941772, | |
| "learning_rate": 3.888944444444444e-06, | |
| "loss": 1.1398, | |
| "step": 830000 | |
| }, | |
| { | |
| "epoch": 3.2021517226497322, | |
| "grad_norm": 0.7742173671722412, | |
| "learning_rate": 3.833388888888889e-06, | |
| "loss": 1.1397, | |
| "step": 831000 | |
| }, | |
| { | |
| "epoch": 3.206005094157133, | |
| "grad_norm": 0.7698484063148499, | |
| "learning_rate": 3.777833333333333e-06, | |
| "loss": 1.1397, | |
| "step": 832000 | |
| }, | |
| { | |
| "epoch": 3.2098584656645333, | |
| "grad_norm": 0.7852274775505066, | |
| "learning_rate": 3.722277777777778e-06, | |
| "loss": 1.1394, | |
| "step": 833000 | |
| }, | |
| { | |
| "epoch": 3.2137118371719335, | |
| "grad_norm": 0.7859106063842773, | |
| "learning_rate": 3.666722222222222e-06, | |
| "loss": 1.1407, | |
| "step": 834000 | |
| }, | |
| { | |
| "epoch": 3.217565208679334, | |
| "grad_norm": 0.7774125337600708, | |
| "learning_rate": 3.611166666666667e-06, | |
| "loss": 1.1401, | |
| "step": 835000 | |
| }, | |
| { | |
| "epoch": 3.2214185801867345, | |
| "grad_norm": 0.7690660357475281, | |
| "learning_rate": 3.555611111111111e-06, | |
| "loss": 1.1401, | |
| "step": 836000 | |
| }, | |
| { | |
| "epoch": 3.225271951694135, | |
| "grad_norm": 0.7808369994163513, | |
| "learning_rate": 3.5000555555555558e-06, | |
| "loss": 1.1404, | |
| "step": 837000 | |
| }, | |
| { | |
| "epoch": 3.229125323201535, | |
| "grad_norm": 0.7723608613014221, | |
| "learning_rate": 3.4445000000000006e-06, | |
| "loss": 1.1391, | |
| "step": 838000 | |
| }, | |
| { | |
| "epoch": 3.2329786947089354, | |
| "grad_norm": 0.7605135440826416, | |
| "learning_rate": 3.3889444444444447e-06, | |
| "loss": 1.1398, | |
| "step": 839000 | |
| }, | |
| { | |
| "epoch": 3.236832066216336, | |
| "grad_norm": 0.7743427753448486, | |
| "learning_rate": 3.3333888888888896e-06, | |
| "loss": 1.1398, | |
| "step": 840000 | |
| }, | |
| { | |
| "epoch": 3.2406854377237364, | |
| "grad_norm": 0.783277690410614, | |
| "learning_rate": 3.2778333333333336e-06, | |
| "loss": 1.1404, | |
| "step": 841000 | |
| }, | |
| { | |
| "epoch": 3.2445388092311367, | |
| "grad_norm": 0.7651547789573669, | |
| "learning_rate": 3.222277777777778e-06, | |
| "loss": 1.1396, | |
| "step": 842000 | |
| }, | |
| { | |
| "epoch": 3.248392180738537, | |
| "grad_norm": 0.7884653806686401, | |
| "learning_rate": 3.1667222222222225e-06, | |
| "loss": 1.1398, | |
| "step": 843000 | |
| }, | |
| { | |
| "epoch": 3.2522455522459377, | |
| "grad_norm": 0.7730636596679688, | |
| "learning_rate": 3.1111666666666666e-06, | |
| "loss": 1.1393, | |
| "step": 844000 | |
| }, | |
| { | |
| "epoch": 3.256098923753338, | |
| "grad_norm": 0.7686559557914734, | |
| "learning_rate": 3.055611111111111e-06, | |
| "loss": 1.1402, | |
| "step": 845000 | |
| }, | |
| { | |
| "epoch": 3.2599522952607383, | |
| "grad_norm": 0.7743884921073914, | |
| "learning_rate": 3.0000555555555555e-06, | |
| "loss": 1.1396, | |
| "step": 846000 | |
| }, | |
| { | |
| "epoch": 3.263805666768139, | |
| "grad_norm": 0.7998344302177429, | |
| "learning_rate": 2.9445e-06, | |
| "loss": 1.1398, | |
| "step": 847000 | |
| }, | |
| { | |
| "epoch": 3.2676590382755393, | |
| "grad_norm": 0.7782961130142212, | |
| "learning_rate": 2.8889444444444444e-06, | |
| "loss": 1.1397, | |
| "step": 848000 | |
| }, | |
| { | |
| "epoch": 3.2715124097829396, | |
| "grad_norm": 0.788406252861023, | |
| "learning_rate": 2.833388888888889e-06, | |
| "loss": 1.1388, | |
| "step": 849000 | |
| }, | |
| { | |
| "epoch": 3.27536578129034, | |
| "grad_norm": 0.7676372528076172, | |
| "learning_rate": 2.7778333333333333e-06, | |
| "loss": 1.1389, | |
| "step": 850000 | |
| }, | |
| { | |
| "epoch": 3.2792191527977406, | |
| "grad_norm": 0.7785215377807617, | |
| "learning_rate": 2.7222777777777778e-06, | |
| "loss": 1.138, | |
| "step": 851000 | |
| }, | |
| { | |
| "epoch": 3.283072524305141, | |
| "grad_norm": 0.7590740919113159, | |
| "learning_rate": 2.6667222222222222e-06, | |
| "loss": 1.1392, | |
| "step": 852000 | |
| }, | |
| { | |
| "epoch": 3.286925895812541, | |
| "grad_norm": 0.7940697073936462, | |
| "learning_rate": 2.6111666666666667e-06, | |
| "loss": 1.1378, | |
| "step": 853000 | |
| }, | |
| { | |
| "epoch": 3.2907792673199414, | |
| "grad_norm": 0.7704636454582214, | |
| "learning_rate": 2.5556111111111116e-06, | |
| "loss": 1.1381, | |
| "step": 854000 | |
| }, | |
| { | |
| "epoch": 3.294632638827342, | |
| "grad_norm": 0.7758104801177979, | |
| "learning_rate": 2.500055555555556e-06, | |
| "loss": 1.1381, | |
| "step": 855000 | |
| }, | |
| { | |
| "epoch": 3.2984860103347424, | |
| "grad_norm": 0.7805718183517456, | |
| "learning_rate": 2.4445e-06, | |
| "loss": 1.1378, | |
| "step": 856000 | |
| }, | |
| { | |
| "epoch": 3.3023393818421427, | |
| "grad_norm": 0.7670098543167114, | |
| "learning_rate": 2.3889444444444445e-06, | |
| "loss": 1.1386, | |
| "step": 857000 | |
| }, | |
| { | |
| "epoch": 3.306192753349543, | |
| "grad_norm": 0.7733041048049927, | |
| "learning_rate": 2.333388888888889e-06, | |
| "loss": 1.139, | |
| "step": 858000 | |
| }, | |
| { | |
| "epoch": 3.3100461248569437, | |
| "grad_norm": 0.7957353591918945, | |
| "learning_rate": 2.2778333333333334e-06, | |
| "loss": 1.1383, | |
| "step": 859000 | |
| }, | |
| { | |
| "epoch": 3.313899496364344, | |
| "grad_norm": 0.7921308875083923, | |
| "learning_rate": 2.222277777777778e-06, | |
| "loss": 1.1385, | |
| "step": 860000 | |
| }, | |
| { | |
| "epoch": 3.3177528678717443, | |
| "grad_norm": 0.7467139363288879, | |
| "learning_rate": 2.1667222222222224e-06, | |
| "loss": 1.1373, | |
| "step": 861000 | |
| }, | |
| { | |
| "epoch": 3.321606239379145, | |
| "grad_norm": 0.8030253648757935, | |
| "learning_rate": 2.111166666666667e-06, | |
| "loss": 1.1384, | |
| "step": 862000 | |
| }, | |
| { | |
| "epoch": 3.3254596108865453, | |
| "grad_norm": 0.778984546661377, | |
| "learning_rate": 2.0556111111111113e-06, | |
| "loss": 1.1383, | |
| "step": 863000 | |
| }, | |
| { | |
| "epoch": 3.3293129823939456, | |
| "grad_norm": 0.7732436656951904, | |
| "learning_rate": 2.0000555555555557e-06, | |
| "loss": 1.1385, | |
| "step": 864000 | |
| }, | |
| { | |
| "epoch": 3.333166353901346, | |
| "grad_norm": 0.7700003981590271, | |
| "learning_rate": 1.9445e-06, | |
| "loss": 1.1378, | |
| "step": 865000 | |
| }, | |
| { | |
| "epoch": 3.337019725408746, | |
| "grad_norm": 0.7778324484825134, | |
| "learning_rate": 1.8889444444444446e-06, | |
| "loss": 1.1373, | |
| "step": 866000 | |
| }, | |
| { | |
| "epoch": 3.340873096916147, | |
| "grad_norm": 0.784168004989624, | |
| "learning_rate": 1.833388888888889e-06, | |
| "loss": 1.1378, | |
| "step": 867000 | |
| }, | |
| { | |
| "epoch": 3.344726468423547, | |
| "grad_norm": 0.7781540155410767, | |
| "learning_rate": 1.7778333333333334e-06, | |
| "loss": 1.1376, | |
| "step": 868000 | |
| }, | |
| { | |
| "epoch": 3.3485798399309474, | |
| "grad_norm": 0.770268440246582, | |
| "learning_rate": 1.7222777777777778e-06, | |
| "loss": 1.1382, | |
| "step": 869000 | |
| }, | |
| { | |
| "epoch": 3.352433211438348, | |
| "grad_norm": 0.7837746143341064, | |
| "learning_rate": 1.6667222222222223e-06, | |
| "loss": 1.1373, | |
| "step": 870000 | |
| }, | |
| { | |
| "epoch": 3.3562865829457484, | |
| "grad_norm": 0.7703538537025452, | |
| "learning_rate": 1.6111666666666667e-06, | |
| "loss": 1.1383, | |
| "step": 871000 | |
| }, | |
| { | |
| "epoch": 3.3601399544531487, | |
| "grad_norm": 0.7656373977661133, | |
| "learning_rate": 1.5556111111111112e-06, | |
| "loss": 1.1374, | |
| "step": 872000 | |
| }, | |
| { | |
| "epoch": 3.363993325960549, | |
| "grad_norm": 0.7768437266349792, | |
| "learning_rate": 1.5000555555555556e-06, | |
| "loss": 1.1362, | |
| "step": 873000 | |
| }, | |
| { | |
| "epoch": 3.3678466974679497, | |
| "grad_norm": 0.7731209993362427, | |
| "learning_rate": 1.4445e-06, | |
| "loss": 1.1368, | |
| "step": 874000 | |
| }, | |
| { | |
| "epoch": 3.37170006897535, | |
| "grad_norm": 0.7907932996749878, | |
| "learning_rate": 1.3889444444444444e-06, | |
| "loss": 1.1367, | |
| "step": 875000 | |
| }, | |
| { | |
| "epoch": 3.3755534404827503, | |
| "grad_norm": 0.7951443791389465, | |
| "learning_rate": 1.3333888888888888e-06, | |
| "loss": 1.1371, | |
| "step": 876000 | |
| }, | |
| { | |
| "epoch": 3.379406811990151, | |
| "grad_norm": 0.7773862481117249, | |
| "learning_rate": 1.2778333333333333e-06, | |
| "loss": 1.1374, | |
| "step": 877000 | |
| }, | |
| { | |
| "epoch": 3.3832601834975513, | |
| "grad_norm": 0.7822207808494568, | |
| "learning_rate": 1.222277777777778e-06, | |
| "loss": 1.1373, | |
| "step": 878000 | |
| }, | |
| { | |
| "epoch": 3.3871135550049516, | |
| "grad_norm": 0.790253221988678, | |
| "learning_rate": 1.1667222222222224e-06, | |
| "loss": 1.1369, | |
| "step": 879000 | |
| }, | |
| { | |
| "epoch": 3.390966926512352, | |
| "grad_norm": 0.7918968200683594, | |
| "learning_rate": 1.1111666666666669e-06, | |
| "loss": 1.1372, | |
| "step": 880000 | |
| }, | |
| { | |
| "epoch": 3.394820298019752, | |
| "grad_norm": 0.7610453963279724, | |
| "learning_rate": 1.0556111111111113e-06, | |
| "loss": 1.137, | |
| "step": 881000 | |
| }, | |
| { | |
| "epoch": 3.398673669527153, | |
| "grad_norm": 0.7577848434448242, | |
| "learning_rate": 1.0000555555555556e-06, | |
| "loss": 1.1376, | |
| "step": 882000 | |
| }, | |
| { | |
| "epoch": 3.402527041034553, | |
| "grad_norm": 0.766459584236145, | |
| "learning_rate": 9.445e-07, | |
| "loss": 1.1362, | |
| "step": 883000 | |
| }, | |
| { | |
| "epoch": 3.4063804125419535, | |
| "grad_norm": 0.7742135524749756, | |
| "learning_rate": 8.889444444444445e-07, | |
| "loss": 1.1363, | |
| "step": 884000 | |
| }, | |
| { | |
| "epoch": 3.410233784049354, | |
| "grad_norm": 0.7687368988990784, | |
| "learning_rate": 8.333888888888889e-07, | |
| "loss": 1.1367, | |
| "step": 885000 | |
| }, | |
| { | |
| "epoch": 3.4140871555567545, | |
| "grad_norm": 0.7918124794960022, | |
| "learning_rate": 7.778333333333334e-07, | |
| "loss": 1.1371, | |
| "step": 886000 | |
| }, | |
| { | |
| "epoch": 3.4179405270641547, | |
| "grad_norm": 0.7830091118812561, | |
| "learning_rate": 7.222777777777777e-07, | |
| "loss": 1.1377, | |
| "step": 887000 | |
| }, | |
| { | |
| "epoch": 3.421793898571555, | |
| "grad_norm": 0.786780059337616, | |
| "learning_rate": 6.667222222222222e-07, | |
| "loss": 1.1364, | |
| "step": 888000 | |
| }, | |
| { | |
| "epoch": 3.4256472700789558, | |
| "grad_norm": 0.7944617867469788, | |
| "learning_rate": 6.111666666666667e-07, | |
| "loss": 1.1365, | |
| "step": 889000 | |
| }, | |
| { | |
| "epoch": 3.429500641586356, | |
| "grad_norm": 0.7738235592842102, | |
| "learning_rate": 5.556111111111111e-07, | |
| "loss": 1.1363, | |
| "step": 890000 | |
| }, | |
| { | |
| "epoch": 3.4333540130937563, | |
| "grad_norm": 0.7691417336463928, | |
| "learning_rate": 5.000555555555556e-07, | |
| "loss": 1.1373, | |
| "step": 891000 | |
| }, | |
| { | |
| "epoch": 3.4372073846011566, | |
| "grad_norm": 0.7909073829650879, | |
| "learning_rate": 4.4450000000000004e-07, | |
| "loss": 1.1363, | |
| "step": 892000 | |
| }, | |
| { | |
| "epoch": 3.4410607561085573, | |
| "grad_norm": 0.7703380584716797, | |
| "learning_rate": 3.8894444444444445e-07, | |
| "loss": 1.1357, | |
| "step": 893000 | |
| }, | |
| { | |
| "epoch": 3.4449141276159576, | |
| "grad_norm": 0.7877383232116699, | |
| "learning_rate": 3.333888888888889e-07, | |
| "loss": 1.1365, | |
| "step": 894000 | |
| }, | |
| { | |
| "epoch": 3.448767499123358, | |
| "grad_norm": 0.7765257358551025, | |
| "learning_rate": 2.778333333333333e-07, | |
| "loss": 1.1359, | |
| "step": 895000 | |
| }, | |
| { | |
| "epoch": 3.452620870630758, | |
| "grad_norm": 0.7824655175209045, | |
| "learning_rate": 2.222777777777778e-07, | |
| "loss": 1.1365, | |
| "step": 896000 | |
| }, | |
| { | |
| "epoch": 3.456474242138159, | |
| "grad_norm": 0.7947019934654236, | |
| "learning_rate": 1.6672222222222223e-07, | |
| "loss": 1.1362, | |
| "step": 897000 | |
| }, | |
| { | |
| "epoch": 3.460327613645559, | |
| "grad_norm": 0.7733472585678101, | |
| "learning_rate": 1.1116666666666666e-07, | |
| "loss": 1.1363, | |
| "step": 898000 | |
| }, | |
| { | |
| "epoch": 3.4641809851529595, | |
| "grad_norm": 0.7852097749710083, | |
| "learning_rate": 5.561111111111111e-08, | |
| "loss": 1.1357, | |
| "step": 899000 | |
| }, | |
| { | |
| "epoch": 3.46803435666036, | |
| "grad_norm": 0.7882575392723083, | |
| "learning_rate": 5.5555555555555553e-11, | |
| "loss": 1.1358, | |
| "step": 900000 | |
| } | |
| ], | |
| "logging_steps": 1000, | |
| "max_steps": 900000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.376157208460059e+20, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |