21_BPS_noR / trainer_state.json
liyang619's picture
Upload folder using huggingface_hub
3780f81 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 31250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016,
"grad_norm": 0.4162791292708614,
"learning_rate": 1.9968e-05,
"loss": 0.4754,
"step": 50
},
{
"epoch": 0.032,
"grad_norm": 0.2034076606680038,
"learning_rate": 1.9936000000000004e-05,
"loss": 0.0184,
"step": 100
},
{
"epoch": 0.048,
"grad_norm": 0.32707728996877367,
"learning_rate": 1.9904e-05,
"loss": 0.0148,
"step": 150
},
{
"epoch": 0.064,
"grad_norm": 0.4247544882383208,
"learning_rate": 1.9872000000000002e-05,
"loss": 0.0126,
"step": 200
},
{
"epoch": 0.08,
"grad_norm": 0.25015536838552505,
"learning_rate": 1.9840000000000003e-05,
"loss": 0.0102,
"step": 250
},
{
"epoch": 0.096,
"grad_norm": 0.1173465173905855,
"learning_rate": 1.9808e-05,
"loss": 0.0086,
"step": 300
},
{
"epoch": 0.112,
"grad_norm": 0.17858061231251363,
"learning_rate": 1.9776000000000002e-05,
"loss": 0.0072,
"step": 350
},
{
"epoch": 0.128,
"grad_norm": 0.12166310007206395,
"learning_rate": 1.9744e-05,
"loss": 0.006,
"step": 400
},
{
"epoch": 0.144,
"grad_norm": 0.1115878060467963,
"learning_rate": 1.9712000000000004e-05,
"loss": 0.0052,
"step": 450
},
{
"epoch": 0.16,
"grad_norm": 0.06376502592170473,
"learning_rate": 1.968e-05,
"loss": 0.0044,
"step": 500
},
{
"epoch": 0.176,
"grad_norm": 0.10598418241747591,
"learning_rate": 1.9648000000000002e-05,
"loss": 0.0036,
"step": 550
},
{
"epoch": 0.192,
"grad_norm": 0.1240464209943554,
"learning_rate": 1.9616000000000003e-05,
"loss": 0.0031,
"step": 600
},
{
"epoch": 0.208,
"grad_norm": 0.0985656589222262,
"learning_rate": 1.9584e-05,
"loss": 0.0026,
"step": 650
},
{
"epoch": 0.224,
"grad_norm": 0.10986810134020673,
"learning_rate": 1.9552000000000002e-05,
"loss": 0.0023,
"step": 700
},
{
"epoch": 0.24,
"grad_norm": 0.1103690941151271,
"learning_rate": 1.9520000000000003e-05,
"loss": 0.002,
"step": 750
},
{
"epoch": 0.256,
"grad_norm": 0.06027351422139254,
"learning_rate": 1.9488000000000004e-05,
"loss": 0.0018,
"step": 800
},
{
"epoch": 0.272,
"grad_norm": 0.10889576295219647,
"learning_rate": 1.9456e-05,
"loss": 0.0015,
"step": 850
},
{
"epoch": 0.288,
"grad_norm": 0.06822239222520912,
"learning_rate": 1.9424e-05,
"loss": 0.0014,
"step": 900
},
{
"epoch": 0.304,
"grad_norm": 0.09673723500638307,
"learning_rate": 1.9392000000000003e-05,
"loss": 0.0012,
"step": 950
},
{
"epoch": 0.32,
"grad_norm": 0.04816136721008917,
"learning_rate": 1.936e-05,
"loss": 0.0011,
"step": 1000
},
{
"epoch": 0.336,
"grad_norm": 0.036073116914313585,
"learning_rate": 1.9328000000000002e-05,
"loss": 0.0012,
"step": 1050
},
{
"epoch": 0.352,
"grad_norm": 0.06730673946375604,
"learning_rate": 1.9296000000000003e-05,
"loss": 0.0009,
"step": 1100
},
{
"epoch": 0.368,
"grad_norm": 0.03491648747017885,
"learning_rate": 1.9264e-05,
"loss": 0.0011,
"step": 1150
},
{
"epoch": 0.384,
"grad_norm": 0.05153510670302306,
"learning_rate": 1.9232e-05,
"loss": 0.0009,
"step": 1200
},
{
"epoch": 0.4,
"grad_norm": 0.04307035845962119,
"learning_rate": 1.9200000000000003e-05,
"loss": 0.0008,
"step": 1250
},
{
"epoch": 0.416,
"grad_norm": 0.03949817663105664,
"learning_rate": 1.9168000000000004e-05,
"loss": 0.0006,
"step": 1300
},
{
"epoch": 0.432,
"grad_norm": 0.046238030209222174,
"learning_rate": 1.9136e-05,
"loss": 0.0007,
"step": 1350
},
{
"epoch": 0.448,
"grad_norm": 0.04736527158661927,
"learning_rate": 1.9104000000000002e-05,
"loss": 0.0007,
"step": 1400
},
{
"epoch": 0.464,
"grad_norm": 1.0273111933810504,
"learning_rate": 1.9072000000000003e-05,
"loss": 0.0511,
"step": 1450
},
{
"epoch": 0.48,
"grad_norm": 0.05230770995494484,
"learning_rate": 1.904e-05,
"loss": 0.0006,
"step": 1500
},
{
"epoch": 0.496,
"grad_norm": 0.030869197184465513,
"learning_rate": 1.9008e-05,
"loss": 0.0006,
"step": 1550
},
{
"epoch": 0.512,
"grad_norm": 0.035546700143440524,
"learning_rate": 1.8976000000000003e-05,
"loss": 0.0005,
"step": 1600
},
{
"epoch": 0.528,
"grad_norm": 0.05475624621759873,
"learning_rate": 1.8944000000000004e-05,
"loss": 0.0005,
"step": 1650
},
{
"epoch": 0.544,
"grad_norm": 0.011229400249910374,
"learning_rate": 1.8912e-05,
"loss": 0.0004,
"step": 1700
},
{
"epoch": 0.56,
"grad_norm": 0.03437935699788119,
"learning_rate": 1.8880000000000002e-05,
"loss": 0.0004,
"step": 1750
},
{
"epoch": 0.576,
"grad_norm": 0.032764646105168924,
"learning_rate": 1.8848000000000003e-05,
"loss": 0.0004,
"step": 1800
},
{
"epoch": 0.592,
"grad_norm": 0.020472956028111056,
"learning_rate": 1.8816e-05,
"loss": 0.0004,
"step": 1850
},
{
"epoch": 0.608,
"grad_norm": 0.014897727874505565,
"learning_rate": 1.8784000000000002e-05,
"loss": 0.0003,
"step": 1900
},
{
"epoch": 0.624,
"grad_norm": 0.016804462072975052,
"learning_rate": 1.8752000000000003e-05,
"loss": 0.0004,
"step": 1950
},
{
"epoch": 0.64,
"grad_norm": 0.0555365139060795,
"learning_rate": 1.8720000000000004e-05,
"loss": 0.0003,
"step": 2000
},
{
"epoch": 0.656,
"grad_norm": 0.04167732575085471,
"learning_rate": 1.8688e-05,
"loss": 0.0004,
"step": 2050
},
{
"epoch": 0.672,
"grad_norm": 0.02351053070911185,
"learning_rate": 1.8656000000000002e-05,
"loss": 0.0004,
"step": 2100
},
{
"epoch": 0.688,
"grad_norm": 0.04978335013967468,
"learning_rate": 1.8624000000000003e-05,
"loss": 0.0004,
"step": 2150
},
{
"epoch": 0.704,
"grad_norm": 0.03666638757885489,
"learning_rate": 1.8592e-05,
"loss": 0.0005,
"step": 2200
},
{
"epoch": 0.72,
"grad_norm": 0.032014118732009544,
"learning_rate": 1.8560000000000002e-05,
"loss": 0.0005,
"step": 2250
},
{
"epoch": 0.736,
"grad_norm": 0.05999773243277201,
"learning_rate": 1.8528000000000003e-05,
"loss": 0.0004,
"step": 2300
},
{
"epoch": 0.752,
"grad_norm": 0.040391581637471245,
"learning_rate": 1.8496e-05,
"loss": 0.0005,
"step": 2350
},
{
"epoch": 0.768,
"grad_norm": 0.03975437942972135,
"learning_rate": 1.8464e-05,
"loss": 0.0005,
"step": 2400
},
{
"epoch": 0.784,
"grad_norm": 0.025970283264487873,
"learning_rate": 1.8432000000000002e-05,
"loss": 0.0004,
"step": 2450
},
{
"epoch": 0.8,
"grad_norm": 0.02563127650055879,
"learning_rate": 1.8400000000000003e-05,
"loss": 0.0004,
"step": 2500
},
{
"epoch": 0.816,
"grad_norm": 0.045543501280072535,
"learning_rate": 1.8368e-05,
"loss": 0.0004,
"step": 2550
},
{
"epoch": 0.832,
"grad_norm": 0.03591751155162754,
"learning_rate": 1.8336000000000002e-05,
"loss": 0.0006,
"step": 2600
},
{
"epoch": 0.848,
"grad_norm": 0.02547674454985814,
"learning_rate": 1.8304000000000003e-05,
"loss": 0.0005,
"step": 2650
},
{
"epoch": 0.864,
"grad_norm": 0.053643683147153336,
"learning_rate": 1.8272e-05,
"loss": 0.0004,
"step": 2700
},
{
"epoch": 0.88,
"grad_norm": 0.03266181119414202,
"learning_rate": 1.824e-05,
"loss": 0.0006,
"step": 2750
},
{
"epoch": 0.896,
"grad_norm": 0.056105089349029955,
"learning_rate": 1.8208000000000003e-05,
"loss": 0.0005,
"step": 2800
},
{
"epoch": 0.912,
"grad_norm": 0.024340620330705078,
"learning_rate": 1.8176000000000004e-05,
"loss": 0.0005,
"step": 2850
},
{
"epoch": 0.928,
"grad_norm": 0.030931428596602917,
"learning_rate": 1.8144e-05,
"loss": 0.0004,
"step": 2900
},
{
"epoch": 0.944,
"grad_norm": 0.21827287090117428,
"learning_rate": 1.8112000000000002e-05,
"loss": 0.0012,
"step": 2950
},
{
"epoch": 0.96,
"grad_norm": 0.048404745271573284,
"learning_rate": 1.8080000000000003e-05,
"loss": 0.0009,
"step": 3000
},
{
"epoch": 0.976,
"grad_norm": 0.03511490038904797,
"learning_rate": 1.8048e-05,
"loss": 0.0004,
"step": 3050
},
{
"epoch": 0.992,
"grad_norm": 0.0450712481155652,
"learning_rate": 1.8016e-05,
"loss": 0.0005,
"step": 3100
},
{
"epoch": 1.008,
"grad_norm": 0.025904643033032682,
"learning_rate": 1.7984000000000003e-05,
"loss": 0.0003,
"step": 3150
},
{
"epoch": 1.024,
"grad_norm": 0.026407027678467632,
"learning_rate": 1.7952e-05,
"loss": 0.0003,
"step": 3200
},
{
"epoch": 1.04,
"grad_norm": 0.021041390612394642,
"learning_rate": 1.792e-05,
"loss": 0.0003,
"step": 3250
},
{
"epoch": 1.056,
"grad_norm": 0.020868716704968104,
"learning_rate": 1.7888000000000002e-05,
"loss": 0.0003,
"step": 3300
},
{
"epoch": 1.072,
"grad_norm": 0.01496037199648747,
"learning_rate": 1.7856000000000003e-05,
"loss": 0.0003,
"step": 3350
},
{
"epoch": 1.088,
"grad_norm": 0.025499824661529856,
"learning_rate": 1.7824e-05,
"loss": 0.0002,
"step": 3400
},
{
"epoch": 1.104,
"grad_norm": 0.01913077710522383,
"learning_rate": 1.7792000000000002e-05,
"loss": 0.0002,
"step": 3450
},
{
"epoch": 1.12,
"grad_norm": 0.03713721333860161,
"learning_rate": 1.7760000000000003e-05,
"loss": 0.0002,
"step": 3500
},
{
"epoch": 1.1360000000000001,
"grad_norm": 0.03222855382697803,
"learning_rate": 1.7728e-05,
"loss": 0.0003,
"step": 3550
},
{
"epoch": 1.152,
"grad_norm": 0.007975587682259639,
"learning_rate": 1.7696e-05,
"loss": 0.0002,
"step": 3600
},
{
"epoch": 1.168,
"grad_norm": 0.002794706661316828,
"learning_rate": 1.7664000000000002e-05,
"loss": 0.0002,
"step": 3650
},
{
"epoch": 1.184,
"grad_norm": 0.032076141646391186,
"learning_rate": 1.7632000000000003e-05,
"loss": 0.0004,
"step": 3700
},
{
"epoch": 1.2,
"grad_norm": 0.6234924591493982,
"learning_rate": 1.76e-05,
"loss": 0.1071,
"step": 3750
},
{
"epoch": 1.216,
"grad_norm": 0.02540240432384445,
"learning_rate": 1.7568000000000002e-05,
"loss": 0.0064,
"step": 3800
},
{
"epoch": 1.232,
"grad_norm": 0.018763443771111773,
"learning_rate": 1.7536000000000003e-05,
"loss": 0.0003,
"step": 3850
},
{
"epoch": 1.248,
"grad_norm": 0.002105113957933186,
"learning_rate": 1.7504e-05,
"loss": 0.0002,
"step": 3900
},
{
"epoch": 1.264,
"grad_norm": 0.011820986559734206,
"learning_rate": 1.7472e-05,
"loss": 0.0001,
"step": 3950
},
{
"epoch": 1.28,
"grad_norm": 0.010883352595811436,
"learning_rate": 1.7440000000000002e-05,
"loss": 0.0002,
"step": 4000
},
{
"epoch": 1.296,
"grad_norm": 0.0002846213040226481,
"learning_rate": 1.7408e-05,
"loss": 0.0001,
"step": 4050
},
{
"epoch": 1.312,
"grad_norm": 0.011665351347795698,
"learning_rate": 1.7376e-05,
"loss": 0.0001,
"step": 4100
},
{
"epoch": 1.328,
"grad_norm": 0.003619829409498692,
"learning_rate": 1.7344000000000002e-05,
"loss": 0.0001,
"step": 4150
},
{
"epoch": 1.3439999999999999,
"grad_norm": 0.009844016518088946,
"learning_rate": 1.7312000000000003e-05,
"loss": 0.0001,
"step": 4200
},
{
"epoch": 1.3599999999999999,
"grad_norm": 0.004750662567109947,
"learning_rate": 1.728e-05,
"loss": 0.0001,
"step": 4250
},
{
"epoch": 1.376,
"grad_norm": 0.011961256262424691,
"learning_rate": 1.7248e-05,
"loss": 0.0001,
"step": 4300
},
{
"epoch": 1.392,
"grad_norm": 0.0008924320200507949,
"learning_rate": 1.7216000000000003e-05,
"loss": 0.0001,
"step": 4350
},
{
"epoch": 1.408,
"grad_norm": 0.01662320118905631,
"learning_rate": 1.7184e-05,
"loss": 0.0001,
"step": 4400
},
{
"epoch": 1.424,
"grad_norm": 0.028186123188899303,
"learning_rate": 1.7152e-05,
"loss": 0.0001,
"step": 4450
},
{
"epoch": 1.44,
"grad_norm": 0.0034815142490064606,
"learning_rate": 1.7120000000000002e-05,
"loss": 0.0001,
"step": 4500
},
{
"epoch": 1.456,
"grad_norm": 0.001460364718969842,
"learning_rate": 1.7088000000000003e-05,
"loss": 0.0001,
"step": 4550
},
{
"epoch": 1.472,
"grad_norm": 0.012151855125151954,
"learning_rate": 1.7056e-05,
"loss": 0.0001,
"step": 4600
},
{
"epoch": 1.488,
"grad_norm": 0.0066837713743412835,
"learning_rate": 1.7024e-05,
"loss": 0.0002,
"step": 4650
},
{
"epoch": 1.504,
"grad_norm": 0.01614289259012644,
"learning_rate": 1.6992000000000003e-05,
"loss": 0.0002,
"step": 4700
},
{
"epoch": 1.52,
"grad_norm": 0.017017037424245766,
"learning_rate": 1.696e-05,
"loss": 0.0004,
"step": 4750
},
{
"epoch": 1.536,
"grad_norm": 0.037257132020433056,
"learning_rate": 1.6928e-05,
"loss": 0.0002,
"step": 4800
},
{
"epoch": 1.552,
"grad_norm": 0.017499460231563804,
"learning_rate": 1.6896000000000002e-05,
"loss": 0.0001,
"step": 4850
},
{
"epoch": 1.568,
"grad_norm": 0.03629289000795102,
"learning_rate": 1.6864e-05,
"loss": 0.0003,
"step": 4900
},
{
"epoch": 1.584,
"grad_norm": 0.012313804740317102,
"learning_rate": 1.6832e-05,
"loss": 0.0002,
"step": 4950
},
{
"epoch": 1.6,
"grad_norm": 0.015022727388605363,
"learning_rate": 1.6800000000000002e-05,
"loss": 0.0002,
"step": 5000
},
{
"epoch": 1.616,
"grad_norm": 0.01637075751125702,
"learning_rate": 1.6768000000000003e-05,
"loss": 0.0002,
"step": 5050
},
{
"epoch": 1.6320000000000001,
"grad_norm": 0.00817681413922093,
"learning_rate": 1.6736e-05,
"loss": 0.0002,
"step": 5100
},
{
"epoch": 1.6480000000000001,
"grad_norm": 0.05076839955582874,
"learning_rate": 1.6704e-05,
"loss": 0.0003,
"step": 5150
},
{
"epoch": 1.6640000000000001,
"grad_norm": 0.054792586149856305,
"learning_rate": 1.6672000000000002e-05,
"loss": 0.0003,
"step": 5200
},
{
"epoch": 1.6800000000000002,
"grad_norm": 0.014300407813244355,
"learning_rate": 1.664e-05,
"loss": 0.0004,
"step": 5250
},
{
"epoch": 1.696,
"grad_norm": 0.022359944469494724,
"learning_rate": 1.6608e-05,
"loss": 0.0004,
"step": 5300
},
{
"epoch": 1.712,
"grad_norm": 0.03392271894362949,
"learning_rate": 1.6576000000000002e-05,
"loss": 0.0005,
"step": 5350
},
{
"epoch": 1.728,
"grad_norm": 0.030499061647474107,
"learning_rate": 1.6544000000000003e-05,
"loss": 0.0004,
"step": 5400
},
{
"epoch": 1.744,
"grad_norm": 0.022947059194539453,
"learning_rate": 1.6512e-05,
"loss": 0.0002,
"step": 5450
},
{
"epoch": 1.76,
"grad_norm": 0.004139587011211526,
"learning_rate": 1.648e-05,
"loss": 0.0002,
"step": 5500
},
{
"epoch": 1.776,
"grad_norm": 0.019015863163501696,
"learning_rate": 1.6448000000000002e-05,
"loss": 0.0002,
"step": 5550
},
{
"epoch": 1.792,
"grad_norm": 0.02153996101513209,
"learning_rate": 1.6416e-05,
"loss": 0.0001,
"step": 5600
},
{
"epoch": 1.808,
"grad_norm": 0.02075243587775307,
"learning_rate": 1.6384e-05,
"loss": 0.0002,
"step": 5650
},
{
"epoch": 1.8239999999999998,
"grad_norm": 0.009150550879400116,
"learning_rate": 1.6352000000000002e-05,
"loss": 0.0001,
"step": 5700
},
{
"epoch": 1.8399999999999999,
"grad_norm": 0.03715955238865434,
"learning_rate": 1.632e-05,
"loss": 0.0001,
"step": 5750
},
{
"epoch": 1.8559999999999999,
"grad_norm": 0.017289500337293505,
"learning_rate": 1.6288e-05,
"loss": 0.0001,
"step": 5800
},
{
"epoch": 1.8719999999999999,
"grad_norm": 0.003573430818383637,
"learning_rate": 1.6256e-05,
"loss": 0.0001,
"step": 5850
},
{
"epoch": 1.888,
"grad_norm": 0.00746919924078368,
"learning_rate": 1.6224000000000003e-05,
"loss": 0.0001,
"step": 5900
},
{
"epoch": 1.904,
"grad_norm": 0.018846707196093635,
"learning_rate": 1.6192e-05,
"loss": 0.0001,
"step": 5950
},
{
"epoch": 1.92,
"grad_norm": 0.029690252225365133,
"learning_rate": 1.616e-05,
"loss": 0.0001,
"step": 6000
},
{
"epoch": 1.936,
"grad_norm": 0.00404539132639544,
"learning_rate": 1.6128000000000002e-05,
"loss": 0.0001,
"step": 6050
},
{
"epoch": 1.952,
"grad_norm": 0.01577316675319085,
"learning_rate": 1.6096e-05,
"loss": 0.0001,
"step": 6100
},
{
"epoch": 1.968,
"grad_norm": 0.029879625197064306,
"learning_rate": 1.6064e-05,
"loss": 0.0002,
"step": 6150
},
{
"epoch": 1.984,
"grad_norm": 0.014564254437322836,
"learning_rate": 1.6032e-05,
"loss": 0.0002,
"step": 6200
},
{
"epoch": 2.0,
"grad_norm": 0.022672964806977416,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.0001,
"step": 6250
},
{
"epoch": 2.016,
"grad_norm": 0.025131584412300467,
"learning_rate": 1.5968e-05,
"loss": 0.0003,
"step": 6300
},
{
"epoch": 2.032,
"grad_norm": 0.019193509331689527,
"learning_rate": 1.5936e-05,
"loss": 0.0003,
"step": 6350
},
{
"epoch": 2.048,
"grad_norm": 0.03172642139425199,
"learning_rate": 1.5904000000000002e-05,
"loss": 0.0003,
"step": 6400
},
{
"epoch": 2.064,
"grad_norm": 0.03825097776567973,
"learning_rate": 1.5872e-05,
"loss": 0.0003,
"step": 6450
},
{
"epoch": 2.08,
"grad_norm": 0.02799126609845451,
"learning_rate": 1.584e-05,
"loss": 0.0004,
"step": 6500
},
{
"epoch": 2.096,
"grad_norm": 0.021007331756498947,
"learning_rate": 1.5808000000000002e-05,
"loss": 0.0003,
"step": 6550
},
{
"epoch": 2.112,
"grad_norm": 0.03138950848983769,
"learning_rate": 1.5776e-05,
"loss": 0.0003,
"step": 6600
},
{
"epoch": 2.128,
"grad_norm": 0.06211389122519674,
"learning_rate": 1.5744e-05,
"loss": 0.0004,
"step": 6650
},
{
"epoch": 2.144,
"grad_norm": 0.019842547638907367,
"learning_rate": 1.5712e-05,
"loss": 0.0004,
"step": 6700
},
{
"epoch": 2.16,
"grad_norm": 0.011531174317517585,
"learning_rate": 1.5680000000000002e-05,
"loss": 0.0003,
"step": 6750
},
{
"epoch": 2.176,
"grad_norm": 0.029715206936091667,
"learning_rate": 1.5648e-05,
"loss": 0.0003,
"step": 6800
},
{
"epoch": 2.192,
"grad_norm": 0.03367332739926089,
"learning_rate": 1.5616e-05,
"loss": 0.0004,
"step": 6850
},
{
"epoch": 2.208,
"grad_norm": 0.0328904913855512,
"learning_rate": 1.5584000000000002e-05,
"loss": 0.0003,
"step": 6900
},
{
"epoch": 2.224,
"grad_norm": 0.033707262406383776,
"learning_rate": 1.5552e-05,
"loss": 0.0003,
"step": 6950
},
{
"epoch": 2.24,
"grad_norm": 0.026776309909601587,
"learning_rate": 1.552e-05,
"loss": 0.0004,
"step": 7000
},
{
"epoch": 2.2560000000000002,
"grad_norm": 0.014767091414466142,
"learning_rate": 1.5488e-05,
"loss": 0.0002,
"step": 7050
},
{
"epoch": 2.2720000000000002,
"grad_norm": 0.0017357490608581442,
"learning_rate": 1.5456000000000002e-05,
"loss": 0.0002,
"step": 7100
},
{
"epoch": 2.288,
"grad_norm": 0.012533161481551452,
"learning_rate": 1.5424e-05,
"loss": 0.0003,
"step": 7150
},
{
"epoch": 2.304,
"grad_norm": 0.011606179356836947,
"learning_rate": 1.5392e-05,
"loss": 0.0002,
"step": 7200
},
{
"epoch": 2.32,
"grad_norm": 0.011141091556149417,
"learning_rate": 1.5360000000000002e-05,
"loss": 0.0002,
"step": 7250
},
{
"epoch": 2.336,
"grad_norm": 0.012970504799498298,
"learning_rate": 1.5328e-05,
"loss": 0.0002,
"step": 7300
},
{
"epoch": 2.352,
"grad_norm": 0.013792627527534363,
"learning_rate": 1.5296e-05,
"loss": 0.0001,
"step": 7350
},
{
"epoch": 2.368,
"grad_norm": 0.005003589537772858,
"learning_rate": 1.5264e-05,
"loss": 0.0001,
"step": 7400
},
{
"epoch": 2.384,
"grad_norm": 0.022468245054652756,
"learning_rate": 1.5232000000000003e-05,
"loss": 0.0001,
"step": 7450
},
{
"epoch": 2.4,
"grad_norm": 0.0036743704709482736,
"learning_rate": 1.5200000000000002e-05,
"loss": 0.0001,
"step": 7500
},
{
"epoch": 2.416,
"grad_norm": 0.02123021172151452,
"learning_rate": 1.5168000000000001e-05,
"loss": 0.0002,
"step": 7550
},
{
"epoch": 2.432,
"grad_norm": 0.0029051452232639837,
"learning_rate": 1.5136000000000002e-05,
"loss": 0.0001,
"step": 7600
},
{
"epoch": 2.448,
"grad_norm": 0.012870324143213164,
"learning_rate": 1.5104000000000001e-05,
"loss": 0.0001,
"step": 7650
},
{
"epoch": 2.464,
"grad_norm": 0.013477754787068787,
"learning_rate": 1.5072000000000002e-05,
"loss": 0.0001,
"step": 7700
},
{
"epoch": 2.48,
"grad_norm": 0.00028543856214405185,
"learning_rate": 1.5040000000000002e-05,
"loss": 0.0001,
"step": 7750
},
{
"epoch": 2.496,
"grad_norm": 7.966477694629657e-05,
"learning_rate": 1.5008000000000001e-05,
"loss": 0.0,
"step": 7800
},
{
"epoch": 2.512,
"grad_norm": 0.0086588190020715,
"learning_rate": 1.4976000000000002e-05,
"loss": 0.0001,
"step": 7850
},
{
"epoch": 2.528,
"grad_norm": 0.004551133565457303,
"learning_rate": 1.4944000000000001e-05,
"loss": 0.0001,
"step": 7900
},
{
"epoch": 2.544,
"grad_norm": 0.0011433396140536688,
"learning_rate": 1.4912000000000002e-05,
"loss": 0.0001,
"step": 7950
},
{
"epoch": 2.56,
"grad_norm": 0.0213021627246675,
"learning_rate": 1.4880000000000002e-05,
"loss": 0.0002,
"step": 8000
},
{
"epoch": 2.576,
"grad_norm": 0.03352306587680257,
"learning_rate": 1.4848e-05,
"loss": 0.0003,
"step": 8050
},
{
"epoch": 2.592,
"grad_norm": 0.02330516084282675,
"learning_rate": 1.4816000000000002e-05,
"loss": 0.0003,
"step": 8100
},
{
"epoch": 2.608,
"grad_norm": 0.0006771745351232887,
"learning_rate": 1.4784000000000001e-05,
"loss": 0.0002,
"step": 8150
},
{
"epoch": 2.624,
"grad_norm": 0.012210357305192779,
"learning_rate": 1.4752000000000002e-05,
"loss": 0.0002,
"step": 8200
},
{
"epoch": 2.64,
"grad_norm": 0.03745986985753525,
"learning_rate": 1.4720000000000001e-05,
"loss": 0.0002,
"step": 8250
},
{
"epoch": 2.656,
"grad_norm": 0.014274167297240953,
"learning_rate": 1.4688000000000002e-05,
"loss": 0.0002,
"step": 8300
},
{
"epoch": 2.672,
"grad_norm": 0.02447629236070352,
"learning_rate": 1.4656000000000002e-05,
"loss": 0.0002,
"step": 8350
},
{
"epoch": 2.6879999999999997,
"grad_norm": 0.0038311408711608334,
"learning_rate": 1.4624000000000001e-05,
"loss": 0.0003,
"step": 8400
},
{
"epoch": 2.7039999999999997,
"grad_norm": 0.0028822241550350873,
"learning_rate": 1.4592000000000002e-05,
"loss": 0.0003,
"step": 8450
},
{
"epoch": 2.7199999999999998,
"grad_norm": 0.021839407360574662,
"learning_rate": 1.4560000000000001e-05,
"loss": 0.0002,
"step": 8500
},
{
"epoch": 2.7359999999999998,
"grad_norm": 0.013328644366624556,
"learning_rate": 1.4528000000000002e-05,
"loss": 0.0002,
"step": 8550
},
{
"epoch": 2.752,
"grad_norm": 0.035287550317779214,
"learning_rate": 1.4496000000000001e-05,
"loss": 0.0004,
"step": 8600
},
{
"epoch": 2.768,
"grad_norm": 0.023410051762185694,
"learning_rate": 1.4464e-05,
"loss": 0.0003,
"step": 8650
},
{
"epoch": 2.784,
"grad_norm": 0.029501287143741794,
"learning_rate": 1.4432000000000002e-05,
"loss": 0.0004,
"step": 8700
},
{
"epoch": 2.8,
"grad_norm": 0.006315006996691602,
"learning_rate": 1.4400000000000001e-05,
"loss": 0.0002,
"step": 8750
},
{
"epoch": 2.816,
"grad_norm": 0.02024117418990644,
"learning_rate": 1.4368000000000002e-05,
"loss": 0.0002,
"step": 8800
},
{
"epoch": 2.832,
"grad_norm": 0.00473025507886849,
"learning_rate": 1.4336000000000001e-05,
"loss": 0.0003,
"step": 8850
},
{
"epoch": 2.848,
"grad_norm": 0.008026314917502433,
"learning_rate": 1.4304e-05,
"loss": 0.0002,
"step": 8900
},
{
"epoch": 2.864,
"grad_norm": 0.006986782846698588,
"learning_rate": 1.4272000000000002e-05,
"loss": 0.0002,
"step": 8950
},
{
"epoch": 2.88,
"grad_norm": 0.006623438561010072,
"learning_rate": 1.4240000000000001e-05,
"loss": 0.0001,
"step": 9000
},
{
"epoch": 2.896,
"grad_norm": 0.0002450911749717482,
"learning_rate": 1.4208000000000002e-05,
"loss": 0.0,
"step": 9050
},
{
"epoch": 2.912,
"grad_norm": 0.0007552126102260173,
"learning_rate": 1.4176000000000001e-05,
"loss": 0.0001,
"step": 9100
},
{
"epoch": 2.928,
"grad_norm": 0.016500368158510824,
"learning_rate": 1.4144000000000002e-05,
"loss": 0.0002,
"step": 9150
},
{
"epoch": 2.944,
"grad_norm": 0.0034247242166474266,
"learning_rate": 1.4112000000000001e-05,
"loss": 0.0001,
"step": 9200
},
{
"epoch": 2.96,
"grad_norm": 0.005455926803494763,
"learning_rate": 1.408e-05,
"loss": 0.0001,
"step": 9250
},
{
"epoch": 2.976,
"grad_norm": 0.00015754376252841523,
"learning_rate": 1.4048000000000002e-05,
"loss": 0.0001,
"step": 9300
},
{
"epoch": 2.992,
"grad_norm": 0.005293513519086162,
"learning_rate": 1.4016000000000001e-05,
"loss": 0.0001,
"step": 9350
},
{
"epoch": 3.008,
"grad_norm": 0.005723786193935275,
"learning_rate": 1.3984000000000002e-05,
"loss": 0.0001,
"step": 9400
},
{
"epoch": 3.024,
"grad_norm": 0.02024744532866248,
"learning_rate": 1.3952000000000001e-05,
"loss": 0.0001,
"step": 9450
},
{
"epoch": 3.04,
"grad_norm": 0.003499919671790885,
"learning_rate": 1.392e-05,
"loss": 0.0001,
"step": 9500
},
{
"epoch": 3.056,
"grad_norm": 0.005799100557385055,
"learning_rate": 1.3888000000000002e-05,
"loss": 0.0001,
"step": 9550
},
{
"epoch": 3.072,
"grad_norm": 0.0009577375959945704,
"learning_rate": 1.3856e-05,
"loss": 0.0,
"step": 9600
},
{
"epoch": 3.088,
"grad_norm": 0.0007578722310411947,
"learning_rate": 1.3824000000000002e-05,
"loss": 0.0001,
"step": 9650
},
{
"epoch": 3.104,
"grad_norm": 0.002338152752115609,
"learning_rate": 1.3792000000000001e-05,
"loss": 0.0001,
"step": 9700
},
{
"epoch": 3.12,
"grad_norm": 0.01456411564860181,
"learning_rate": 1.376e-05,
"loss": 0.0001,
"step": 9750
},
{
"epoch": 3.136,
"grad_norm": 0.001287800520600071,
"learning_rate": 1.3728000000000001e-05,
"loss": 0.0001,
"step": 9800
},
{
"epoch": 3.152,
"grad_norm": 0.014424730029901063,
"learning_rate": 1.3696e-05,
"loss": 0.0001,
"step": 9850
},
{
"epoch": 3.168,
"grad_norm": 0.00924258872096614,
"learning_rate": 1.3664000000000002e-05,
"loss": 0.0001,
"step": 9900
},
{
"epoch": 3.184,
"grad_norm": 0.013489603484745373,
"learning_rate": 1.3632000000000001e-05,
"loss": 0.0001,
"step": 9950
},
{
"epoch": 3.2,
"grad_norm": 0.006927504226900411,
"learning_rate": 1.3600000000000002e-05,
"loss": 0.0001,
"step": 10000
},
{
"epoch": 3.216,
"grad_norm": 0.0035318365737952973,
"learning_rate": 1.3568000000000001e-05,
"loss": 0.0001,
"step": 10050
},
{
"epoch": 3.232,
"grad_norm": 0.005177760774164178,
"learning_rate": 1.3536e-05,
"loss": 0.0001,
"step": 10100
},
{
"epoch": 3.248,
"grad_norm": 0.000566812552570274,
"learning_rate": 1.3504000000000001e-05,
"loss": 0.0001,
"step": 10150
},
{
"epoch": 3.2640000000000002,
"grad_norm": 0.0027484984185127837,
"learning_rate": 1.3472e-05,
"loss": 0.0,
"step": 10200
},
{
"epoch": 3.2800000000000002,
"grad_norm": 9.001619235055446e-05,
"learning_rate": 1.3440000000000002e-05,
"loss": 0.0,
"step": 10250
},
{
"epoch": 3.296,
"grad_norm": 9.963417112520346e-05,
"learning_rate": 1.3408000000000001e-05,
"loss": 0.0,
"step": 10300
},
{
"epoch": 3.312,
"grad_norm": 0.029154892118349613,
"learning_rate": 1.3376e-05,
"loss": 0.0001,
"step": 10350
},
{
"epoch": 3.328,
"grad_norm": 0.029152414372463475,
"learning_rate": 1.3344000000000001e-05,
"loss": 0.0008,
"step": 10400
},
{
"epoch": 3.344,
"grad_norm": 0.021275289896917602,
"learning_rate": 1.3312e-05,
"loss": 0.0006,
"step": 10450
},
{
"epoch": 3.36,
"grad_norm": 0.009979331931202665,
"learning_rate": 1.3280000000000002e-05,
"loss": 0.0003,
"step": 10500
},
{
"epoch": 3.376,
"grad_norm": 0.0028554160629077644,
"learning_rate": 1.3248000000000001e-05,
"loss": 0.0002,
"step": 10550
},
{
"epoch": 3.392,
"grad_norm": 0.0008631163175074636,
"learning_rate": 1.3216000000000002e-05,
"loss": 0.0002,
"step": 10600
},
{
"epoch": 3.408,
"grad_norm": 0.0022088838542927364,
"learning_rate": 1.3184000000000001e-05,
"loss": 0.0002,
"step": 10650
},
{
"epoch": 3.424,
"grad_norm": 0.004224190234212431,
"learning_rate": 1.3152e-05,
"loss": 0.0001,
"step": 10700
},
{
"epoch": 3.44,
"grad_norm": 0.0011718751759226711,
"learning_rate": 1.3120000000000001e-05,
"loss": 0.0001,
"step": 10750
},
{
"epoch": 3.456,
"grad_norm": 0.00032887216500061996,
"learning_rate": 1.3088e-05,
"loss": 0.0001,
"step": 10800
},
{
"epoch": 3.472,
"grad_norm": 0.001042494069534549,
"learning_rate": 1.3056000000000002e-05,
"loss": 0.0001,
"step": 10850
},
{
"epoch": 3.488,
"grad_norm": 0.0008855299783423499,
"learning_rate": 1.3024000000000001e-05,
"loss": 0.0001,
"step": 10900
},
{
"epoch": 3.504,
"grad_norm": 0.005611519836033652,
"learning_rate": 1.2992e-05,
"loss": 0.0001,
"step": 10950
},
{
"epoch": 3.52,
"grad_norm": 0.0005321552377152104,
"learning_rate": 1.2960000000000001e-05,
"loss": 0.0001,
"step": 11000
},
{
"epoch": 3.536,
"grad_norm": 0.022067140757654112,
"learning_rate": 1.2928e-05,
"loss": 0.0001,
"step": 11050
},
{
"epoch": 3.552,
"grad_norm": 0.02743005815099029,
"learning_rate": 1.2896000000000002e-05,
"loss": 0.0002,
"step": 11100
},
{
"epoch": 3.568,
"grad_norm": 0.006980424491381615,
"learning_rate": 1.2864e-05,
"loss": 0.0002,
"step": 11150
},
{
"epoch": 3.584,
"grad_norm": 0.002797099415873821,
"learning_rate": 1.2832e-05,
"loss": 0.0001,
"step": 11200
},
{
"epoch": 3.6,
"grad_norm": 0.02046136431371523,
"learning_rate": 1.2800000000000001e-05,
"loss": 0.0001,
"step": 11250
},
{
"epoch": 3.616,
"grad_norm": 0.01941106362099442,
"learning_rate": 1.2768e-05,
"loss": 0.0001,
"step": 11300
},
{
"epoch": 3.632,
"grad_norm": 0.027454597931497824,
"learning_rate": 1.2736000000000001e-05,
"loss": 0.0001,
"step": 11350
},
{
"epoch": 3.648,
"grad_norm": 0.02018047493471484,
"learning_rate": 1.2704e-05,
"loss": 0.0002,
"step": 11400
},
{
"epoch": 3.664,
"grad_norm": 0.00597845181496525,
"learning_rate": 1.2672000000000002e-05,
"loss": 0.0001,
"step": 11450
},
{
"epoch": 3.68,
"grad_norm": 0.014788477975632599,
"learning_rate": 1.2640000000000001e-05,
"loss": 0.0001,
"step": 11500
},
{
"epoch": 3.6959999999999997,
"grad_norm": 0.0011628172843709302,
"learning_rate": 1.2608e-05,
"loss": 0.0001,
"step": 11550
},
{
"epoch": 3.7119999999999997,
"grad_norm": 0.009516688422133983,
"learning_rate": 1.2576000000000001e-05,
"loss": 0.0001,
"step": 11600
},
{
"epoch": 3.7279999999999998,
"grad_norm": 0.001028373499284717,
"learning_rate": 1.2544e-05,
"loss": 0.0001,
"step": 11650
},
{
"epoch": 3.7439999999999998,
"grad_norm": 0.0005456313003835665,
"learning_rate": 1.2512000000000002e-05,
"loss": 0.0001,
"step": 11700
},
{
"epoch": 3.76,
"grad_norm": 0.019519987964782267,
"learning_rate": 1.248e-05,
"loss": 0.0001,
"step": 11750
},
{
"epoch": 3.776,
"grad_norm": 0.005294491417444766,
"learning_rate": 1.2448e-05,
"loss": 0.0003,
"step": 11800
},
{
"epoch": 3.792,
"grad_norm": 0.02184964825167138,
"learning_rate": 1.2416000000000001e-05,
"loss": 0.0001,
"step": 11850
},
{
"epoch": 3.808,
"grad_norm": 0.02738996757417856,
"learning_rate": 1.2384e-05,
"loss": 0.0002,
"step": 11900
},
{
"epoch": 3.824,
"grad_norm": 0.012047613108118525,
"learning_rate": 1.2352000000000001e-05,
"loss": 0.0001,
"step": 11950
},
{
"epoch": 3.84,
"grad_norm": 0.0037495738541270694,
"learning_rate": 1.232e-05,
"loss": 0.0001,
"step": 12000
},
{
"epoch": 3.856,
"grad_norm": 0.009873580970824827,
"learning_rate": 1.2288e-05,
"loss": 0.0001,
"step": 12050
},
{
"epoch": 3.872,
"grad_norm": 0.0017604411840222716,
"learning_rate": 1.2256000000000001e-05,
"loss": 0.0001,
"step": 12100
},
{
"epoch": 3.888,
"grad_norm": 0.01674104988019884,
"learning_rate": 1.2224e-05,
"loss": 0.0001,
"step": 12150
},
{
"epoch": 3.904,
"grad_norm": 0.001884331509122243,
"learning_rate": 1.2192000000000001e-05,
"loss": 0.0001,
"step": 12200
},
{
"epoch": 3.92,
"grad_norm": 7.670429595529145e-05,
"learning_rate": 1.216e-05,
"loss": 0.0,
"step": 12250
},
{
"epoch": 3.936,
"grad_norm": 0.004975458011250956,
"learning_rate": 1.2128000000000001e-05,
"loss": 0.0001,
"step": 12300
},
{
"epoch": 3.952,
"grad_norm": 0.006668702945904534,
"learning_rate": 1.2096e-05,
"loss": 0.0001,
"step": 12350
},
{
"epoch": 3.968,
"grad_norm": 0.01189326350053773,
"learning_rate": 1.2064e-05,
"loss": 0.0,
"step": 12400
},
{
"epoch": 3.984,
"grad_norm": 9.504318727305145e-05,
"learning_rate": 1.2032000000000001e-05,
"loss": 0.0,
"step": 12450
},
{
"epoch": 4.0,
"grad_norm": 0.0001022247506147829,
"learning_rate": 1.2e-05,
"loss": 0.0,
"step": 12500
},
{
"epoch": 4.016,
"grad_norm": 4.213999803416733e-05,
"learning_rate": 1.1968000000000001e-05,
"loss": 0.0,
"step": 12550
},
{
"epoch": 4.032,
"grad_norm": 0.00016120612272882602,
"learning_rate": 1.1936e-05,
"loss": 0.0,
"step": 12600
},
{
"epoch": 4.048,
"grad_norm": 0.001648066553863214,
"learning_rate": 1.1904e-05,
"loss": 0.0,
"step": 12650
},
{
"epoch": 4.064,
"grad_norm": 0.0018732158829428201,
"learning_rate": 1.1872000000000001e-05,
"loss": 0.0001,
"step": 12700
},
{
"epoch": 4.08,
"grad_norm": 5.628979108254126e-05,
"learning_rate": 1.184e-05,
"loss": 0.0,
"step": 12750
},
{
"epoch": 4.096,
"grad_norm": 0.019032461176938534,
"learning_rate": 1.1808000000000001e-05,
"loss": 0.0001,
"step": 12800
},
{
"epoch": 4.112,
"grad_norm": 0.0016313949670923428,
"learning_rate": 1.1776e-05,
"loss": 0.0,
"step": 12850
},
{
"epoch": 4.128,
"grad_norm": 0.0015446847482196097,
"learning_rate": 1.1744000000000001e-05,
"loss": 0.0,
"step": 12900
},
{
"epoch": 4.144,
"grad_norm": 0.0036344510964935644,
"learning_rate": 1.1712e-05,
"loss": 0.0001,
"step": 12950
},
{
"epoch": 4.16,
"grad_norm": 0.00011213733550717207,
"learning_rate": 1.168e-05,
"loss": 0.0,
"step": 13000
},
{
"epoch": 4.176,
"grad_norm": 0.009241406559372695,
"learning_rate": 1.1648000000000001e-05,
"loss": 0.0,
"step": 13050
},
{
"epoch": 4.192,
"grad_norm": 0.004707319381909862,
"learning_rate": 1.1616e-05,
"loss": 0.0001,
"step": 13100
},
{
"epoch": 4.208,
"grad_norm": 0.001471958485689987,
"learning_rate": 1.1584000000000001e-05,
"loss": 0.0001,
"step": 13150
},
{
"epoch": 4.224,
"grad_norm": 0.005859122648703548,
"learning_rate": 1.1552e-05,
"loss": 0.0001,
"step": 13200
},
{
"epoch": 4.24,
"grad_norm": 0.012818649271253849,
"learning_rate": 1.152e-05,
"loss": 0.0001,
"step": 13250
},
{
"epoch": 4.256,
"grad_norm": 0.0037211861585128617,
"learning_rate": 1.1488e-05,
"loss": 0.0001,
"step": 13300
},
{
"epoch": 4.272,
"grad_norm": 0.07015941754753528,
"learning_rate": 1.1456e-05,
"loss": 0.0001,
"step": 13350
},
{
"epoch": 4.288,
"grad_norm": 0.035187482539997594,
"learning_rate": 1.1424000000000001e-05,
"loss": 0.0003,
"step": 13400
},
{
"epoch": 4.304,
"grad_norm": 0.019999557778834116,
"learning_rate": 1.1392e-05,
"loss": 0.0004,
"step": 13450
},
{
"epoch": 4.32,
"grad_norm": 0.0345649880873456,
"learning_rate": 1.136e-05,
"loss": 0.0006,
"step": 13500
},
{
"epoch": 4.336,
"grad_norm": 0.017367529665425263,
"learning_rate": 1.1328e-05,
"loss": 0.0006,
"step": 13550
},
{
"epoch": 4.352,
"grad_norm": 0.023709723324916443,
"learning_rate": 1.1296e-05,
"loss": 0.0003,
"step": 13600
},
{
"epoch": 4.368,
"grad_norm": 0.018413464418327773,
"learning_rate": 1.1264000000000001e-05,
"loss": 0.0002,
"step": 13650
},
{
"epoch": 4.384,
"grad_norm": 0.0018201928868281155,
"learning_rate": 1.1232e-05,
"loss": 0.0002,
"step": 13700
},
{
"epoch": 4.4,
"grad_norm": 0.00822419431789589,
"learning_rate": 1.1200000000000001e-05,
"loss": 0.0001,
"step": 13750
},
{
"epoch": 4.416,
"grad_norm": 0.006415739011886406,
"learning_rate": 1.1168e-05,
"loss": 0.0001,
"step": 13800
},
{
"epoch": 4.432,
"grad_norm": 0.0004659247582312783,
"learning_rate": 1.1136e-05,
"loss": 0.0001,
"step": 13850
},
{
"epoch": 4.448,
"grad_norm": 0.0005946738812863657,
"learning_rate": 1.1104e-05,
"loss": 0.0001,
"step": 13900
},
{
"epoch": 4.464,
"grad_norm": 0.00046156276112608923,
"learning_rate": 1.1072e-05,
"loss": 0.0001,
"step": 13950
},
{
"epoch": 4.48,
"grad_norm": 0.0005836721479758474,
"learning_rate": 1.1040000000000001e-05,
"loss": 0.0001,
"step": 14000
},
{
"epoch": 4.496,
"grad_norm": 0.001344964325089982,
"learning_rate": 1.1008e-05,
"loss": 0.0001,
"step": 14050
},
{
"epoch": 4.5120000000000005,
"grad_norm": 0.005801872303888594,
"learning_rate": 1.0976e-05,
"loss": 0.0001,
"step": 14100
},
{
"epoch": 4.5280000000000005,
"grad_norm": 9.837216618840086e-05,
"learning_rate": 1.0944e-05,
"loss": 0.0,
"step": 14150
},
{
"epoch": 4.5440000000000005,
"grad_norm": 7.737183380151301e-05,
"learning_rate": 1.0912e-05,
"loss": 0.0,
"step": 14200
},
{
"epoch": 4.5600000000000005,
"grad_norm": 0.0009713119715969353,
"learning_rate": 1.0880000000000001e-05,
"loss": 0.0,
"step": 14250
},
{
"epoch": 4.576,
"grad_norm": 0.005069492905364405,
"learning_rate": 1.0848e-05,
"loss": 0.0,
"step": 14300
},
{
"epoch": 4.592,
"grad_norm": 3.2989301849193866e-05,
"learning_rate": 1.0816e-05,
"loss": 0.0,
"step": 14350
},
{
"epoch": 4.608,
"grad_norm": 0.0015411767068381028,
"learning_rate": 1.0784e-05,
"loss": 0.0,
"step": 14400
},
{
"epoch": 4.624,
"grad_norm": 0.0029576402483524416,
"learning_rate": 1.0752e-05,
"loss": 0.0,
"step": 14450
},
{
"epoch": 4.64,
"grad_norm": 0.0033406485333461813,
"learning_rate": 1.072e-05,
"loss": 0.0,
"step": 14500
},
{
"epoch": 4.656,
"grad_norm": 0.00019611919936137406,
"learning_rate": 1.0688e-05,
"loss": 0.0,
"step": 14550
},
{
"epoch": 4.672,
"grad_norm": 0.0019765899286054755,
"learning_rate": 1.0656000000000003e-05,
"loss": 0.0,
"step": 14600
},
{
"epoch": 4.688,
"grad_norm": 0.0005709409541646127,
"learning_rate": 1.0624e-05,
"loss": 0.0,
"step": 14650
},
{
"epoch": 4.704,
"grad_norm": 0.0023491377959600473,
"learning_rate": 1.0592e-05,
"loss": 0.0,
"step": 14700
},
{
"epoch": 4.72,
"grad_norm": 0.0011388200727303454,
"learning_rate": 1.056e-05,
"loss": 0.0,
"step": 14750
},
{
"epoch": 4.736,
"grad_norm": 0.026118986062423813,
"learning_rate": 1.0528e-05,
"loss": 0.0001,
"step": 14800
},
{
"epoch": 4.752,
"grad_norm": 0.0029740973486947135,
"learning_rate": 1.0496000000000003e-05,
"loss": 0.0002,
"step": 14850
},
{
"epoch": 4.768,
"grad_norm": 0.017336503189413394,
"learning_rate": 1.0464e-05,
"loss": 0.0001,
"step": 14900
},
{
"epoch": 4.784,
"grad_norm": 0.02504986677595465,
"learning_rate": 1.0432e-05,
"loss": 0.0002,
"step": 14950
},
{
"epoch": 4.8,
"grad_norm": 0.007437099599438055,
"learning_rate": 1.04e-05,
"loss": 0.0002,
"step": 15000
},
{
"epoch": 4.816,
"grad_norm": 0.020369582876539328,
"learning_rate": 1.0368e-05,
"loss": 0.0001,
"step": 15050
},
{
"epoch": 4.832,
"grad_norm": 0.0006272672761587281,
"learning_rate": 1.0336000000000002e-05,
"loss": 0.0002,
"step": 15100
},
{
"epoch": 4.848,
"grad_norm": 0.006047362095922896,
"learning_rate": 1.0304e-05,
"loss": 0.0001,
"step": 15150
},
{
"epoch": 4.864,
"grad_norm": 0.015672098166329012,
"learning_rate": 1.0272e-05,
"loss": 0.0001,
"step": 15200
},
{
"epoch": 4.88,
"grad_norm": 0.0030277987193269465,
"learning_rate": 1.024e-05,
"loss": 0.0,
"step": 15250
},
{
"epoch": 4.896,
"grad_norm": 0.0010616116192301943,
"learning_rate": 1.0208e-05,
"loss": 0.0001,
"step": 15300
},
{
"epoch": 4.912,
"grad_norm": 0.0007680822239626422,
"learning_rate": 1.0176000000000002e-05,
"loss": 0.0001,
"step": 15350
},
{
"epoch": 4.928,
"grad_norm": 0.01025708941048719,
"learning_rate": 1.0144e-05,
"loss": 0.0001,
"step": 15400
},
{
"epoch": 4.944,
"grad_norm": 0.011307795907471767,
"learning_rate": 1.0112000000000002e-05,
"loss": 0.0001,
"step": 15450
},
{
"epoch": 4.96,
"grad_norm": 0.0004601284057725081,
"learning_rate": 1.008e-05,
"loss": 0.0,
"step": 15500
},
{
"epoch": 4.976,
"grad_norm": 0.010856337170473403,
"learning_rate": 1.0048e-05,
"loss": 0.0,
"step": 15550
},
{
"epoch": 4.992,
"grad_norm": 0.00015313333402904193,
"learning_rate": 1.0016000000000002e-05,
"loss": 0.0001,
"step": 15600
},
{
"epoch": 5.008,
"grad_norm": 0.0016422517703522808,
"learning_rate": 9.984e-06,
"loss": 0.0,
"step": 15650
},
{
"epoch": 5.024,
"grad_norm": 0.00017065965209302168,
"learning_rate": 9.952e-06,
"loss": 0.0,
"step": 15700
},
{
"epoch": 5.04,
"grad_norm": 0.0023982386902955902,
"learning_rate": 9.920000000000002e-06,
"loss": 0.0001,
"step": 15750
},
{
"epoch": 5.056,
"grad_norm": 0.0001370875982310932,
"learning_rate": 9.888000000000001e-06,
"loss": 0.0,
"step": 15800
},
{
"epoch": 5.072,
"grad_norm": 2.682877220003173e-05,
"learning_rate": 9.856000000000002e-06,
"loss": 0.0,
"step": 15850
},
{
"epoch": 5.088,
"grad_norm": 5.0994540698164586e-05,
"learning_rate": 9.824000000000001e-06,
"loss": 0.0,
"step": 15900
},
{
"epoch": 5.104,
"grad_norm": 4.2441360877384374e-05,
"learning_rate": 9.792e-06,
"loss": 0.0,
"step": 15950
},
{
"epoch": 5.12,
"grad_norm": 0.006419921838441735,
"learning_rate": 9.760000000000001e-06,
"loss": 0.0,
"step": 16000
},
{
"epoch": 5.136,
"grad_norm": 0.0032020770761266313,
"learning_rate": 9.728e-06,
"loss": 0.0,
"step": 16050
},
{
"epoch": 5.152,
"grad_norm": 0.0001985282041764814,
"learning_rate": 9.696000000000002e-06,
"loss": 0.0,
"step": 16100
},
{
"epoch": 5.168,
"grad_norm": 4.7798686858909734e-05,
"learning_rate": 9.664000000000001e-06,
"loss": 0.0,
"step": 16150
},
{
"epoch": 5.184,
"grad_norm": 0.0005436583145042151,
"learning_rate": 9.632e-06,
"loss": 0.0,
"step": 16200
},
{
"epoch": 5.2,
"grad_norm": 4.789698124498285e-05,
"learning_rate": 9.600000000000001e-06,
"loss": 0.0,
"step": 16250
},
{
"epoch": 5.216,
"grad_norm": 6.669996716920356e-05,
"learning_rate": 9.568e-06,
"loss": 0.0,
"step": 16300
},
{
"epoch": 5.232,
"grad_norm": 2.015998723514938e-05,
"learning_rate": 9.536000000000002e-06,
"loss": 0.0,
"step": 16350
},
{
"epoch": 5.248,
"grad_norm": 2.566916763899653e-05,
"learning_rate": 9.504e-06,
"loss": 0.0,
"step": 16400
},
{
"epoch": 5.264,
"grad_norm": 6.973559986609284e-05,
"learning_rate": 9.472000000000002e-06,
"loss": 0.0,
"step": 16450
},
{
"epoch": 5.28,
"grad_norm": 0.005275686664510083,
"learning_rate": 9.440000000000001e-06,
"loss": 0.0,
"step": 16500
},
{
"epoch": 5.296,
"grad_norm": 0.0004379785026117808,
"learning_rate": 9.408e-06,
"loss": 0.0,
"step": 16550
},
{
"epoch": 5.312,
"grad_norm": 7.224360874307222e-05,
"learning_rate": 9.376000000000001e-06,
"loss": 0.0,
"step": 16600
},
{
"epoch": 5.328,
"grad_norm": 0.005529688027168001,
"learning_rate": 9.344e-06,
"loss": 0.0001,
"step": 16650
},
{
"epoch": 5.344,
"grad_norm": 0.00035705853499429783,
"learning_rate": 9.312000000000002e-06,
"loss": 0.0001,
"step": 16700
},
{
"epoch": 5.36,
"grad_norm": 0.004595469236532978,
"learning_rate": 9.280000000000001e-06,
"loss": 0.0002,
"step": 16750
},
{
"epoch": 5.376,
"grad_norm": 0.014518935982881342,
"learning_rate": 9.248e-06,
"loss": 0.0002,
"step": 16800
},
{
"epoch": 5.392,
"grad_norm": 0.005664058901176576,
"learning_rate": 9.216000000000001e-06,
"loss": 0.0001,
"step": 16850
},
{
"epoch": 5.408,
"grad_norm": 0.02406436319347112,
"learning_rate": 9.184e-06,
"loss": 0.0002,
"step": 16900
},
{
"epoch": 5.424,
"grad_norm": 0.010408753408708865,
"learning_rate": 9.152000000000001e-06,
"loss": 0.0001,
"step": 16950
},
{
"epoch": 5.44,
"grad_norm": 0.004380210866505748,
"learning_rate": 9.12e-06,
"loss": 0.0001,
"step": 17000
},
{
"epoch": 5.456,
"grad_norm": 0.0007477014672070001,
"learning_rate": 9.088000000000002e-06,
"loss": 0.0001,
"step": 17050
},
{
"epoch": 5.4719999999999995,
"grad_norm": 0.004612394953866417,
"learning_rate": 9.056000000000001e-06,
"loss": 0.0001,
"step": 17100
},
{
"epoch": 5.4879999999999995,
"grad_norm": 0.00042378436163973034,
"learning_rate": 9.024e-06,
"loss": 0.0001,
"step": 17150
},
{
"epoch": 5.504,
"grad_norm": 0.00019529144567278435,
"learning_rate": 8.992000000000001e-06,
"loss": 0.0001,
"step": 17200
},
{
"epoch": 5.52,
"grad_norm": 0.0027044303589215226,
"learning_rate": 8.96e-06,
"loss": 0.0,
"step": 17250
},
{
"epoch": 5.536,
"grad_norm": 0.004170447127179762,
"learning_rate": 8.928000000000002e-06,
"loss": 0.0,
"step": 17300
},
{
"epoch": 5.552,
"grad_norm": 0.0011583744674708582,
"learning_rate": 8.896000000000001e-06,
"loss": 0.0,
"step": 17350
},
{
"epoch": 5.568,
"grad_norm": 7.505329528247209e-05,
"learning_rate": 8.864e-06,
"loss": 0.0,
"step": 17400
},
{
"epoch": 5.584,
"grad_norm": 1.6692639755468825e-05,
"learning_rate": 8.832000000000001e-06,
"loss": 0.0,
"step": 17450
},
{
"epoch": 5.6,
"grad_norm": 3.792640837382307e-05,
"learning_rate": 8.8e-06,
"loss": 0.0,
"step": 17500
},
{
"epoch": 5.616,
"grad_norm": 0.0034862027436583023,
"learning_rate": 8.768000000000001e-06,
"loss": 0.0,
"step": 17550
},
{
"epoch": 5.632,
"grad_norm": 0.002044696169017184,
"learning_rate": 8.736e-06,
"loss": 0.0,
"step": 17600
},
{
"epoch": 5.648,
"grad_norm": 0.0016480723423270521,
"learning_rate": 8.704e-06,
"loss": 0.0,
"step": 17650
},
{
"epoch": 5.664,
"grad_norm": 5.046250365736628e-05,
"learning_rate": 8.672000000000001e-06,
"loss": 0.0,
"step": 17700
},
{
"epoch": 5.68,
"grad_norm": 0.002490207831005637,
"learning_rate": 8.64e-06,
"loss": 0.0,
"step": 17750
},
{
"epoch": 5.696,
"grad_norm": 2.3620844090189842e-05,
"learning_rate": 8.608000000000001e-06,
"loss": 0.0,
"step": 17800
},
{
"epoch": 5.712,
"grad_norm": 0.005341011948506022,
"learning_rate": 8.576e-06,
"loss": 0.0,
"step": 17850
},
{
"epoch": 5.728,
"grad_norm": 0.001274874971425699,
"learning_rate": 8.544000000000002e-06,
"loss": 0.0,
"step": 17900
},
{
"epoch": 5.744,
"grad_norm": 0.005544577257881219,
"learning_rate": 8.512e-06,
"loss": 0.0,
"step": 17950
},
{
"epoch": 5.76,
"grad_norm": 0.001014221724791846,
"learning_rate": 8.48e-06,
"loss": 0.0,
"step": 18000
},
{
"epoch": 5.776,
"grad_norm": 0.0034755696845187607,
"learning_rate": 8.448000000000001e-06,
"loss": 0.0,
"step": 18050
},
{
"epoch": 5.792,
"grad_norm": 0.0035076131759202932,
"learning_rate": 8.416e-06,
"loss": 0.0,
"step": 18100
},
{
"epoch": 5.808,
"grad_norm": 1.798515357665533e-05,
"learning_rate": 8.384000000000001e-06,
"loss": 0.0,
"step": 18150
},
{
"epoch": 5.824,
"grad_norm": 0.008807581853205114,
"learning_rate": 8.352e-06,
"loss": 0.0,
"step": 18200
},
{
"epoch": 5.84,
"grad_norm": 0.004372862354976277,
"learning_rate": 8.32e-06,
"loss": 0.0,
"step": 18250
},
{
"epoch": 5.856,
"grad_norm": 0.004218059627149101,
"learning_rate": 8.288000000000001e-06,
"loss": 0.0,
"step": 18300
},
{
"epoch": 5.872,
"grad_norm": 0.003638369573242274,
"learning_rate": 8.256e-06,
"loss": 0.0,
"step": 18350
},
{
"epoch": 5.888,
"grad_norm": 5.7538041224172105e-06,
"learning_rate": 8.224000000000001e-06,
"loss": 0.0,
"step": 18400
},
{
"epoch": 5.904,
"grad_norm": 6.190202276164088e-06,
"learning_rate": 8.192e-06,
"loss": 0.0,
"step": 18450
},
{
"epoch": 5.92,
"grad_norm": 0.003102560253484021,
"learning_rate": 8.16e-06,
"loss": 0.0,
"step": 18500
},
{
"epoch": 5.936,
"grad_norm": 0.0030438170806869475,
"learning_rate": 8.128e-06,
"loss": 0.0,
"step": 18550
},
{
"epoch": 5.952,
"grad_norm": 0.004504233459517902,
"learning_rate": 8.096e-06,
"loss": 0.0,
"step": 18600
},
{
"epoch": 5.968,
"grad_norm": 0.004759229281040865,
"learning_rate": 8.064000000000001e-06,
"loss": 0.0,
"step": 18650
},
{
"epoch": 5.984,
"grad_norm": 7.723498841261772e-06,
"learning_rate": 8.032e-06,
"loss": 0.0,
"step": 18700
},
{
"epoch": 6.0,
"grad_norm": 5.552452532832903e-06,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0,
"step": 18750
},
{
"epoch": 6.016,
"grad_norm": 2.597337502435462e-05,
"learning_rate": 7.968e-06,
"loss": 0.0,
"step": 18800
},
{
"epoch": 6.032,
"grad_norm": 0.009335540220574937,
"learning_rate": 7.936e-06,
"loss": 0.0,
"step": 18850
},
{
"epoch": 6.048,
"grad_norm": 7.524061298947454e-06,
"learning_rate": 7.904000000000001e-06,
"loss": 0.0,
"step": 18900
},
{
"epoch": 6.064,
"grad_norm": 0.00015327762567155916,
"learning_rate": 7.872e-06,
"loss": 0.0,
"step": 18950
},
{
"epoch": 6.08,
"grad_norm": 2.3439642609130923e-05,
"learning_rate": 7.840000000000001e-06,
"loss": 0.0,
"step": 19000
},
{
"epoch": 6.096,
"grad_norm": 6.72333328204891e-05,
"learning_rate": 7.808e-06,
"loss": 0.0,
"step": 19050
},
{
"epoch": 6.112,
"grad_norm": 0.004346096242491425,
"learning_rate": 7.776e-06,
"loss": 0.0,
"step": 19100
},
{
"epoch": 6.128,
"grad_norm": 0.012622596321264466,
"learning_rate": 7.744e-06,
"loss": 0.0001,
"step": 19150
},
{
"epoch": 6.144,
"grad_norm": 0.008790621292549712,
"learning_rate": 7.712e-06,
"loss": 0.0002,
"step": 19200
},
{
"epoch": 6.16,
"grad_norm": 0.004938158345308147,
"learning_rate": 7.680000000000001e-06,
"loss": 0.0001,
"step": 19250
},
{
"epoch": 6.176,
"grad_norm": 0.018473675290916154,
"learning_rate": 7.648e-06,
"loss": 0.0002,
"step": 19300
},
{
"epoch": 6.192,
"grad_norm": 0.022804212260300934,
"learning_rate": 7.616000000000001e-06,
"loss": 0.0002,
"step": 19350
},
{
"epoch": 6.208,
"grad_norm": 0.003592326856058541,
"learning_rate": 7.5840000000000006e-06,
"loss": 0.0001,
"step": 19400
},
{
"epoch": 6.224,
"grad_norm": 0.047396594119550994,
"learning_rate": 7.552000000000001e-06,
"loss": 0.0001,
"step": 19450
},
{
"epoch": 6.24,
"grad_norm": 0.009235389481528054,
"learning_rate": 7.520000000000001e-06,
"loss": 0.0002,
"step": 19500
},
{
"epoch": 6.256,
"grad_norm": 0.01434618749682531,
"learning_rate": 7.488000000000001e-06,
"loss": 0.0001,
"step": 19550
},
{
"epoch": 6.272,
"grad_norm": 0.0021773822077626793,
"learning_rate": 7.456000000000001e-06,
"loss": 0.0001,
"step": 19600
},
{
"epoch": 6.288,
"grad_norm": 0.002663391021728829,
"learning_rate": 7.424e-06,
"loss": 0.0,
"step": 19650
},
{
"epoch": 6.304,
"grad_norm": 0.00011747154952013698,
"learning_rate": 7.3920000000000005e-06,
"loss": 0.0,
"step": 19700
},
{
"epoch": 6.32,
"grad_norm": 0.0008636079746021932,
"learning_rate": 7.360000000000001e-06,
"loss": 0.0001,
"step": 19750
},
{
"epoch": 6.336,
"grad_norm": 0.0026435008452155074,
"learning_rate": 7.328000000000001e-06,
"loss": 0.0,
"step": 19800
},
{
"epoch": 6.352,
"grad_norm": 0.0005814556237841476,
"learning_rate": 7.296000000000001e-06,
"loss": 0.0,
"step": 19850
},
{
"epoch": 6.368,
"grad_norm": 0.001405603964445953,
"learning_rate": 7.264000000000001e-06,
"loss": 0.0,
"step": 19900
},
{
"epoch": 6.384,
"grad_norm": 0.0016513732086724116,
"learning_rate": 7.232e-06,
"loss": 0.0,
"step": 19950
},
{
"epoch": 6.4,
"grad_norm": 0.0034893201733840937,
"learning_rate": 7.2000000000000005e-06,
"loss": 0.0,
"step": 20000
},
{
"epoch": 6.416,
"grad_norm": 0.00857451483231717,
"learning_rate": 7.168000000000001e-06,
"loss": 0.0,
"step": 20050
},
{
"epoch": 6.432,
"grad_norm": 2.4199606884085253e-05,
"learning_rate": 7.136000000000001e-06,
"loss": 0.0,
"step": 20100
},
{
"epoch": 6.448,
"grad_norm": 3.250175417169871e-05,
"learning_rate": 7.104000000000001e-06,
"loss": 0.0,
"step": 20150
},
{
"epoch": 6.464,
"grad_norm": 0.005732826853229158,
"learning_rate": 7.072000000000001e-06,
"loss": 0.0,
"step": 20200
},
{
"epoch": 6.48,
"grad_norm": 0.00010353949349009911,
"learning_rate": 7.04e-06,
"loss": 0.0,
"step": 20250
},
{
"epoch": 6.496,
"grad_norm": 0.005592962017534361,
"learning_rate": 7.0080000000000005e-06,
"loss": 0.0,
"step": 20300
},
{
"epoch": 6.5120000000000005,
"grad_norm": 1.8109719094913944e-05,
"learning_rate": 6.976000000000001e-06,
"loss": 0.0,
"step": 20350
},
{
"epoch": 6.5280000000000005,
"grad_norm": 0.0032198732263617894,
"learning_rate": 6.944000000000001e-06,
"loss": 0.0,
"step": 20400
},
{
"epoch": 6.5440000000000005,
"grad_norm": 0.005473202984385111,
"learning_rate": 6.912000000000001e-06,
"loss": 0.0,
"step": 20450
},
{
"epoch": 6.5600000000000005,
"grad_norm": 0.005380069417154803,
"learning_rate": 6.88e-06,
"loss": 0.0,
"step": 20500
},
{
"epoch": 6.576,
"grad_norm": 0.003232314271766742,
"learning_rate": 6.848e-06,
"loss": 0.0,
"step": 20550
},
{
"epoch": 6.592,
"grad_norm": 0.0036696087338221385,
"learning_rate": 6.8160000000000005e-06,
"loss": 0.0,
"step": 20600
},
{
"epoch": 6.608,
"grad_norm": 0.0026588039107233487,
"learning_rate": 6.784000000000001e-06,
"loss": 0.0,
"step": 20650
},
{
"epoch": 6.624,
"grad_norm": 8.99182338038509e-06,
"learning_rate": 6.752000000000001e-06,
"loss": 0.0,
"step": 20700
},
{
"epoch": 6.64,
"grad_norm": 8.563056421425572e-06,
"learning_rate": 6.720000000000001e-06,
"loss": 0.0,
"step": 20750
},
{
"epoch": 6.656,
"grad_norm": 0.00037207476131123044,
"learning_rate": 6.688e-06,
"loss": 0.0,
"step": 20800
},
{
"epoch": 6.672,
"grad_norm": 0.0007496876124411418,
"learning_rate": 6.656e-06,
"loss": 0.0,
"step": 20850
},
{
"epoch": 6.688,
"grad_norm": 0.0031456413963673368,
"learning_rate": 6.6240000000000004e-06,
"loss": 0.0,
"step": 20900
},
{
"epoch": 6.704,
"grad_norm": 0.0013961928602198664,
"learning_rate": 6.592000000000001e-06,
"loss": 0.0,
"step": 20950
},
{
"epoch": 6.72,
"grad_norm": 0.006857717806312309,
"learning_rate": 6.560000000000001e-06,
"loss": 0.0,
"step": 21000
},
{
"epoch": 6.736,
"grad_norm": 0.002786967933254842,
"learning_rate": 6.528000000000001e-06,
"loss": 0.0,
"step": 21050
},
{
"epoch": 6.752,
"grad_norm": 5.396561393846106e-06,
"learning_rate": 6.496e-06,
"loss": 0.0,
"step": 21100
},
{
"epoch": 6.768,
"grad_norm": 9.46138673561178e-06,
"learning_rate": 6.464e-06,
"loss": 0.0,
"step": 21150
},
{
"epoch": 6.784,
"grad_norm": 0.0023837669652299874,
"learning_rate": 6.432e-06,
"loss": 0.0,
"step": 21200
},
{
"epoch": 6.8,
"grad_norm": 1.05038503813717e-05,
"learning_rate": 6.4000000000000006e-06,
"loss": 0.0,
"step": 21250
},
{
"epoch": 6.816,
"grad_norm": 0.0005776930285178894,
"learning_rate": 6.368000000000001e-06,
"loss": 0.0,
"step": 21300
},
{
"epoch": 6.832,
"grad_norm": 9.139296410215242e-06,
"learning_rate": 6.336000000000001e-06,
"loss": 0.0,
"step": 21350
},
{
"epoch": 6.848,
"grad_norm": 0.0021852339939074364,
"learning_rate": 6.304e-06,
"loss": 0.0,
"step": 21400
},
{
"epoch": 6.864,
"grad_norm": 0.005150959364319729,
"learning_rate": 6.272e-06,
"loss": 0.0,
"step": 21450
},
{
"epoch": 6.88,
"grad_norm": 5.175095074364956e-06,
"learning_rate": 6.24e-06,
"loss": 0.0,
"step": 21500
},
{
"epoch": 6.896,
"grad_norm": 0.004969801241634378,
"learning_rate": 6.2080000000000005e-06,
"loss": 0.0,
"step": 21550
},
{
"epoch": 6.912,
"grad_norm": 0.0008299473866216342,
"learning_rate": 6.176000000000001e-06,
"loss": 0.0,
"step": 21600
},
{
"epoch": 6.928,
"grad_norm": 0.0017992643848965695,
"learning_rate": 6.144e-06,
"loss": 0.0,
"step": 21650
},
{
"epoch": 6.944,
"grad_norm": 5.259612030448573e-06,
"learning_rate": 6.112e-06,
"loss": 0.0,
"step": 21700
},
{
"epoch": 6.96,
"grad_norm": 6.964401320931689e-06,
"learning_rate": 6.08e-06,
"loss": 0.0,
"step": 21750
},
{
"epoch": 6.976,
"grad_norm": 5.5934708883321015e-06,
"learning_rate": 6.048e-06,
"loss": 0.0,
"step": 21800
},
{
"epoch": 6.992,
"grad_norm": 0.0003940255704147256,
"learning_rate": 6.0160000000000005e-06,
"loss": 0.0,
"step": 21850
},
{
"epoch": 7.008,
"grad_norm": 5.435289572651918e-06,
"learning_rate": 5.984000000000001e-06,
"loss": 0.0,
"step": 21900
},
{
"epoch": 7.024,
"grad_norm": 0.0042470175422726604,
"learning_rate": 5.952e-06,
"loss": 0.0,
"step": 21950
},
{
"epoch": 7.04,
"grad_norm": 7.335645814032098e-06,
"learning_rate": 5.92e-06,
"loss": 0.0,
"step": 22000
},
{
"epoch": 7.056,
"grad_norm": 0.0009590486824500321,
"learning_rate": 5.888e-06,
"loss": 0.0,
"step": 22050
},
{
"epoch": 7.072,
"grad_norm": 5.906713961463055e-06,
"learning_rate": 5.856e-06,
"loss": 0.0,
"step": 22100
},
{
"epoch": 7.088,
"grad_norm": 0.004176688566628433,
"learning_rate": 5.8240000000000005e-06,
"loss": 0.0,
"step": 22150
},
{
"epoch": 7.104,
"grad_norm": 0.0032506704819596282,
"learning_rate": 5.792000000000001e-06,
"loss": 0.0,
"step": 22200
},
{
"epoch": 7.12,
"grad_norm": 0.0005990950882116851,
"learning_rate": 5.76e-06,
"loss": 0.0,
"step": 22250
},
{
"epoch": 7.136,
"grad_norm": 0.002619844073095375,
"learning_rate": 5.728e-06,
"loss": 0.0,
"step": 22300
},
{
"epoch": 7.152,
"grad_norm": 0.000133825406762409,
"learning_rate": 5.696e-06,
"loss": 0.0,
"step": 22350
},
{
"epoch": 7.168,
"grad_norm": 7.589758778816755e-06,
"learning_rate": 5.664e-06,
"loss": 0.0,
"step": 22400
},
{
"epoch": 7.184,
"grad_norm": 0.0037077771258705637,
"learning_rate": 5.6320000000000005e-06,
"loss": 0.0,
"step": 22450
},
{
"epoch": 7.2,
"grad_norm": 6.45656311225853e-05,
"learning_rate": 5.600000000000001e-06,
"loss": 0.0,
"step": 22500
},
{
"epoch": 7.216,
"grad_norm": 3.53352659793062e-06,
"learning_rate": 5.568e-06,
"loss": 0.0,
"step": 22550
},
{
"epoch": 7.232,
"grad_norm": 4.956625745019511e-06,
"learning_rate": 5.536e-06,
"loss": 0.0,
"step": 22600
},
{
"epoch": 7.248,
"grad_norm": 3.5972044070382584e-06,
"learning_rate": 5.504e-06,
"loss": 0.0,
"step": 22650
},
{
"epoch": 7.264,
"grad_norm": 3.952736091142475e-06,
"learning_rate": 5.472e-06,
"loss": 0.0,
"step": 22700
},
{
"epoch": 7.28,
"grad_norm": 0.002344121199927973,
"learning_rate": 5.4400000000000004e-06,
"loss": 0.0,
"step": 22750
},
{
"epoch": 7.296,
"grad_norm": 0.0014181046917897458,
"learning_rate": 5.408e-06,
"loss": 0.0,
"step": 22800
},
{
"epoch": 7.312,
"grad_norm": 3.673993878498047e-06,
"learning_rate": 5.376e-06,
"loss": 0.0,
"step": 22850
},
{
"epoch": 7.328,
"grad_norm": 0.0007035424885323912,
"learning_rate": 5.344e-06,
"loss": 0.0,
"step": 22900
},
{
"epoch": 7.344,
"grad_norm": 3.4640649140748472e-06,
"learning_rate": 5.312e-06,
"loss": 0.0,
"step": 22950
},
{
"epoch": 7.36,
"grad_norm": 3.397764375456797e-06,
"learning_rate": 5.28e-06,
"loss": 0.0,
"step": 23000
},
{
"epoch": 7.376,
"grad_norm": 6.985809741397173e-06,
"learning_rate": 5.248000000000001e-06,
"loss": 0.0,
"step": 23050
},
{
"epoch": 7.392,
"grad_norm": 0.004727778554217135,
"learning_rate": 5.216e-06,
"loss": 0.0,
"step": 23100
},
{
"epoch": 7.408,
"grad_norm": 0.00034319356861871504,
"learning_rate": 5.184e-06,
"loss": 0.0,
"step": 23150
},
{
"epoch": 7.424,
"grad_norm": 0.002707354453454117,
"learning_rate": 5.152e-06,
"loss": 0.0,
"step": 23200
},
{
"epoch": 7.44,
"grad_norm": 0.003741433501999256,
"learning_rate": 5.12e-06,
"loss": 0.0,
"step": 23250
},
{
"epoch": 7.456,
"grad_norm": 0.004313866605554443,
"learning_rate": 5.088000000000001e-06,
"loss": 0.0,
"step": 23300
},
{
"epoch": 7.4719999999999995,
"grad_norm": 5.048162041463982e-06,
"learning_rate": 5.056000000000001e-06,
"loss": 0.0,
"step": 23350
},
{
"epoch": 7.4879999999999995,
"grad_norm": 0.0025475153276506386,
"learning_rate": 5.024e-06,
"loss": 0.0,
"step": 23400
},
{
"epoch": 7.504,
"grad_norm": 0.0031833329888314825,
"learning_rate": 4.992e-06,
"loss": 0.0,
"step": 23450
},
{
"epoch": 7.52,
"grad_norm": 6.705201147750263e-06,
"learning_rate": 4.960000000000001e-06,
"loss": 0.0,
"step": 23500
},
{
"epoch": 7.536,
"grad_norm": 5.39106789868296e-06,
"learning_rate": 4.928000000000001e-06,
"loss": 0.0,
"step": 23550
},
{
"epoch": 7.552,
"grad_norm": 0.003626501302770346,
"learning_rate": 4.896e-06,
"loss": 0.0,
"step": 23600
},
{
"epoch": 7.568,
"grad_norm": 1.0052769842061194e-05,
"learning_rate": 4.864e-06,
"loss": 0.0,
"step": 23650
},
{
"epoch": 7.584,
"grad_norm": 5.916920444277189e-06,
"learning_rate": 4.8320000000000005e-06,
"loss": 0.0,
"step": 23700
},
{
"epoch": 7.6,
"grad_norm": 0.004523885580121961,
"learning_rate": 4.800000000000001e-06,
"loss": 0.0,
"step": 23750
},
{
"epoch": 7.616,
"grad_norm": 0.00013354452021234782,
"learning_rate": 4.768000000000001e-06,
"loss": 0.0,
"step": 23800
},
{
"epoch": 7.632,
"grad_norm": 0.0012276488785534887,
"learning_rate": 4.736000000000001e-06,
"loss": 0.0,
"step": 23850
},
{
"epoch": 7.648,
"grad_norm": 4.502912527446804e-06,
"learning_rate": 4.704e-06,
"loss": 0.0,
"step": 23900
},
{
"epoch": 7.664,
"grad_norm": 0.0006608232019348104,
"learning_rate": 4.672e-06,
"loss": 0.0,
"step": 23950
},
{
"epoch": 7.68,
"grad_norm": 4.088988845255269e-06,
"learning_rate": 4.6400000000000005e-06,
"loss": 0.0,
"step": 24000
},
{
"epoch": 7.696,
"grad_norm": 0.0003390975010046968,
"learning_rate": 4.608000000000001e-06,
"loss": 0.0,
"step": 24050
},
{
"epoch": 7.712,
"grad_norm": 0.002508565709210886,
"learning_rate": 4.576000000000001e-06,
"loss": 0.0,
"step": 24100
},
{
"epoch": 7.728,
"grad_norm": 0.0015288194430780194,
"learning_rate": 4.544000000000001e-06,
"loss": 0.0,
"step": 24150
},
{
"epoch": 7.744,
"grad_norm": 0.001448906925195681,
"learning_rate": 4.512e-06,
"loss": 0.0,
"step": 24200
},
{
"epoch": 7.76,
"grad_norm": 0.0021363762770405994,
"learning_rate": 4.48e-06,
"loss": 0.0,
"step": 24250
},
{
"epoch": 7.776,
"grad_norm": 0.00031198434311475884,
"learning_rate": 4.4480000000000004e-06,
"loss": 0.0,
"step": 24300
},
{
"epoch": 7.792,
"grad_norm": 9.145186445762476e-05,
"learning_rate": 4.416000000000001e-06,
"loss": 0.0001,
"step": 24350
},
{
"epoch": 7.808,
"grad_norm": 0.0009402193973337792,
"learning_rate": 4.384000000000001e-06,
"loss": 0.0001,
"step": 24400
},
{
"epoch": 7.824,
"grad_norm": 0.00261922858427188,
"learning_rate": 4.352e-06,
"loss": 0.0001,
"step": 24450
},
{
"epoch": 7.84,
"grad_norm": 0.0007561662124930203,
"learning_rate": 4.32e-06,
"loss": 0.0001,
"step": 24500
},
{
"epoch": 7.856,
"grad_norm": 0.0011904195012080216,
"learning_rate": 4.288e-06,
"loss": 0.0001,
"step": 24550
},
{
"epoch": 7.872,
"grad_norm": 0.00682322933598015,
"learning_rate": 4.256e-06,
"loss": 0.0,
"step": 24600
},
{
"epoch": 7.888,
"grad_norm": 1.4918149837028124e-05,
"learning_rate": 4.2240000000000006e-06,
"loss": 0.0,
"step": 24650
},
{
"epoch": 7.904,
"grad_norm": 3.7896845336326056e-05,
"learning_rate": 4.192000000000001e-06,
"loss": 0.0,
"step": 24700
},
{
"epoch": 7.92,
"grad_norm": 1.4233998621857135e-05,
"learning_rate": 4.16e-06,
"loss": 0.0,
"step": 24750
},
{
"epoch": 7.936,
"grad_norm": 1.3512330101803153e-05,
"learning_rate": 4.128e-06,
"loss": 0.0,
"step": 24800
},
{
"epoch": 7.952,
"grad_norm": 0.0017622882875680194,
"learning_rate": 4.096e-06,
"loss": 0.0,
"step": 24850
},
{
"epoch": 7.968,
"grad_norm": 0.004063890556109565,
"learning_rate": 4.064e-06,
"loss": 0.0,
"step": 24900
},
{
"epoch": 7.984,
"grad_norm": 0.006146752020903372,
"learning_rate": 4.0320000000000005e-06,
"loss": 0.0,
"step": 24950
},
{
"epoch": 8.0,
"grad_norm": 0.0045951480519814975,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0,
"step": 25000
},
{
"epoch": 8.016,
"grad_norm": 1.105999334127264e-05,
"learning_rate": 3.968e-06,
"loss": 0.0,
"step": 25050
},
{
"epoch": 8.032,
"grad_norm": 1.4322006340633164e-05,
"learning_rate": 3.936e-06,
"loss": 0.0,
"step": 25100
},
{
"epoch": 8.048,
"grad_norm": 0.00459590439207981,
"learning_rate": 3.904e-06,
"loss": 0.0,
"step": 25150
},
{
"epoch": 8.064,
"grad_norm": 6.626585512636433e-06,
"learning_rate": 3.872e-06,
"loss": 0.0,
"step": 25200
},
{
"epoch": 8.08,
"grad_norm": 3.062805741448072e-05,
"learning_rate": 3.8400000000000005e-06,
"loss": 0.0,
"step": 25250
},
{
"epoch": 8.096,
"grad_norm": 0.00139179287638531,
"learning_rate": 3.8080000000000006e-06,
"loss": 0.0,
"step": 25300
},
{
"epoch": 8.112,
"grad_norm": 3.0149777542350257e-05,
"learning_rate": 3.7760000000000004e-06,
"loss": 0.0,
"step": 25350
},
{
"epoch": 8.128,
"grad_norm": 0.003113858325217473,
"learning_rate": 3.7440000000000005e-06,
"loss": 0.0,
"step": 25400
},
{
"epoch": 8.144,
"grad_norm": 6.947925699992271e-06,
"learning_rate": 3.712e-06,
"loss": 0.0,
"step": 25450
},
{
"epoch": 8.16,
"grad_norm": 0.002796916584528065,
"learning_rate": 3.6800000000000003e-06,
"loss": 0.0,
"step": 25500
},
{
"epoch": 8.176,
"grad_norm": 6.18407640833407e-06,
"learning_rate": 3.6480000000000005e-06,
"loss": 0.0,
"step": 25550
},
{
"epoch": 8.192,
"grad_norm": 3.985635768893783e-06,
"learning_rate": 3.616e-06,
"loss": 0.0,
"step": 25600
},
{
"epoch": 8.208,
"grad_norm": 4.306104409089461e-06,
"learning_rate": 3.5840000000000003e-06,
"loss": 0.0,
"step": 25650
},
{
"epoch": 8.224,
"grad_norm": 1.806410872336378e-05,
"learning_rate": 3.5520000000000005e-06,
"loss": 0.0,
"step": 25700
},
{
"epoch": 8.24,
"grad_norm": 3.194958556657671e-05,
"learning_rate": 3.52e-06,
"loss": 0.0,
"step": 25750
},
{
"epoch": 8.256,
"grad_norm": 0.0027543054021635484,
"learning_rate": 3.4880000000000003e-06,
"loss": 0.0,
"step": 25800
},
{
"epoch": 8.272,
"grad_norm": 0.0005772419217733996,
"learning_rate": 3.4560000000000005e-06,
"loss": 0.0,
"step": 25850
},
{
"epoch": 8.288,
"grad_norm": 0.00030919843905252405,
"learning_rate": 3.424e-06,
"loss": 0.0,
"step": 25900
},
{
"epoch": 8.304,
"grad_norm": 4.280920759753649e-06,
"learning_rate": 3.3920000000000003e-06,
"loss": 0.0,
"step": 25950
},
{
"epoch": 8.32,
"grad_norm": 8.10017575098728e-06,
"learning_rate": 3.3600000000000004e-06,
"loss": 0.0,
"step": 26000
},
{
"epoch": 8.336,
"grad_norm": 0.0028206230272121986,
"learning_rate": 3.328e-06,
"loss": 0.0,
"step": 26050
},
{
"epoch": 8.352,
"grad_norm": 0.002696578509199753,
"learning_rate": 3.2960000000000003e-06,
"loss": 0.0,
"step": 26100
},
{
"epoch": 8.368,
"grad_norm": 4.570628348792391e-06,
"learning_rate": 3.2640000000000004e-06,
"loss": 0.0,
"step": 26150
},
{
"epoch": 8.384,
"grad_norm": 5.1947235707447225e-06,
"learning_rate": 3.232e-06,
"loss": 0.0,
"step": 26200
},
{
"epoch": 8.4,
"grad_norm": 0.002501990498399514,
"learning_rate": 3.2000000000000003e-06,
"loss": 0.0,
"step": 26250
},
{
"epoch": 8.416,
"grad_norm": 0.003409989286376495,
"learning_rate": 3.1680000000000004e-06,
"loss": 0.0,
"step": 26300
},
{
"epoch": 8.432,
"grad_norm": 0.0026823086545517314,
"learning_rate": 3.136e-06,
"loss": 0.0,
"step": 26350
},
{
"epoch": 8.448,
"grad_norm": 0.0022783205269016425,
"learning_rate": 3.1040000000000003e-06,
"loss": 0.0,
"step": 26400
},
{
"epoch": 8.464,
"grad_norm": 6.336465247361757e-06,
"learning_rate": 3.072e-06,
"loss": 0.0,
"step": 26450
},
{
"epoch": 8.48,
"grad_norm": 4.592277322590752e-06,
"learning_rate": 3.04e-06,
"loss": 0.0,
"step": 26500
},
{
"epoch": 8.496,
"grad_norm": 0.002440763934157598,
"learning_rate": 3.0080000000000003e-06,
"loss": 0.0,
"step": 26550
},
{
"epoch": 8.512,
"grad_norm": 0.0017411545218379125,
"learning_rate": 2.976e-06,
"loss": 0.0,
"step": 26600
},
{
"epoch": 8.528,
"grad_norm": 0.0008851555302938333,
"learning_rate": 2.944e-06,
"loss": 0.0,
"step": 26650
},
{
"epoch": 8.544,
"grad_norm": 5.758051362572022e-06,
"learning_rate": 2.9120000000000002e-06,
"loss": 0.0,
"step": 26700
},
{
"epoch": 8.56,
"grad_norm": 3.791587800895384e-06,
"learning_rate": 2.88e-06,
"loss": 0.0,
"step": 26750
},
{
"epoch": 8.576,
"grad_norm": 1.1417223748270065e-05,
"learning_rate": 2.848e-06,
"loss": 0.0,
"step": 26800
},
{
"epoch": 8.592,
"grad_norm": 1.93146445830669e-05,
"learning_rate": 2.8160000000000002e-06,
"loss": 0.0,
"step": 26850
},
{
"epoch": 8.608,
"grad_norm": 3.0035429272579585e-06,
"learning_rate": 2.784e-06,
"loss": 0.0,
"step": 26900
},
{
"epoch": 8.624,
"grad_norm": 3.1422754893000723e-06,
"learning_rate": 2.752e-06,
"loss": 0.0,
"step": 26950
},
{
"epoch": 8.64,
"grad_norm": 0.0016051837845728594,
"learning_rate": 2.7200000000000002e-06,
"loss": 0.0,
"step": 27000
},
{
"epoch": 8.656,
"grad_norm": 3.0331981849054454e-06,
"learning_rate": 2.688e-06,
"loss": 0.0,
"step": 27050
},
{
"epoch": 8.672,
"grad_norm": 2.688345145719462e-06,
"learning_rate": 2.656e-06,
"loss": 0.0,
"step": 27100
},
{
"epoch": 8.688,
"grad_norm": 0.0018971614745931684,
"learning_rate": 2.6240000000000006e-06,
"loss": 0.0,
"step": 27150
},
{
"epoch": 8.704,
"grad_norm": 0.001157850108603949,
"learning_rate": 2.592e-06,
"loss": 0.0,
"step": 27200
},
{
"epoch": 8.72,
"grad_norm": 9.438485254012652e-06,
"learning_rate": 2.56e-06,
"loss": 0.0,
"step": 27250
},
{
"epoch": 8.736,
"grad_norm": 0.003824595780488627,
"learning_rate": 2.5280000000000006e-06,
"loss": 0.0,
"step": 27300
},
{
"epoch": 8.752,
"grad_norm": 5.80867931861633e-06,
"learning_rate": 2.496e-06,
"loss": 0.0,
"step": 27350
},
{
"epoch": 8.768,
"grad_norm": 0.00834510993524938,
"learning_rate": 2.4640000000000005e-06,
"loss": 0.0,
"step": 27400
},
{
"epoch": 8.784,
"grad_norm": 3.36769367606297e-06,
"learning_rate": 2.432e-06,
"loss": 0.0,
"step": 27450
},
{
"epoch": 8.8,
"grad_norm": 3.5500421331503815e-06,
"learning_rate": 2.4000000000000003e-06,
"loss": 0.0,
"step": 27500
},
{
"epoch": 8.816,
"grad_norm": 0.005803545750156033,
"learning_rate": 2.3680000000000005e-06,
"loss": 0.0,
"step": 27550
},
{
"epoch": 8.832,
"grad_norm": 0.003256066741531994,
"learning_rate": 2.336e-06,
"loss": 0.0,
"step": 27600
},
{
"epoch": 8.848,
"grad_norm": 2.75992066631196e-06,
"learning_rate": 2.3040000000000003e-06,
"loss": 0.0,
"step": 27650
},
{
"epoch": 8.864,
"grad_norm": 0.002876840934498164,
"learning_rate": 2.2720000000000004e-06,
"loss": 0.0,
"step": 27700
},
{
"epoch": 8.88,
"grad_norm": 3.6932448968918764e-06,
"learning_rate": 2.24e-06,
"loss": 0.0,
"step": 27750
},
{
"epoch": 8.896,
"grad_norm": 0.002516088845243444,
"learning_rate": 2.2080000000000003e-06,
"loss": 0.0,
"step": 27800
},
{
"epoch": 8.912,
"grad_norm": 2.7651064988456252e-06,
"learning_rate": 2.176e-06,
"loss": 0.0,
"step": 27850
},
{
"epoch": 8.928,
"grad_norm": 4.677689598922778e-06,
"learning_rate": 2.144e-06,
"loss": 0.0,
"step": 27900
},
{
"epoch": 8.943999999999999,
"grad_norm": 3.5933917656898116e-06,
"learning_rate": 2.1120000000000003e-06,
"loss": 0.0,
"step": 27950
},
{
"epoch": 8.96,
"grad_norm": 0.0026837812666487813,
"learning_rate": 2.08e-06,
"loss": 0.0,
"step": 28000
},
{
"epoch": 8.975999999999999,
"grad_norm": 6.660030265311055e-06,
"learning_rate": 2.048e-06,
"loss": 0.0,
"step": 28050
},
{
"epoch": 8.992,
"grad_norm": 0.00698615969912155,
"learning_rate": 2.0160000000000003e-06,
"loss": 0.0,
"step": 28100
},
{
"epoch": 9.008,
"grad_norm": 6.071643098745315e-06,
"learning_rate": 1.984e-06,
"loss": 0.0,
"step": 28150
},
{
"epoch": 9.024,
"grad_norm": 0.002739745016870674,
"learning_rate": 1.952e-06,
"loss": 0.0,
"step": 28200
},
{
"epoch": 9.04,
"grad_norm": 0.0018820591441367112,
"learning_rate": 1.9200000000000003e-06,
"loss": 0.0,
"step": 28250
},
{
"epoch": 9.056,
"grad_norm": 4.422973643174459e-06,
"learning_rate": 1.8880000000000002e-06,
"loss": 0.0,
"step": 28300
},
{
"epoch": 9.072,
"grad_norm": 0.004241223054630402,
"learning_rate": 1.856e-06,
"loss": 0.0,
"step": 28350
},
{
"epoch": 9.088,
"grad_norm": 0.0018724180201145385,
"learning_rate": 1.8240000000000002e-06,
"loss": 0.0,
"step": 28400
},
{
"epoch": 9.104,
"grad_norm": 3.230264296112646e-06,
"learning_rate": 1.7920000000000002e-06,
"loss": 0.0,
"step": 28450
},
{
"epoch": 9.12,
"grad_norm": 0.001800105085499999,
"learning_rate": 1.76e-06,
"loss": 0.0,
"step": 28500
},
{
"epoch": 9.136,
"grad_norm": 3.4036067894074316e-06,
"learning_rate": 1.7280000000000002e-06,
"loss": 0.0,
"step": 28550
},
{
"epoch": 9.152,
"grad_norm": 2.9416031574887065e-06,
"learning_rate": 1.6960000000000002e-06,
"loss": 0.0,
"step": 28600
},
{
"epoch": 9.168,
"grad_norm": 0.0074933729947643645,
"learning_rate": 1.664e-06,
"loss": 0.0,
"step": 28650
},
{
"epoch": 9.184,
"grad_norm": 2.6324603429806364e-06,
"learning_rate": 1.6320000000000002e-06,
"loss": 0.0,
"step": 28700
},
{
"epoch": 9.2,
"grad_norm": 0.004046684231625261,
"learning_rate": 1.6000000000000001e-06,
"loss": 0.0,
"step": 28750
},
{
"epoch": 9.216,
"grad_norm": 2.8312469098439607e-06,
"learning_rate": 1.568e-06,
"loss": 0.0,
"step": 28800
},
{
"epoch": 9.232,
"grad_norm": 7.031184057455803e-06,
"learning_rate": 1.536e-06,
"loss": 0.0,
"step": 28850
},
{
"epoch": 9.248,
"grad_norm": 2.7837343248805205e-06,
"learning_rate": 1.5040000000000001e-06,
"loss": 0.0,
"step": 28900
},
{
"epoch": 9.264,
"grad_norm": 2.6096489509437644e-06,
"learning_rate": 1.472e-06,
"loss": 0.0,
"step": 28950
},
{
"epoch": 9.28,
"grad_norm": 0.003287527391462962,
"learning_rate": 1.44e-06,
"loss": 0.0,
"step": 29000
},
{
"epoch": 9.296,
"grad_norm": 3.568400823063364e-06,
"learning_rate": 1.4080000000000001e-06,
"loss": 0.0,
"step": 29050
},
{
"epoch": 9.312,
"grad_norm": 0.001613374504927987,
"learning_rate": 1.376e-06,
"loss": 0.0,
"step": 29100
},
{
"epoch": 9.328,
"grad_norm": 0.004032203594194166,
"learning_rate": 1.344e-06,
"loss": 0.0,
"step": 29150
},
{
"epoch": 9.344,
"grad_norm": 0.005198815071921098,
"learning_rate": 1.3120000000000003e-06,
"loss": 0.0,
"step": 29200
},
{
"epoch": 9.36,
"grad_norm": 2.8504515758333786e-06,
"learning_rate": 1.28e-06,
"loss": 0.0,
"step": 29250
},
{
"epoch": 9.376,
"grad_norm": 0.0065733355533144095,
"learning_rate": 1.248e-06,
"loss": 0.0,
"step": 29300
},
{
"epoch": 9.392,
"grad_norm": 0.0014655085457166437,
"learning_rate": 1.216e-06,
"loss": 0.0,
"step": 29350
},
{
"epoch": 9.408,
"grad_norm": 0.002317977733364113,
"learning_rate": 1.1840000000000002e-06,
"loss": 0.0,
"step": 29400
},
{
"epoch": 9.424,
"grad_norm": 3.1712820857845064e-06,
"learning_rate": 1.1520000000000002e-06,
"loss": 0.0,
"step": 29450
},
{
"epoch": 9.44,
"grad_norm": 0.0017765942152206172,
"learning_rate": 1.12e-06,
"loss": 0.0,
"step": 29500
},
{
"epoch": 9.456,
"grad_norm": 3.298030137729072e-06,
"learning_rate": 1.088e-06,
"loss": 0.0,
"step": 29550
},
{
"epoch": 9.472,
"grad_norm": 0.0035002110108688657,
"learning_rate": 1.0560000000000001e-06,
"loss": 0.0,
"step": 29600
},
{
"epoch": 9.488,
"grad_norm": 0.003870923448853272,
"learning_rate": 1.024e-06,
"loss": 0.0,
"step": 29650
},
{
"epoch": 9.504,
"grad_norm": 0.00203367970856836,
"learning_rate": 9.92e-07,
"loss": 0.0,
"step": 29700
},
{
"epoch": 9.52,
"grad_norm": 0.006006505453486269,
"learning_rate": 9.600000000000001e-07,
"loss": 0.0,
"step": 29750
},
{
"epoch": 9.536,
"grad_norm": 0.01038968138831676,
"learning_rate": 9.28e-07,
"loss": 0.0,
"step": 29800
},
{
"epoch": 9.552,
"grad_norm": 0.0008476742919666878,
"learning_rate": 8.960000000000001e-07,
"loss": 0.0,
"step": 29850
},
{
"epoch": 9.568,
"grad_norm": 0.0029285556516275065,
"learning_rate": 8.640000000000001e-07,
"loss": 0.0,
"step": 29900
},
{
"epoch": 9.584,
"grad_norm": 3.538940194233791e-06,
"learning_rate": 8.32e-07,
"loss": 0.0,
"step": 29950
},
{
"epoch": 9.6,
"grad_norm": 2.6317247125415924e-06,
"learning_rate": 8.000000000000001e-07,
"loss": 0.0,
"step": 30000
},
{
"epoch": 9.616,
"grad_norm": 3.1723179921373707e-06,
"learning_rate": 7.68e-07,
"loss": 0.0,
"step": 30050
},
{
"epoch": 9.632,
"grad_norm": 0.0017773797129791615,
"learning_rate": 7.36e-07,
"loss": 0.0,
"step": 30100
},
{
"epoch": 9.648,
"grad_norm": 3.890700637247315e-06,
"learning_rate": 7.040000000000001e-07,
"loss": 0.0,
"step": 30150
},
{
"epoch": 9.664,
"grad_norm": 0.0014622567589594895,
"learning_rate": 6.72e-07,
"loss": 0.0,
"step": 30200
},
{
"epoch": 9.68,
"grad_norm": 0.006419297008436344,
"learning_rate": 6.4e-07,
"loss": 0.0,
"step": 30250
},
{
"epoch": 9.696,
"grad_norm": 3.365707076677938e-06,
"learning_rate": 6.08e-07,
"loss": 0.0,
"step": 30300
},
{
"epoch": 9.712,
"grad_norm": 0.00939817499692286,
"learning_rate": 5.760000000000001e-07,
"loss": 0.0,
"step": 30350
},
{
"epoch": 9.728,
"grad_norm": 2.7639546967269018e-06,
"learning_rate": 5.44e-07,
"loss": 0.0,
"step": 30400
},
{
"epoch": 9.744,
"grad_norm": 3.228059969214847e-06,
"learning_rate": 5.12e-07,
"loss": 0.0,
"step": 30450
},
{
"epoch": 9.76,
"grad_norm": 2.9143603801018378e-06,
"learning_rate": 4.800000000000001e-07,
"loss": 0.0,
"step": 30500
},
{
"epoch": 9.776,
"grad_norm": 4.472677373176853e-06,
"learning_rate": 4.4800000000000004e-07,
"loss": 0.0,
"step": 30550
},
{
"epoch": 9.792,
"grad_norm": 0.003177155397723066,
"learning_rate": 4.16e-07,
"loss": 0.0,
"step": 30600
},
{
"epoch": 9.808,
"grad_norm": 2.9639748677203557e-06,
"learning_rate": 3.84e-07,
"loss": 0.0,
"step": 30650
},
{
"epoch": 9.824,
"grad_norm": 3.7104760090468066e-06,
"learning_rate": 3.5200000000000003e-07,
"loss": 0.0,
"step": 30700
},
{
"epoch": 9.84,
"grad_norm": 0.0024388786980219382,
"learning_rate": 3.2e-07,
"loss": 0.0,
"step": 30750
},
{
"epoch": 9.856,
"grad_norm": 0.005259090550834367,
"learning_rate": 2.8800000000000004e-07,
"loss": 0.0,
"step": 30800
},
{
"epoch": 9.872,
"grad_norm": 0.0010068931779022919,
"learning_rate": 2.56e-07,
"loss": 0.0,
"step": 30850
},
{
"epoch": 9.888,
"grad_norm": 3.036971816568042e-06,
"learning_rate": 2.2400000000000002e-07,
"loss": 0.0,
"step": 30900
},
{
"epoch": 9.904,
"grad_norm": 5.944920943941958e-06,
"learning_rate": 1.92e-07,
"loss": 0.0,
"step": 30950
},
{
"epoch": 9.92,
"grad_norm": 3.1533133218253755e-06,
"learning_rate": 1.6e-07,
"loss": 0.0,
"step": 31000
},
{
"epoch": 9.936,
"grad_norm": 0.0008556866274702537,
"learning_rate": 1.28e-07,
"loss": 0.0,
"step": 31050
},
{
"epoch": 9.952,
"grad_norm": 0.0019244948361365065,
"learning_rate": 9.6e-08,
"loss": 0.0,
"step": 31100
},
{
"epoch": 9.968,
"grad_norm": 4.299595493855448e-06,
"learning_rate": 6.4e-08,
"loss": 0.0,
"step": 31150
},
{
"epoch": 9.984,
"grad_norm": 3.017341374030069e-06,
"learning_rate": 3.2e-08,
"loss": 0.0,
"step": 31200
},
{
"epoch": 10.0,
"grad_norm": 3.6112124670701524e-06,
"learning_rate": 0.0,
"loss": 0.0,
"step": 31250
}
],
"logging_steps": 50,
"max_steps": 31250,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.47527639760896e+16,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}