{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 31250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.016,
      "grad_norm": 0.4162791292708614,
      "learning_rate": 1.9968e-05,
      "loss": 0.4754,
      "step": 50
    },
    {
      "epoch": 0.032,
      "grad_norm": 0.2034076606680038,
      "learning_rate": 1.9936000000000004e-05,
      "loss": 0.0184,
      "step": 100
    },
    {
      "epoch": 0.048,
      "grad_norm": 0.32707728996877367,
      "learning_rate": 1.9904e-05,
      "loss": 0.0148,
      "step": 150
    },
    {
      "epoch": 0.064,
      "grad_norm": 0.4247544882383208,
      "learning_rate": 1.9872000000000002e-05,
      "loss": 0.0126,
      "step": 200
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.25015536838552505,
      "learning_rate": 1.9840000000000003e-05,
      "loss": 0.0102,
      "step": 250
    },
    {
      "epoch": 0.096,
      "grad_norm": 0.1173465173905855,
      "learning_rate": 1.9808e-05,
      "loss": 0.0086,
      "step": 300
    },
    {
      "epoch": 0.112,
      "grad_norm": 0.17858061231251363,
      "learning_rate": 1.9776000000000002e-05,
      "loss": 0.0072,
      "step": 350
    },
    {
      "epoch": 0.128,
      "grad_norm": 0.12166310007206395,
      "learning_rate": 1.9744e-05,
      "loss": 0.006,
      "step": 400
    },
    {
      "epoch": 0.144,
      "grad_norm": 0.1115878060467963,
      "learning_rate": 1.9712000000000004e-05,
      "loss": 0.0052,
      "step": 450
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.06376502592170473,
      "learning_rate": 1.968e-05,
      "loss": 0.0044,
      "step": 500
    },
    {
      "epoch": 0.176,
      "grad_norm": 0.10598418241747591,
      "learning_rate": 1.9648000000000002e-05,
      "loss": 0.0036,
      "step": 550
    },
    {
      "epoch": 0.192,
      "grad_norm": 0.1240464209943554,
      "learning_rate": 1.9616000000000003e-05,
      "loss": 0.0031,
      "step": 600
    },
    {
      "epoch": 0.208,
      "grad_norm": 0.0985656589222262,
      "learning_rate": 1.9584e-05,
      "loss": 0.0026,
      "step": 650
    },
    {
      "epoch": 0.224,
      "grad_norm": 0.10986810134020673,
      "learning_rate": 1.9552000000000002e-05,
      "loss": 0.0023,
      "step": 700
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.1103690941151271,
      "learning_rate": 1.9520000000000003e-05,
      "loss": 0.002,
      "step": 750
    },
    {
      "epoch": 0.256,
      "grad_norm": 0.06027351422139254,
      "learning_rate": 1.9488000000000004e-05,
      "loss": 0.0018,
      "step": 800
    },
    {
      "epoch": 0.272,
      "grad_norm": 0.10889576295219647,
      "learning_rate": 1.9456e-05,
      "loss": 0.0015,
      "step": 850
    },
    {
      "epoch": 0.288,
      "grad_norm": 0.06822239222520912,
      "learning_rate": 1.9424e-05,
      "loss": 0.0014,
      "step": 900
    },
    {
      "epoch": 0.304,
      "grad_norm": 0.09673723500638307,
      "learning_rate": 1.9392000000000003e-05,
      "loss": 0.0012,
      "step": 950
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.04816136721008917,
      "learning_rate": 1.936e-05,
      "loss": 0.0011,
      "step": 1000
    },
    {
      "epoch": 0.336,
      "grad_norm": 0.036073116914313585,
      "learning_rate": 1.9328000000000002e-05,
      "loss": 0.0012,
      "step": 1050
    },
    {
      "epoch": 0.352,
      "grad_norm": 0.06730673946375604,
      "learning_rate": 1.9296000000000003e-05,
      "loss": 0.0009,
      "step": 1100
    },
    {
      "epoch": 0.368,
      "grad_norm": 0.03491648747017885,
      "learning_rate": 1.9264e-05,
      "loss": 0.0011,
      "step": 1150
    },
    {
      "epoch": 0.384,
      "grad_norm": 0.05153510670302306,
      "learning_rate": 1.9232e-05,
      "loss": 0.0009,
      "step": 1200
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.04307035845962119,
      "learning_rate": 1.9200000000000003e-05,
      "loss": 0.0008,
      "step": 1250
    },
    {
      "epoch": 0.416,
      "grad_norm": 0.03949817663105664,
      "learning_rate": 1.9168000000000004e-05,
      "loss": 0.0006,
      "step": 1300
    },
    {
      "epoch": 0.432,
      "grad_norm": 0.046238030209222174,
      "learning_rate": 1.9136e-05,
      "loss": 0.0007,
      "step": 1350
    },
    {
      "epoch": 0.448,
      "grad_norm": 0.04736527158661927,
      "learning_rate": 1.9104000000000002e-05,
      "loss": 0.0007,
      "step": 1400
    },
    {
      "epoch": 0.464,
      "grad_norm": 1.0273111933810504,
      "learning_rate": 1.9072000000000003e-05,
      "loss": 0.0511,
      "step": 1450
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.05230770995494484,
      "learning_rate": 1.904e-05,
      "loss": 0.0006,
      "step": 1500
    },
    {
      "epoch": 0.496,
      "grad_norm": 0.030869197184465513,
      "learning_rate": 1.9008e-05,
      "loss": 0.0006,
      "step": 1550
    },
    {
      "epoch": 0.512,
      "grad_norm": 0.035546700143440524,
      "learning_rate": 1.8976000000000003e-05,
      "loss": 0.0005,
      "step": 1600
    },
    {
      "epoch": 0.528,
      "grad_norm": 0.05475624621759873,
      "learning_rate": 1.8944000000000004e-05,
      "loss": 0.0005,
      "step": 1650
    },
    {
      "epoch": 0.544,
      "grad_norm": 0.011229400249910374,
      "learning_rate": 1.8912e-05,
      "loss": 0.0004,
      "step": 1700
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.03437935699788119,
      "learning_rate": 1.8880000000000002e-05,
      "loss": 0.0004,
      "step": 1750
    },
    {
      "epoch": 0.576,
      "grad_norm": 0.032764646105168924,
      "learning_rate": 1.8848000000000003e-05,
      "loss": 0.0004,
      "step": 1800
    },
    {
      "epoch": 0.592,
      "grad_norm": 0.020472956028111056,
      "learning_rate": 1.8816e-05,
      "loss": 0.0004,
      "step": 1850
    },
    {
      "epoch": 0.608,
      "grad_norm": 0.014897727874505565,
      "learning_rate": 1.8784000000000002e-05,
      "loss": 0.0003,
      "step": 1900
    },
    {
      "epoch": 0.624,
      "grad_norm": 0.016804462072975052,
      "learning_rate": 1.8752000000000003e-05,
      "loss": 0.0004,
      "step": 1950
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.0555365139060795,
      "learning_rate": 1.8720000000000004e-05,
      "loss": 0.0003,
      "step": 2000
    },
    {
      "epoch": 0.656,
      "grad_norm": 0.04167732575085471,
      "learning_rate": 1.8688e-05,
      "loss": 0.0004,
      "step": 2050
    },
    {
      "epoch": 0.672,
      "grad_norm": 0.02351053070911185,
      "learning_rate": 1.8656000000000002e-05,
      "loss": 0.0004,
      "step": 2100
    },
    {
      "epoch": 0.688,
      "grad_norm": 0.04978335013967468,
      "learning_rate": 1.8624000000000003e-05,
      "loss": 0.0004,
      "step": 2150
    },
    {
      "epoch": 0.704,
      "grad_norm": 0.03666638757885489,
      "learning_rate": 1.8592e-05,
      "loss": 0.0005,
      "step": 2200
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.032014118732009544,
      "learning_rate": 1.8560000000000002e-05,
      "loss": 0.0005,
      "step": 2250
    },
    {
      "epoch": 0.736,
      "grad_norm": 0.05999773243277201,
      "learning_rate": 1.8528000000000003e-05,
      "loss": 0.0004,
      "step": 2300
    },
    {
      "epoch": 0.752,
      "grad_norm": 0.040391581637471245,
      "learning_rate": 1.8496e-05,
      "loss": 0.0005,
      "step": 2350
    },
    {
      "epoch": 0.768,
      "grad_norm": 0.03975437942972135,
      "learning_rate": 1.8464e-05,
      "loss": 0.0005,
      "step": 2400
    },
    {
      "epoch": 0.784,
      "grad_norm": 0.025970283264487873,
      "learning_rate": 1.8432000000000002e-05,
      "loss": 0.0004,
      "step": 2450
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.02563127650055879,
      "learning_rate": 1.8400000000000003e-05,
      "loss": 0.0004,
      "step": 2500
    },
    {
      "epoch": 0.816,
      "grad_norm": 0.045543501280072535,
      "learning_rate": 1.8368e-05,
      "loss": 0.0004,
      "step": 2550
    },
    {
      "epoch": 0.832,
      "grad_norm": 0.03591751155162754,
      "learning_rate": 1.8336000000000002e-05,
      "loss": 0.0006,
      "step": 2600
    },
    {
      "epoch": 0.848,
      "grad_norm": 0.02547674454985814,
      "learning_rate": 1.8304000000000003e-05,
      "loss": 0.0005,
      "step": 2650
    },
    {
      "epoch": 0.864,
      "grad_norm": 0.053643683147153336,
      "learning_rate": 1.8272e-05,
      "loss": 0.0004,
      "step": 2700
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.03266181119414202,
      "learning_rate": 1.824e-05,
      "loss": 0.0006,
      "step": 2750
    },
    {
      "epoch": 0.896,
      "grad_norm": 0.056105089349029955,
      "learning_rate": 1.8208000000000003e-05,
      "loss": 0.0005,
      "step": 2800
    },
    {
      "epoch": 0.912,
      "grad_norm": 0.024340620330705078,
      "learning_rate": 1.8176000000000004e-05,
      "loss": 0.0005,
      "step": 2850
    },
    {
      "epoch": 0.928,
      "grad_norm": 0.030931428596602917,
      "learning_rate": 1.8144e-05,
      "loss": 0.0004,
      "step": 2900
    },
    {
      "epoch": 0.944,
      "grad_norm": 0.21827287090117428,
      "learning_rate": 1.8112000000000002e-05,
      "loss": 0.0012,
      "step": 2950
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.048404745271573284,
      "learning_rate": 1.8080000000000003e-05,
      "loss": 0.0009,
      "step": 3000
    },
    {
      "epoch": 0.976,
      "grad_norm": 0.03511490038904797,
      "learning_rate": 1.8048e-05,
      "loss": 0.0004,
      "step": 3050
    },
    {
      "epoch": 0.992,
      "grad_norm": 0.0450712481155652,
      "learning_rate": 1.8016e-05,
      "loss": 0.0005,
      "step": 3100
    },
    {
      "epoch": 1.008,
      "grad_norm": 0.025904643033032682,
      "learning_rate": 1.7984000000000003e-05,
      "loss": 0.0003,
      "step": 3150
    },
    {
      "epoch": 1.024,
      "grad_norm": 0.026407027678467632,
      "learning_rate": 1.7952e-05,
      "loss": 0.0003,
      "step": 3200
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.021041390612394642,
      "learning_rate": 1.792e-05,
      "loss": 0.0003,
      "step": 3250
    },
    {
      "epoch": 1.056,
      "grad_norm": 0.020868716704968104,
      "learning_rate": 1.7888000000000002e-05,
      "loss": 0.0003,
      "step": 3300
    },
    {
      "epoch": 1.072,
      "grad_norm": 0.01496037199648747,
      "learning_rate": 1.7856000000000003e-05,
      "loss": 0.0003,
      "step": 3350
    },
    {
      "epoch": 1.088,
      "grad_norm": 0.025499824661529856,
      "learning_rate": 1.7824e-05,
      "loss": 0.0002,
      "step": 3400
    },
    {
      "epoch": 1.104,
      "grad_norm": 0.01913077710522383,
      "learning_rate": 1.7792000000000002e-05,
      "loss": 0.0002,
      "step": 3450
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.03713721333860161,
      "learning_rate": 1.7760000000000003e-05,
      "loss": 0.0002,
      "step": 3500
    },
    {
      "epoch": 1.1360000000000001,
      "grad_norm": 0.03222855382697803,
      "learning_rate": 1.7728e-05,
      "loss": 0.0003,
      "step": 3550
    },
    {
      "epoch": 1.152,
      "grad_norm": 0.007975587682259639,
      "learning_rate": 1.7696e-05,
      "loss": 0.0002,
      "step": 3600
    },
    {
      "epoch": 1.168,
      "grad_norm": 0.002794706661316828,
      "learning_rate": 1.7664000000000002e-05,
      "loss": 0.0002,
      "step": 3650
    },
    {
      "epoch": 1.184,
      "grad_norm": 0.032076141646391186,
      "learning_rate": 1.7632000000000003e-05,
      "loss": 0.0004,
      "step": 3700
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.6234924591493982,
      "learning_rate": 1.76e-05,
      "loss": 0.1071,
      "step": 3750
    },
    {
      "epoch": 1.216,
      "grad_norm": 0.02540240432384445,
      "learning_rate": 1.7568000000000002e-05,
      "loss": 0.0064,
      "step": 3800
    },
    {
      "epoch": 1.232,
      "grad_norm": 0.018763443771111773,
      "learning_rate": 1.7536000000000003e-05,
      "loss": 0.0003,
      "step": 3850
    },
    {
      "epoch": 1.248,
      "grad_norm": 0.002105113957933186,
      "learning_rate": 1.7504e-05,
      "loss": 0.0002,
      "step": 3900
    },
    {
      "epoch": 1.264,
      "grad_norm": 0.011820986559734206,
      "learning_rate": 1.7472e-05,
      "loss": 0.0001,
      "step": 3950
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.010883352595811436,
      "learning_rate": 1.7440000000000002e-05,
      "loss": 0.0002,
      "step": 4000
    },
    {
      "epoch": 1.296,
      "grad_norm": 0.0002846213040226481,
      "learning_rate": 1.7408e-05,
      "loss": 0.0001,
      "step": 4050
    },
    {
      "epoch": 1.312,
      "grad_norm": 0.011665351347795698,
      "learning_rate": 1.7376e-05,
      "loss": 0.0001,
      "step": 4100
    },
    {
      "epoch": 1.328,
      "grad_norm": 0.003619829409498692,
      "learning_rate": 1.7344000000000002e-05,
      "loss": 0.0001,
      "step": 4150
    },
    {
      "epoch": 1.3439999999999999,
      "grad_norm": 0.009844016518088946,
      "learning_rate": 1.7312000000000003e-05,
      "loss": 0.0001,
      "step": 4200
    },
    {
      "epoch": 1.3599999999999999,
      "grad_norm": 0.004750662567109947,
      "learning_rate": 1.728e-05,
      "loss": 0.0001,
      "step": 4250
    },
    {
      "epoch": 1.376,
      "grad_norm": 0.011961256262424691,
      "learning_rate": 1.7248e-05,
      "loss": 0.0001,
      "step": 4300
    },
    {
      "epoch": 1.392,
      "grad_norm": 0.0008924320200507949,
      "learning_rate": 1.7216000000000003e-05,
      "loss": 0.0001,
      "step": 4350
    },
    {
      "epoch": 1.408,
      "grad_norm": 0.01662320118905631,
      "learning_rate": 1.7184e-05,
      "loss": 0.0001,
      "step": 4400
    },
    {
      "epoch": 1.424,
      "grad_norm": 0.028186123188899303,
      "learning_rate": 1.7152e-05,
      "loss": 0.0001,
      "step": 4450
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.0034815142490064606,
      "learning_rate": 1.7120000000000002e-05,
      "loss": 0.0001,
      "step": 4500
    },
    {
      "epoch": 1.456,
      "grad_norm": 0.001460364718969842,
      "learning_rate": 1.7088000000000003e-05,
      "loss": 0.0001,
      "step": 4550
    },
    {
      "epoch": 1.472,
      "grad_norm": 0.012151855125151954,
      "learning_rate": 1.7056e-05,
      "loss": 0.0001,
      "step": 4600
    },
    {
      "epoch": 1.488,
      "grad_norm": 0.0066837713743412835,
      "learning_rate": 1.7024e-05,
      "loss": 0.0002,
      "step": 4650
    },
    {
      "epoch": 1.504,
      "grad_norm": 0.01614289259012644,
      "learning_rate": 1.6992000000000003e-05,
      "loss": 0.0002,
      "step": 4700
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.017017037424245766,
      "learning_rate": 1.696e-05,
      "loss": 0.0004,
      "step": 4750
    },
    {
      "epoch": 1.536,
      "grad_norm": 0.037257132020433056,
      "learning_rate": 1.6928e-05,
      "loss": 0.0002,
      "step": 4800
    },
    {
      "epoch": 1.552,
      "grad_norm": 0.017499460231563804,
      "learning_rate": 1.6896000000000002e-05,
      "loss": 0.0001,
      "step": 4850
    },
    {
      "epoch": 1.568,
      "grad_norm": 0.03629289000795102,
      "learning_rate": 1.6864e-05,
      "loss": 0.0003,
      "step": 4900
    },
    {
      "epoch": 1.584,
      "grad_norm": 0.012313804740317102,
      "learning_rate": 1.6832e-05,
      "loss": 0.0002,
      "step": 4950
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.015022727388605363,
      "learning_rate": 1.6800000000000002e-05,
      "loss": 0.0002,
      "step": 5000
    },
    {
      "epoch": 1.616,
      "grad_norm": 0.01637075751125702,
      "learning_rate": 1.6768000000000003e-05,
      "loss": 0.0002,
      "step": 5050
    },
    {
      "epoch": 1.6320000000000001,
      "grad_norm": 0.00817681413922093,
      "learning_rate": 1.6736e-05,
      "loss": 0.0002,
      "step": 5100
    },
    {
      "epoch": 1.6480000000000001,
      "grad_norm": 0.05076839955582874,
      "learning_rate": 1.6704e-05,
      "loss": 0.0003,
      "step": 5150
    },
    {
      "epoch": 1.6640000000000001,
      "grad_norm": 0.054792586149856305,
      "learning_rate": 1.6672000000000002e-05,
      "loss": 0.0003,
      "step": 5200
    },
    {
      "epoch": 1.6800000000000002,
      "grad_norm": 0.014300407813244355,
      "learning_rate": 1.664e-05,
      "loss": 0.0004,
      "step": 5250
    },
    {
      "epoch": 1.696,
      "grad_norm": 0.022359944469494724,
      "learning_rate": 1.6608e-05,
      "loss": 0.0004,
      "step": 5300
    },
    {
      "epoch": 1.712,
      "grad_norm": 0.03392271894362949,
      "learning_rate": 1.6576000000000002e-05,
      "loss": 0.0005,
      "step": 5350
    },
    {
      "epoch": 1.728,
      "grad_norm": 0.030499061647474107,
      "learning_rate": 1.6544000000000003e-05,
      "loss": 0.0004,
      "step": 5400
    },
    {
      "epoch": 1.744,
      "grad_norm": 0.022947059194539453,
      "learning_rate": 1.6512e-05,
      "loss": 0.0002,
      "step": 5450
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.004139587011211526,
      "learning_rate": 1.648e-05,
      "loss": 0.0002,
      "step": 5500
    },
    {
      "epoch": 1.776,
      "grad_norm": 0.019015863163501696,
      "learning_rate": 1.6448000000000002e-05,
      "loss": 0.0002,
      "step": 5550
    },
    {
      "epoch": 1.792,
      "grad_norm": 0.02153996101513209,
      "learning_rate": 1.6416e-05,
      "loss": 0.0001,
      "step": 5600
    },
    {
      "epoch": 1.808,
      "grad_norm": 0.02075243587775307,
      "learning_rate": 1.6384e-05,
      "loss": 0.0002,
      "step": 5650
    },
    {
      "epoch": 1.8239999999999998,
      "grad_norm": 0.009150550879400116,
      "learning_rate": 1.6352000000000002e-05,
      "loss": 0.0001,
      "step": 5700
    },
    {
      "epoch": 1.8399999999999999,
      "grad_norm": 0.03715955238865434,
      "learning_rate": 1.632e-05,
      "loss": 0.0001,
      "step": 5750
    },
    {
      "epoch": 1.8559999999999999,
      "grad_norm": 0.017289500337293505,
      "learning_rate": 1.6288e-05,
      "loss": 0.0001,
      "step": 5800
    },
    {
      "epoch": 1.8719999999999999,
      "grad_norm": 0.003573430818383637,
      "learning_rate": 1.6256e-05,
      "loss": 0.0001,
      "step": 5850
    },
    {
      "epoch": 1.888,
      "grad_norm": 0.00746919924078368,
      "learning_rate": 1.6224000000000003e-05,
      "loss": 0.0001,
      "step": 5900
    },
    {
      "epoch": 1.904,
      "grad_norm": 0.018846707196093635,
      "learning_rate": 1.6192e-05,
      "loss": 0.0001,
      "step": 5950
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.029690252225365133,
      "learning_rate": 1.616e-05,
      "loss": 0.0001,
      "step": 6000
    },
    {
      "epoch": 1.936,
      "grad_norm": 0.00404539132639544,
      "learning_rate": 1.6128000000000002e-05,
      "loss": 0.0001,
      "step": 6050
    },
    {
      "epoch": 1.952,
      "grad_norm": 0.01577316675319085,
      "learning_rate": 1.6096e-05,
      "loss": 0.0001,
      "step": 6100
    },
    {
      "epoch": 1.968,
      "grad_norm": 0.029879625197064306,
      "learning_rate": 1.6064e-05,
      "loss": 0.0002,
      "step": 6150
    },
    {
      "epoch": 1.984,
      "grad_norm": 0.014564254437322836,
      "learning_rate": 1.6032e-05,
      "loss": 0.0002,
      "step": 6200
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.022672964806977416,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.0001,
      "step": 6250
    },
    {
      "epoch": 2.016,
      "grad_norm": 0.025131584412300467,
      "learning_rate": 1.5968e-05,
      "loss": 0.0003,
      "step": 6300
    },
    {
      "epoch": 2.032,
      "grad_norm": 0.019193509331689527,
      "learning_rate": 1.5936e-05,
      "loss": 0.0003,
      "step": 6350
    },
    {
      "epoch": 2.048,
      "grad_norm": 0.03172642139425199,
      "learning_rate": 1.5904000000000002e-05,
      "loss": 0.0003,
      "step": 6400
    },
    {
      "epoch": 2.064,
      "grad_norm": 0.03825097776567973,
      "learning_rate": 1.5872e-05,
      "loss": 0.0003,
      "step": 6450
    },
    {
      "epoch": 2.08,
      "grad_norm": 0.02799126609845451,
      "learning_rate": 1.584e-05,
      "loss": 0.0004,
      "step": 6500
    },
    {
      "epoch": 2.096,
      "grad_norm": 0.021007331756498947,
      "learning_rate": 1.5808000000000002e-05,
      "loss": 0.0003,
      "step": 6550
    },
    {
      "epoch": 2.112,
      "grad_norm": 0.03138950848983769,
      "learning_rate": 1.5776e-05,
      "loss": 0.0003,
      "step": 6600
    },
    {
      "epoch": 2.128,
      "grad_norm": 0.06211389122519674,
      "learning_rate": 1.5744e-05,
      "loss": 0.0004,
      "step": 6650
    },
    {
      "epoch": 2.144,
      "grad_norm": 0.019842547638907367,
      "learning_rate": 1.5712e-05,
      "loss": 0.0004,
      "step": 6700
    },
    {
      "epoch": 2.16,
      "grad_norm": 0.011531174317517585,
      "learning_rate": 1.5680000000000002e-05,
      "loss": 0.0003,
      "step": 6750
    },
    {
      "epoch": 2.176,
      "grad_norm": 0.029715206936091667,
      "learning_rate": 1.5648e-05,
      "loss": 0.0003,
      "step": 6800
    },
    {
      "epoch": 2.192,
      "grad_norm": 0.03367332739926089,
      "learning_rate": 1.5616e-05,
      "loss": 0.0004,
      "step": 6850
    },
    {
      "epoch": 2.208,
      "grad_norm": 0.0328904913855512,
      "learning_rate": 1.5584000000000002e-05,
      "loss": 0.0003,
      "step": 6900
    },
    {
      "epoch": 2.224,
      "grad_norm": 0.033707262406383776,
      "learning_rate": 1.5552e-05,
      "loss": 0.0003,
      "step": 6950
    },
    {
      "epoch": 2.24,
      "grad_norm": 0.026776309909601587,
      "learning_rate": 1.552e-05,
      "loss": 0.0004,
      "step": 7000
    },
    {
      "epoch": 2.2560000000000002,
      "grad_norm": 0.014767091414466142,
      "learning_rate": 1.5488e-05,
      "loss": 0.0002,
      "step": 7050
    },
    {
      "epoch": 2.2720000000000002,
      "grad_norm": 0.0017357490608581442,
      "learning_rate": 1.5456000000000002e-05,
      "loss": 0.0002,
      "step": 7100
    },
    {
      "epoch": 2.288,
      "grad_norm": 0.012533161481551452,
      "learning_rate": 1.5424e-05,
      "loss": 0.0003,
      "step": 7150
    },
    {
      "epoch": 2.304,
      "grad_norm": 0.011606179356836947,
      "learning_rate": 1.5392e-05,
      "loss": 0.0002,
      "step": 7200
    },
    {
      "epoch": 2.32,
      "grad_norm": 0.011141091556149417,
      "learning_rate": 1.5360000000000002e-05,
      "loss": 0.0002,
      "step": 7250
    },
    {
      "epoch": 2.336,
      "grad_norm": 0.012970504799498298,
      "learning_rate": 1.5328e-05,
      "loss": 0.0002,
      "step": 7300
    },
    {
      "epoch": 2.352,
      "grad_norm": 0.013792627527534363,
      "learning_rate": 1.5296e-05,
      "loss": 0.0001,
      "step": 7350
    },
    {
      "epoch": 2.368,
      "grad_norm": 0.005003589537772858,
      "learning_rate": 1.5264e-05,
      "loss": 0.0001,
      "step": 7400
    },
    {
      "epoch": 2.384,
      "grad_norm": 0.022468245054652756,
      "learning_rate": 1.5232000000000003e-05,
      "loss": 0.0001,
      "step": 7450
    },
    {
      "epoch": 2.4,
      "grad_norm": 0.0036743704709482736,
      "learning_rate": 1.5200000000000002e-05,
      "loss": 0.0001,
      "step": 7500
    },
    {
      "epoch": 2.416,
      "grad_norm": 0.02123021172151452,
      "learning_rate": 1.5168000000000001e-05,
      "loss": 0.0002,
      "step": 7550
    },
    {
      "epoch": 2.432,
      "grad_norm": 0.0029051452232639837,
      "learning_rate": 1.5136000000000002e-05,
      "loss": 0.0001,
      "step": 7600
    },
    {
      "epoch": 2.448,
      "grad_norm": 0.012870324143213164,
      "learning_rate": 1.5104000000000001e-05,
      "loss": 0.0001,
      "step": 7650
    },
    {
      "epoch": 2.464,
      "grad_norm": 0.013477754787068787,
      "learning_rate": 1.5072000000000002e-05,
      "loss": 0.0001,
      "step": 7700
    },
    {
      "epoch": 2.48,
      "grad_norm": 0.00028543856214405185,
      "learning_rate": 1.5040000000000002e-05,
      "loss": 0.0001,
      "step": 7750
    },
    {
      "epoch": 2.496,
      "grad_norm": 7.966477694629657e-05,
      "learning_rate": 1.5008000000000001e-05,
      "loss": 0.0,
      "step": 7800
    },
    {
      "epoch": 2.512,
      "grad_norm": 0.0086588190020715,
      "learning_rate": 1.4976000000000002e-05,
      "loss": 0.0001,
      "step": 7850
    },
    {
      "epoch": 2.528,
      "grad_norm": 0.004551133565457303,
      "learning_rate": 1.4944000000000001e-05,
      "loss": 0.0001,
      "step": 7900
    },
    {
      "epoch": 2.544,
      "grad_norm": 0.0011433396140536688,
      "learning_rate": 1.4912000000000002e-05,
      "loss": 0.0001,
      "step": 7950
    },
    {
      "epoch": 2.56,
      "grad_norm": 0.0213021627246675,
      "learning_rate": 1.4880000000000002e-05,
      "loss": 0.0002,
      "step": 8000
    },
    {
      "epoch": 2.576,
      "grad_norm": 0.03352306587680257,
      "learning_rate": 1.4848e-05,
      "loss": 0.0003,
      "step": 8050
    },
    {
      "epoch": 2.592,
      "grad_norm": 0.02330516084282675,
      "learning_rate": 1.4816000000000002e-05,
      "loss": 0.0003,
      "step": 8100
    },
    {
      "epoch": 2.608,
      "grad_norm": 0.0006771745351232887,
      "learning_rate": 1.4784000000000001e-05,
      "loss": 0.0002,
      "step": 8150
    },
    {
      "epoch": 2.624,
      "grad_norm": 0.012210357305192779,
      "learning_rate": 1.4752000000000002e-05,
      "loss": 0.0002,
      "step": 8200
    },
    {
      "epoch": 2.64,
      "grad_norm": 0.03745986985753525,
      "learning_rate": 1.4720000000000001e-05,
      "loss": 0.0002,
      "step": 8250
    },
    {
      "epoch": 2.656,
      "grad_norm": 0.014274167297240953,
      "learning_rate": 1.4688000000000002e-05,
      "loss": 0.0002,
      "step": 8300
    },
    {
      "epoch": 2.672,
      "grad_norm": 0.02447629236070352,
      "learning_rate": 1.4656000000000002e-05,
      "loss": 0.0002,
      "step": 8350
    },
    {
      "epoch": 2.6879999999999997,
      "grad_norm": 0.0038311408711608334,
      "learning_rate": 1.4624000000000001e-05,
      "loss": 0.0003,
      "step": 8400
    },
    {
      "epoch": 2.7039999999999997,
      "grad_norm": 0.0028822241550350873,
      "learning_rate": 1.4592000000000002e-05,
      "loss": 0.0003,
      "step": 8450
    },
    {
      "epoch": 2.7199999999999998,
      "grad_norm": 0.021839407360574662,
      "learning_rate": 1.4560000000000001e-05,
      "loss": 0.0002,
      "step": 8500
    },
    {
      "epoch": 2.7359999999999998,
      "grad_norm": 0.013328644366624556,
      "learning_rate": 1.4528000000000002e-05,
      "loss": 0.0002,
      "step": 8550
    },
    {
      "epoch": 2.752,
      "grad_norm": 0.035287550317779214,
      "learning_rate": 1.4496000000000001e-05,
      "loss": 0.0004,
      "step": 8600
    },
    {
      "epoch": 2.768,
      "grad_norm": 0.023410051762185694,
      "learning_rate": 1.4464e-05,
      "loss": 0.0003,
      "step": 8650
    },
    {
      "epoch": 2.784,
      "grad_norm": 0.029501287143741794,
      "learning_rate": 1.4432000000000002e-05,
      "loss": 0.0004,
      "step": 8700
    },
    {
      "epoch": 2.8,
      "grad_norm": 0.006315006996691602,
      "learning_rate": 1.4400000000000001e-05,
      "loss": 0.0002,
      "step": 8750
    },
    {
      "epoch": 2.816,
      "grad_norm": 0.02024117418990644,
      "learning_rate": 1.4368000000000002e-05,
      "loss": 0.0002,
      "step": 8800
    },
    {
      "epoch": 2.832,
      "grad_norm": 0.00473025507886849,
      "learning_rate": 1.4336000000000001e-05,
      "loss": 0.0003,
      "step": 8850
    },
    {
      "epoch": 2.848,
      "grad_norm": 0.008026314917502433,
      "learning_rate": 1.4304e-05,
      "loss": 0.0002,
      "step": 8900
    },
    {
      "epoch": 2.864,
      "grad_norm": 0.006986782846698588,
      "learning_rate": 1.4272000000000002e-05,
      "loss": 0.0002,
      "step": 8950
    },
    {
      "epoch": 2.88,
      "grad_norm": 0.006623438561010072,
      "learning_rate": 1.4240000000000001e-05,
      "loss": 0.0001,
      "step": 9000
    },
    {
      "epoch": 2.896,
      "grad_norm": 0.0002450911749717482,
      "learning_rate": 1.4208000000000002e-05,
      "loss": 0.0,
      "step": 9050
    },
    {
      "epoch": 2.912,
      "grad_norm": 0.0007552126102260173,
      "learning_rate": 1.4176000000000001e-05,
      "loss": 0.0001,
      "step": 9100
    },
    {
      "epoch": 2.928,
      "grad_norm": 0.016500368158510824,
      "learning_rate": 1.4144000000000002e-05,
      "loss": 0.0002,
      "step": 9150
    },
    {
      "epoch": 2.944,
      "grad_norm": 0.0034247242166474266,
      "learning_rate": 1.4112000000000001e-05,
      "loss": 0.0001,
      "step": 9200
    },
    {
      "epoch": 2.96,
      "grad_norm": 0.005455926803494763,
      "learning_rate": 1.408e-05,
      "loss": 0.0001,
      "step": 9250
    },
    {
      "epoch": 2.976,
      "grad_norm": 0.00015754376252841523,
      "learning_rate": 1.4048000000000002e-05,
      "loss": 0.0001,
      "step": 9300
    },
    {
      "epoch": 2.992,
      "grad_norm": 0.005293513519086162,
      "learning_rate": 1.4016000000000001e-05,
      "loss": 0.0001,
      "step": 9350
    },
    {
      "epoch": 3.008,
      "grad_norm": 0.005723786193935275,
      "learning_rate": 1.3984000000000002e-05,
      "loss": 0.0001,
      "step": 9400
    },
    {
      "epoch": 3.024,
      "grad_norm": 0.02024744532866248,
      "learning_rate": 1.3952000000000001e-05,
      "loss": 0.0001,
      "step": 9450
    },
    {
      "epoch": 3.04,
      "grad_norm": 0.003499919671790885,
      "learning_rate": 1.392e-05,
      "loss": 0.0001,
      "step": 9500
    },
    {
      "epoch": 3.056,
      "grad_norm": 0.005799100557385055,
      "learning_rate": 1.3888000000000002e-05,
      "loss": 0.0001,
      "step": 9550
    },
    {
      "epoch": 3.072,
      "grad_norm": 0.0009577375959945704,
      "learning_rate": 1.3856e-05,
      "loss": 0.0,
      "step": 9600
    },
    {
      "epoch": 3.088,
      "grad_norm": 0.0007578722310411947,
      "learning_rate": 1.3824000000000002e-05,
      "loss": 0.0001,
      "step": 9650
    },
    {
      "epoch": 3.104,
      "grad_norm": 0.002338152752115609,
      "learning_rate": 1.3792000000000001e-05,
      "loss": 0.0001,
      "step": 9700
    },
    {
      "epoch": 3.12,
      "grad_norm": 0.01456411564860181,
      "learning_rate": 1.376e-05,
      "loss": 0.0001,
      "step": 9750
    },
    {
      "epoch": 3.136,
      "grad_norm": 0.001287800520600071,
      "learning_rate": 1.3728000000000001e-05,
      "loss": 0.0001,
      "step": 9800
    },
    {
      "epoch": 3.152,
      "grad_norm": 0.014424730029901063,
      "learning_rate": 1.3696e-05,
      "loss": 0.0001,
      "step": 9850
    },
    {
      "epoch": 3.168,
      "grad_norm": 0.00924258872096614,
      "learning_rate": 1.3664000000000002e-05,
      "loss": 0.0001,
      "step": 9900
    },
    {
      "epoch": 3.184,
      "grad_norm": 0.013489603484745373,
      "learning_rate": 1.3632000000000001e-05,
      "loss": 0.0001,
      "step": 9950
    },
    {
      "epoch": 3.2,
      "grad_norm": 0.006927504226900411,
      "learning_rate": 1.3600000000000002e-05,
      "loss": 0.0001,
      "step": 10000
    },
    {
      "epoch": 3.216,
      "grad_norm": 0.0035318365737952973,
      "learning_rate": 1.3568000000000001e-05,
      "loss": 0.0001,
      "step": 10050
    },
    {
      "epoch": 3.232,
      "grad_norm": 0.005177760774164178,
      "learning_rate": 1.3536e-05,
      "loss": 0.0001,
      "step": 10100
    },
    {
      "epoch": 3.248,
      "grad_norm": 0.000566812552570274,
      "learning_rate": 1.3504000000000001e-05,
      "loss": 0.0001,
      "step": 10150
    },
    {
      "epoch": 3.2640000000000002,
      "grad_norm": 0.0027484984185127837,
      "learning_rate": 1.3472e-05,
      "loss": 0.0,
      "step": 10200
    },
    {
      "epoch": 3.2800000000000002,
      "grad_norm": 9.001619235055446e-05,
      "learning_rate": 1.3440000000000002e-05,
      "loss": 0.0,
      "step": 10250
    },
    {
      "epoch": 3.296,
      "grad_norm": 9.963417112520346e-05,
      "learning_rate": 1.3408000000000001e-05,
      "loss": 0.0,
      "step": 10300
    },
    {
      "epoch": 3.312,
      "grad_norm": 0.029154892118349613,
      "learning_rate": 1.3376e-05,
      "loss": 0.0001,
      "step": 10350
    },
    {
      "epoch": 3.328,
      "grad_norm": 0.029152414372463475,
      "learning_rate": 1.3344000000000001e-05,
      "loss": 0.0008,
      "step": 10400
    },
    {
      "epoch": 3.344,
      "grad_norm": 0.021275289896917602,
      "learning_rate": 1.3312e-05,
      "loss": 0.0006,
      "step": 10450
    },
    {
      "epoch": 3.36,
      "grad_norm": 0.009979331931202665,
      "learning_rate": 1.3280000000000002e-05,
      "loss": 0.0003,
      "step": 10500
    },
    {
      "epoch": 3.376,
      "grad_norm": 0.0028554160629077644,
      "learning_rate": 1.3248000000000001e-05,
      "loss": 0.0002,
      "step": 10550
    },
    {
      "epoch": 3.392,
      "grad_norm": 0.0008631163175074636,
      "learning_rate": 1.3216000000000002e-05,
      "loss": 0.0002,
      "step": 10600
    },
    {
      "epoch": 3.408,
      "grad_norm": 0.0022088838542927364,
      "learning_rate": 1.3184000000000001e-05,
      "loss": 0.0002,
      "step": 10650
    },
    {
      "epoch": 3.424,
      "grad_norm": 0.004224190234212431,
      "learning_rate": 1.3152e-05,
      "loss": 0.0001,
      "step": 10700
    },
    {
      "epoch": 3.44,
      "grad_norm": 0.0011718751759226711,
      "learning_rate": 1.3120000000000001e-05,
      "loss": 0.0001,
      "step": 10750
    },
    {
      "epoch": 3.456,
      "grad_norm": 0.00032887216500061996,
      "learning_rate": 1.3088e-05,
      "loss": 0.0001,
      "step": 10800
    },
    {
      "epoch": 3.472,
      "grad_norm": 0.001042494069534549,
      "learning_rate": 1.3056000000000002e-05,
      "loss": 0.0001,
      "step": 10850
    },
    {
      "epoch": 3.488,
      "grad_norm": 0.0008855299783423499,
      "learning_rate": 1.3024000000000001e-05,
      "loss": 0.0001,
      "step": 10900
    },
    {
      "epoch": 3.504,
      "grad_norm": 0.005611519836033652,
      "learning_rate": 1.2992e-05,
      "loss": 0.0001,
      "step": 10950
    },
    {
      "epoch": 3.52,
      "grad_norm": 0.0005321552377152104,
      "learning_rate": 1.2960000000000001e-05,
      "loss": 0.0001,
      "step": 11000
    },
    {
      "epoch": 3.536,
      "grad_norm": 0.022067140757654112,
      "learning_rate": 1.2928e-05,
      "loss": 0.0001,
      "step": 11050
    },
    {
      "epoch": 3.552,
      "grad_norm": 0.02743005815099029,
      "learning_rate": 1.2896000000000002e-05,
      "loss": 0.0002,
      "step": 11100
    },
    {
      "epoch": 3.568,
      "grad_norm": 0.006980424491381615,
      "learning_rate": 1.2864e-05,
      "loss": 0.0002,
      "step": 11150
    },
    {
      "epoch": 3.584,
      "grad_norm": 0.002797099415873821,
      "learning_rate": 1.2832e-05,
      "loss": 0.0001,
      "step": 11200
    },
    {
      "epoch": 3.6,
      "grad_norm": 0.02046136431371523,
      "learning_rate": 1.2800000000000001e-05,
      "loss": 0.0001,
      "step": 11250
    },
    {
      "epoch": 3.616,
      "grad_norm": 0.01941106362099442,
      "learning_rate": 1.2768e-05,
      "loss": 0.0001,
      "step": 11300
    },
    {
      "epoch": 3.632,
      "grad_norm": 0.027454597931497824,
      "learning_rate": 1.2736000000000001e-05,
      "loss": 0.0001,
      "step": 11350
    },
    {
      "epoch": 3.648,
      "grad_norm": 0.02018047493471484,
      "learning_rate": 1.2704e-05,
      "loss": 0.0002,
      "step": 11400
    },
    {
      "epoch": 3.664,
      "grad_norm": 0.00597845181496525,
      "learning_rate": 1.2672000000000002e-05,
      "loss": 0.0001,
      "step": 11450
    },
    {
      "epoch": 3.68,
      "grad_norm": 0.014788477975632599,
      "learning_rate": 1.2640000000000001e-05,
      "loss": 0.0001,
      "step": 11500
    },
    {
      "epoch": 3.6959999999999997,
      "grad_norm": 0.0011628172843709302,
      "learning_rate": 1.2608e-05,
      "loss": 0.0001,
      "step": 11550
    },
    {
      "epoch": 3.7119999999999997,
      "grad_norm": 0.009516688422133983,
      "learning_rate": 1.2576000000000001e-05,
      "loss": 0.0001,
      "step": 11600
    },
    {
      "epoch": 3.7279999999999998,
      "grad_norm": 0.001028373499284717,
      "learning_rate": 1.2544e-05,
      "loss": 0.0001,
      "step": 11650
    },
    {
      "epoch": 3.7439999999999998,
      "grad_norm": 0.0005456313003835665,
      "learning_rate": 1.2512000000000002e-05,
      "loss": 0.0001,
      "step": 11700
    },
    {
      "epoch": 3.76,
      "grad_norm": 0.019519987964782267,
      "learning_rate": 1.248e-05,
      "loss": 0.0001,
      "step": 11750
    },
    {
      "epoch": 3.776,
      "grad_norm": 0.005294491417444766,
      "learning_rate": 1.2448e-05,
      "loss": 0.0003,
      "step": 11800
    },
    {
      "epoch": 3.792,
      "grad_norm": 0.02184964825167138,
      "learning_rate": 1.2416000000000001e-05,
      "loss": 0.0001,
      "step": 11850
    },
    {
      "epoch": 3.808,
      "grad_norm": 0.02738996757417856,
      "learning_rate": 1.2384e-05,
      "loss": 0.0002,
      "step": 11900
    },
    {
      "epoch": 3.824,
      "grad_norm": 0.012047613108118525,
      "learning_rate": 1.2352000000000001e-05,
      "loss": 0.0001,
      "step": 11950
    },
    {
      "epoch": 3.84,
      "grad_norm": 0.0037495738541270694,
      "learning_rate": 1.232e-05,
      "loss": 0.0001,
      "step": 12000
    },
    {
      "epoch": 3.856,
      "grad_norm": 0.009873580970824827,
      "learning_rate": 1.2288e-05,
      "loss": 0.0001,
      "step": 12050
    },
    {
      "epoch": 3.872,
      "grad_norm": 0.0017604411840222716,
      "learning_rate": 1.2256000000000001e-05,
      "loss": 0.0001,
      "step": 12100
    },
    {
      "epoch": 3.888,
      "grad_norm": 0.01674104988019884,
      "learning_rate": 1.2224e-05,
      "loss": 0.0001,
      "step": 12150
    },
    {
      "epoch": 3.904,
      "grad_norm": 0.001884331509122243,
      "learning_rate": 1.2192000000000001e-05,
      "loss": 0.0001,
      "step": 12200
    },
    {
      "epoch": 3.92,
      "grad_norm": 7.670429595529145e-05,
      "learning_rate": 1.216e-05,
      "loss": 0.0,
      "step": 12250
    },
    {
      "epoch": 3.936,
      "grad_norm": 0.004975458011250956,
      "learning_rate": 1.2128000000000001e-05,
      "loss": 0.0001,
      "step": 12300
    },
    {
      "epoch": 3.952,
      "grad_norm": 0.006668702945904534,
      "learning_rate": 1.2096e-05,
      "loss": 0.0001,
      "step": 12350
    },
    {
      "epoch": 3.968,
      "grad_norm": 0.01189326350053773,
      "learning_rate": 1.2064e-05,
      "loss": 0.0,
      "step": 12400
    },
    {
      "epoch": 3.984,
      "grad_norm": 9.504318727305145e-05,
      "learning_rate": 1.2032000000000001e-05,
      "loss": 0.0,
      "step": 12450
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.0001022247506147829,
      "learning_rate": 1.2e-05,
      "loss": 0.0,
      "step": 12500
    },
    {
      "epoch": 4.016,
      "grad_norm": 4.213999803416733e-05,
      "learning_rate": 1.1968000000000001e-05,
      "loss": 0.0,
      "step": 12550
    },
    {
      "epoch": 4.032,
      "grad_norm": 0.00016120612272882602,
      "learning_rate": 1.1936e-05,
      "loss": 0.0,
      "step": 12600
    },
    {
      "epoch": 4.048,
      "grad_norm": 0.001648066553863214,
      "learning_rate": 1.1904e-05,
      "loss": 0.0,
      "step": 12650
    },
    {
      "epoch": 4.064,
      "grad_norm": 0.0018732158829428201,
      "learning_rate": 1.1872000000000001e-05,
      "loss": 0.0001,
      "step": 12700
    },
    {
      "epoch": 4.08,
      "grad_norm": 5.628979108254126e-05,
      "learning_rate": 1.184e-05,
      "loss": 0.0,
      "step": 12750
    },
    {
      "epoch": 4.096,
      "grad_norm": 0.019032461176938534,
      "learning_rate": 1.1808000000000001e-05,
      "loss": 0.0001,
      "step": 12800
    },
    {
      "epoch": 4.112,
      "grad_norm": 0.0016313949670923428,
      "learning_rate": 1.1776e-05,
      "loss": 0.0,
      "step": 12850
    },
    {
      "epoch": 4.128,
      "grad_norm": 0.0015446847482196097,
      "learning_rate": 1.1744000000000001e-05,
      "loss": 0.0,
      "step": 12900
    },
    {
      "epoch": 4.144,
      "grad_norm": 0.0036344510964935644,
      "learning_rate": 1.1712e-05,
      "loss": 0.0001,
      "step": 12950
    },
    {
      "epoch": 4.16,
      "grad_norm": 0.00011213733550717207,
      "learning_rate": 1.168e-05,
      "loss": 0.0,
      "step": 13000
    },
    {
      "epoch": 4.176,
      "grad_norm": 0.009241406559372695,
      "learning_rate": 1.1648000000000001e-05,
      "loss": 0.0,
      "step": 13050
    },
    {
      "epoch": 4.192,
      "grad_norm": 0.004707319381909862,
      "learning_rate": 1.1616e-05,
      "loss": 0.0001,
      "step": 13100
    },
    {
      "epoch": 4.208,
      "grad_norm": 0.001471958485689987,
      "learning_rate": 1.1584000000000001e-05,
      "loss": 0.0001,
      "step": 13150
    },
    {
      "epoch": 4.224,
      "grad_norm": 0.005859122648703548,
      "learning_rate": 1.1552e-05,
      "loss": 0.0001,
      "step": 13200
    },
    {
      "epoch": 4.24,
      "grad_norm": 0.012818649271253849,
      "learning_rate": 1.152e-05,
      "loss": 0.0001,
      "step": 13250
    },
    {
      "epoch": 4.256,
      "grad_norm": 0.0037211861585128617,
      "learning_rate": 1.1488e-05,
      "loss": 0.0001,
      "step": 13300
    },
    {
      "epoch": 4.272,
      "grad_norm": 0.07015941754753528,
      "learning_rate": 1.1456e-05,
      "loss": 0.0001,
      "step": 13350
    },
    {
      "epoch": 4.288,
      "grad_norm": 0.035187482539997594,
      "learning_rate": 1.1424000000000001e-05,
      "loss": 0.0003,
      "step": 13400
    },
    {
      "epoch": 4.304,
      "grad_norm": 0.019999557778834116,
      "learning_rate": 1.1392e-05,
      "loss": 0.0004,
      "step": 13450
    },
    {
      "epoch": 4.32,
      "grad_norm": 0.0345649880873456,
      "learning_rate": 1.136e-05,
      "loss": 0.0006,
      "step": 13500
    },
    {
      "epoch": 4.336,
      "grad_norm": 0.017367529665425263,
      "learning_rate": 1.1328e-05,
      "loss": 0.0006,
      "step": 13550
    },
    {
      "epoch": 4.352,
      "grad_norm": 0.023709723324916443,
      "learning_rate": 1.1296e-05,
      "loss": 0.0003,
      "step": 13600
    },
    {
      "epoch": 4.368,
      "grad_norm": 0.018413464418327773,
      "learning_rate": 1.1264000000000001e-05,
      "loss": 0.0002,
      "step": 13650
    },
    {
      "epoch": 4.384,
      "grad_norm": 0.0018201928868281155,
      "learning_rate": 1.1232e-05,
      "loss": 0.0002,
      "step": 13700
    },
    {
      "epoch": 4.4,
      "grad_norm": 0.00822419431789589,
      "learning_rate": 1.1200000000000001e-05,
      "loss": 0.0001,
      "step": 13750
    },
    {
      "epoch": 4.416,
      "grad_norm": 0.006415739011886406,
      "learning_rate": 1.1168e-05,
      "loss": 0.0001,
      "step": 13800
    },
    {
      "epoch": 4.432,
      "grad_norm": 0.0004659247582312783,
      "learning_rate": 1.1136e-05,
      "loss": 0.0001,
      "step": 13850
    },
    {
      "epoch": 4.448,
      "grad_norm": 0.0005946738812863657,
      "learning_rate": 1.1104e-05,
      "loss": 0.0001,
      "step": 13900
    },
    {
      "epoch": 4.464,
      "grad_norm": 0.00046156276112608923,
      "learning_rate": 1.1072e-05,
      "loss": 0.0001,
      "step": 13950
    },
    {
      "epoch": 4.48,
      "grad_norm": 0.0005836721479758474,
      "learning_rate": 1.1040000000000001e-05,
      "loss": 0.0001,
      "step": 14000
    },
    {
      "epoch": 4.496,
      "grad_norm": 0.001344964325089982,
      "learning_rate": 1.1008e-05,
      "loss": 0.0001,
      "step": 14050
    },
    {
      "epoch": 4.5120000000000005,
      "grad_norm": 0.005801872303888594,
      "learning_rate": 1.0976e-05,
      "loss": 0.0001,
      "step": 14100
    },
    {
      "epoch": 4.5280000000000005,
      "grad_norm": 9.837216618840086e-05,
      "learning_rate": 1.0944e-05,
      "loss": 0.0,
      "step": 14150
    },
    {
      "epoch": 4.5440000000000005,
      "grad_norm": 7.737183380151301e-05,
      "learning_rate": 1.0912e-05,
      "loss": 0.0,
      "step": 14200
    },
    {
      "epoch": 4.5600000000000005,
      "grad_norm": 0.0009713119715969353,
      "learning_rate": 1.0880000000000001e-05,
      "loss": 0.0,
      "step": 14250
    },
    {
      "epoch": 4.576,
      "grad_norm": 0.005069492905364405,
      "learning_rate": 1.0848e-05,
      "loss": 0.0,
      "step": 14300
    },
    {
      "epoch": 4.592,
      "grad_norm": 3.2989301849193866e-05,
      "learning_rate": 1.0816e-05,
      "loss": 0.0,
      "step": 14350
    },
    {
      "epoch": 4.608,
      "grad_norm": 0.0015411767068381028,
      "learning_rate": 1.0784e-05,
      "loss": 0.0,
      "step": 14400
    },
    {
      "epoch": 4.624,
      "grad_norm": 0.0029576402483524416,
      "learning_rate": 1.0752e-05,
      "loss": 0.0,
      "step": 14450
    },
    {
      "epoch": 4.64,
      "grad_norm": 0.0033406485333461813,
      "learning_rate": 1.072e-05,
      "loss": 0.0,
      "step": 14500
    },
    {
      "epoch": 4.656,
      "grad_norm": 0.00019611919936137406,
      "learning_rate": 1.0688e-05,
      "loss": 0.0,
      "step": 14550
    },
    {
      "epoch": 4.672,
      "grad_norm": 0.0019765899286054755,
      "learning_rate": 1.0656000000000003e-05,
      "loss": 0.0,
      "step": 14600
    },
    {
      "epoch": 4.688,
      "grad_norm": 0.0005709409541646127,
      "learning_rate": 1.0624e-05,
      "loss": 0.0,
      "step": 14650
    },
    {
      "epoch": 4.704,
      "grad_norm": 0.0023491377959600473,
      "learning_rate": 1.0592e-05,
      "loss": 0.0,
      "step": 14700
    },
    {
      "epoch": 4.72,
      "grad_norm": 0.0011388200727303454,
      "learning_rate": 1.056e-05,
      "loss": 0.0,
      "step": 14750
    },
    {
      "epoch": 4.736,
      "grad_norm": 0.026118986062423813,
      "learning_rate": 1.0528e-05,
      "loss": 0.0001,
      "step": 14800
    },
    {
      "epoch": 4.752,
      "grad_norm": 0.0029740973486947135,
      "learning_rate": 1.0496000000000003e-05,
      "loss": 0.0002,
      "step": 14850
    },
    {
      "epoch": 4.768,
      "grad_norm": 0.017336503189413394,
      "learning_rate": 1.0464e-05,
      "loss": 0.0001,
      "step": 14900
    },
    {
      "epoch": 4.784,
      "grad_norm": 0.02504986677595465,
      "learning_rate": 1.0432e-05,
      "loss": 0.0002,
      "step": 14950
    },
    {
      "epoch": 4.8,
      "grad_norm": 0.007437099599438055,
      "learning_rate": 1.04e-05,
      "loss": 0.0002,
      "step": 15000
    },
    {
      "epoch": 4.816,
      "grad_norm": 0.020369582876539328,
      "learning_rate": 1.0368e-05,
      "loss": 0.0001,
      "step": 15050
    },
    {
      "epoch": 4.832,
      "grad_norm": 0.0006272672761587281,
      "learning_rate": 1.0336000000000002e-05,
      "loss": 0.0002,
      "step": 15100
    },
    {
      "epoch": 4.848,
      "grad_norm": 0.006047362095922896,
      "learning_rate": 1.0304e-05,
      "loss": 0.0001,
      "step": 15150
    },
    {
      "epoch": 4.864,
      "grad_norm": 0.015672098166329012,
      "learning_rate": 1.0272e-05,
      "loss": 0.0001,
      "step": 15200
    },
    {
      "epoch": 4.88,
      "grad_norm": 0.0030277987193269465,
      "learning_rate": 1.024e-05,
      "loss": 0.0,
      "step": 15250
    },
    {
      "epoch": 4.896,
      "grad_norm": 0.0010616116192301943,
      "learning_rate": 1.0208e-05,
      "loss": 0.0001,
      "step": 15300
    },
    {
      "epoch": 4.912,
      "grad_norm": 0.0007680822239626422,
      "learning_rate": 1.0176000000000002e-05,
      "loss": 0.0001,
      "step": 15350
    },
    {
      "epoch": 4.928,
      "grad_norm": 0.01025708941048719,
      "learning_rate": 1.0144e-05,
      "loss": 0.0001,
      "step": 15400
    },
    {
      "epoch": 4.944,
      "grad_norm": 0.011307795907471767,
      "learning_rate": 1.0112000000000002e-05,
      "loss": 0.0001,
      "step": 15450
    },
    {
      "epoch": 4.96,
      "grad_norm": 0.0004601284057725081,
      "learning_rate": 1.008e-05,
      "loss": 0.0,
      "step": 15500
    },
    {
      "epoch": 4.976,
      "grad_norm": 0.010856337170473403,
      "learning_rate": 1.0048e-05,
      "loss": 0.0,
      "step": 15550
    },
    {
      "epoch": 4.992,
      "grad_norm": 0.00015313333402904193,
      "learning_rate": 1.0016000000000002e-05,
      "loss": 0.0001,
      "step": 15600
    },
    {
      "epoch": 5.008,
      "grad_norm": 0.0016422517703522808,
      "learning_rate": 9.984e-06,
      "loss": 0.0,
      "step": 15650
    },
    {
      "epoch": 5.024,
      "grad_norm": 0.00017065965209302168,
      "learning_rate": 9.952e-06,
      "loss": 0.0,
      "step": 15700
    },
    {
      "epoch": 5.04,
      "grad_norm": 0.0023982386902955902,
      "learning_rate": 9.920000000000002e-06,
      "loss": 0.0001,
      "step": 15750
    },
    {
      "epoch": 5.056,
      "grad_norm": 0.0001370875982310932,
      "learning_rate": 9.888000000000001e-06,
      "loss": 0.0,
      "step": 15800
    },
    {
      "epoch": 5.072,
      "grad_norm": 2.682877220003173e-05,
      "learning_rate": 9.856000000000002e-06,
      "loss": 0.0,
      "step": 15850
    },
    {
      "epoch": 5.088,
      "grad_norm": 5.0994540698164586e-05,
      "learning_rate": 9.824000000000001e-06,
      "loss": 0.0,
      "step": 15900
    },
    {
      "epoch": 5.104,
      "grad_norm": 4.2441360877384374e-05,
      "learning_rate": 9.792e-06,
      "loss": 0.0,
      "step": 15950
    },
    {
      "epoch": 5.12,
      "grad_norm": 0.006419921838441735,
      "learning_rate": 9.760000000000001e-06,
      "loss": 0.0,
      "step": 16000
    },
    {
      "epoch": 5.136,
      "grad_norm": 0.0032020770761266313,
      "learning_rate": 9.728e-06,
      "loss": 0.0,
      "step": 16050
    },
    {
      "epoch": 5.152,
      "grad_norm": 0.0001985282041764814,
      "learning_rate": 9.696000000000002e-06,
      "loss": 0.0,
      "step": 16100
    },
    {
      "epoch": 5.168,
      "grad_norm": 4.7798686858909734e-05,
      "learning_rate": 9.664000000000001e-06,
      "loss": 0.0,
      "step": 16150
    },
    {
      "epoch": 5.184,
      "grad_norm": 0.0005436583145042151,
      "learning_rate": 9.632e-06,
      "loss": 0.0,
      "step": 16200
    },
    {
      "epoch": 5.2,
      "grad_norm": 4.789698124498285e-05,
      "learning_rate": 9.600000000000001e-06,
      "loss": 0.0,
      "step": 16250
    },
    {
      "epoch": 5.216,
      "grad_norm": 6.669996716920356e-05,
      "learning_rate": 9.568e-06,
      "loss": 0.0,
      "step": 16300
    },
    {
      "epoch": 5.232,
      "grad_norm": 2.015998723514938e-05,
      "learning_rate": 9.536000000000002e-06,
      "loss": 0.0,
      "step": 16350
    },
    {
      "epoch": 5.248,
      "grad_norm": 2.566916763899653e-05,
      "learning_rate": 9.504e-06,
      "loss": 0.0,
      "step": 16400
    },
    {
      "epoch": 5.264,
      "grad_norm": 6.973559986609284e-05,
      "learning_rate": 9.472000000000002e-06,
      "loss": 0.0,
      "step": 16450
    },
    {
      "epoch": 5.28,
      "grad_norm": 0.005275686664510083,
      "learning_rate": 9.440000000000001e-06,
      "loss": 0.0,
      "step": 16500
    },
    {
      "epoch": 5.296,
      "grad_norm": 0.0004379785026117808,
      "learning_rate": 9.408e-06,
      "loss": 0.0,
      "step": 16550
    },
    {
      "epoch": 5.312,
      "grad_norm": 7.224360874307222e-05,
      "learning_rate": 9.376000000000001e-06,
      "loss": 0.0,
      "step": 16600
    },
    {
      "epoch": 5.328,
      "grad_norm": 0.005529688027168001,
      "learning_rate": 9.344e-06,
      "loss": 0.0001,
      "step": 16650
    },
    {
      "epoch": 5.344,
      "grad_norm": 0.00035705853499429783,
      "learning_rate": 9.312000000000002e-06,
      "loss": 0.0001,
      "step": 16700
    },
    {
      "epoch": 5.36,
      "grad_norm": 0.004595469236532978,
      "learning_rate": 9.280000000000001e-06,
      "loss": 0.0002,
      "step": 16750
    },
    {
      "epoch": 5.376,
      "grad_norm": 0.014518935982881342,
      "learning_rate": 9.248e-06,
      "loss": 0.0002,
      "step": 16800
    },
    {
      "epoch": 5.392,
      "grad_norm": 0.005664058901176576,
      "learning_rate": 9.216000000000001e-06,
      "loss": 0.0001,
      "step": 16850
    },
    {
      "epoch": 5.408,
      "grad_norm": 0.02406436319347112,
      "learning_rate": 9.184e-06,
      "loss": 0.0002,
      "step": 16900
    },
    {
      "epoch": 5.424,
      "grad_norm": 0.010408753408708865,
      "learning_rate": 9.152000000000001e-06,
      "loss": 0.0001,
      "step": 16950
    },
    {
      "epoch": 5.44,
      "grad_norm": 0.004380210866505748,
      "learning_rate": 9.12e-06,
      "loss": 0.0001,
      "step": 17000
    },
    {
      "epoch": 5.456,
      "grad_norm": 0.0007477014672070001,
      "learning_rate": 9.088000000000002e-06,
      "loss": 0.0001,
      "step": 17050
    },
    {
      "epoch": 5.4719999999999995,
      "grad_norm": 0.004612394953866417,
      "learning_rate": 9.056000000000001e-06,
      "loss": 0.0001,
      "step": 17100
    },
    {
      "epoch": 5.4879999999999995,
      "grad_norm": 0.00042378436163973034,
      "learning_rate": 9.024e-06,
      "loss": 0.0001,
      "step": 17150
    },
    {
      "epoch": 5.504,
      "grad_norm": 0.00019529144567278435,
      "learning_rate": 8.992000000000001e-06,
      "loss": 0.0001,
      "step": 17200
    },
    {
      "epoch": 5.52,
      "grad_norm": 0.0027044303589215226,
      "learning_rate": 8.96e-06,
      "loss": 0.0,
      "step": 17250
    },
    {
      "epoch": 5.536,
      "grad_norm": 0.004170447127179762,
      "learning_rate": 8.928000000000002e-06,
      "loss": 0.0,
      "step": 17300
    },
    {
      "epoch": 5.552,
      "grad_norm": 0.0011583744674708582,
      "learning_rate": 8.896000000000001e-06,
      "loss": 0.0,
      "step": 17350
    },
    {
      "epoch": 5.568,
      "grad_norm": 7.505329528247209e-05,
      "learning_rate": 8.864e-06,
      "loss": 0.0,
      "step": 17400
    },
    {
      "epoch": 5.584,
      "grad_norm": 1.6692639755468825e-05,
      "learning_rate": 8.832000000000001e-06,
      "loss": 0.0,
      "step": 17450
    },
    {
      "epoch": 5.6,
      "grad_norm": 3.792640837382307e-05,
      "learning_rate": 8.8e-06,
      "loss": 0.0,
      "step": 17500
    },
    {
      "epoch": 5.616,
      "grad_norm": 0.0034862027436583023,
      "learning_rate": 8.768000000000001e-06,
      "loss": 0.0,
      "step": 17550
    },
    {
      "epoch": 5.632,
      "grad_norm": 0.002044696169017184,
      "learning_rate": 8.736e-06,
      "loss": 0.0,
      "step": 17600
    },
    {
      "epoch": 5.648,
      "grad_norm": 0.0016480723423270521,
      "learning_rate": 8.704e-06,
      "loss": 0.0,
      "step": 17650
    },
    {
      "epoch": 5.664,
      "grad_norm": 5.046250365736628e-05,
      "learning_rate": 8.672000000000001e-06,
      "loss": 0.0,
      "step": 17700
    },
    {
      "epoch": 5.68,
      "grad_norm": 0.002490207831005637,
      "learning_rate": 8.64e-06,
      "loss": 0.0,
      "step": 17750
    },
    {
      "epoch": 5.696,
      "grad_norm": 2.3620844090189842e-05,
      "learning_rate": 8.608000000000001e-06,
      "loss": 0.0,
      "step": 17800
    },
    {
      "epoch": 5.712,
      "grad_norm": 0.005341011948506022,
      "learning_rate": 8.576e-06,
      "loss": 0.0,
      "step": 17850
    },
    {
      "epoch": 5.728,
      "grad_norm": 0.001274874971425699,
      "learning_rate": 8.544000000000002e-06,
      "loss": 0.0,
      "step": 17900
    },
    {
      "epoch": 5.744,
      "grad_norm": 0.005544577257881219,
      "learning_rate": 8.512e-06,
      "loss": 0.0,
      "step": 17950
    },
    {
      "epoch": 5.76,
      "grad_norm": 0.001014221724791846,
      "learning_rate": 8.48e-06,
      "loss": 0.0,
      "step": 18000
    },
    {
      "epoch": 5.776,
      "grad_norm": 0.0034755696845187607,
      "learning_rate": 8.448000000000001e-06,
      "loss": 0.0,
      "step": 18050
    },
    {
      "epoch": 5.792,
      "grad_norm": 0.0035076131759202932,
      "learning_rate": 8.416e-06,
      "loss": 0.0,
      "step": 18100
    },
    {
      "epoch": 5.808,
      "grad_norm": 1.798515357665533e-05,
      "learning_rate": 8.384000000000001e-06,
      "loss": 0.0,
      "step": 18150
    },
    {
      "epoch": 5.824,
      "grad_norm": 0.008807581853205114,
      "learning_rate": 8.352e-06,
      "loss": 0.0,
      "step": 18200
    },
    {
      "epoch": 5.84,
      "grad_norm": 0.004372862354976277,
      "learning_rate": 8.32e-06,
      "loss": 0.0,
      "step": 18250
    },
    {
      "epoch": 5.856,
      "grad_norm": 0.004218059627149101,
      "learning_rate": 8.288000000000001e-06,
      "loss": 0.0,
      "step": 18300
    },
    {
      "epoch": 5.872,
      "grad_norm": 0.003638369573242274,
      "learning_rate": 8.256e-06,
      "loss": 0.0,
      "step": 18350
    },
    {
      "epoch": 5.888,
      "grad_norm": 5.7538041224172105e-06,
      "learning_rate": 8.224000000000001e-06,
      "loss": 0.0,
      "step": 18400
    },
    {
      "epoch": 5.904,
      "grad_norm": 6.190202276164088e-06,
      "learning_rate": 8.192e-06,
      "loss": 0.0,
      "step": 18450
    },
    {
      "epoch": 5.92,
      "grad_norm": 0.003102560253484021,
      "learning_rate": 8.16e-06,
      "loss": 0.0,
      "step": 18500
    },
    {
      "epoch": 5.936,
      "grad_norm": 0.0030438170806869475,
      "learning_rate": 8.128e-06,
      "loss": 0.0,
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 5.952, | |
| "grad_norm": 0.004504233459517902, | |
| "learning_rate": 8.096e-06, | |
| "loss": 0.0, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 5.968, | |
| "grad_norm": 0.004759229281040865, | |
| "learning_rate": 8.064000000000001e-06, | |
| "loss": 0.0, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 5.984, | |
| "grad_norm": 7.723498841261772e-06, | |
| "learning_rate": 8.032e-06, | |
| "loss": 0.0, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 5.552452532832903e-06, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.0, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 6.016, | |
| "grad_norm": 2.597337502435462e-05, | |
| "learning_rate": 7.968e-06, | |
| "loss": 0.0, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 6.032, | |
| "grad_norm": 0.009335540220574937, | |
| "learning_rate": 7.936e-06, | |
| "loss": 0.0, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 6.048, | |
| "grad_norm": 7.524061298947454e-06, | |
| "learning_rate": 7.904000000000001e-06, | |
| "loss": 0.0, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 6.064, | |
| "grad_norm": 0.00015327762567155916, | |
| "learning_rate": 7.872e-06, | |
| "loss": 0.0, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "grad_norm": 2.3439642609130923e-05, | |
| "learning_rate": 7.840000000000001e-06, | |
| "loss": 0.0, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 6.096, | |
| "grad_norm": 6.72333328204891e-05, | |
| "learning_rate": 7.808e-06, | |
| "loss": 0.0, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 6.112, | |
| "grad_norm": 0.004346096242491425, | |
| "learning_rate": 7.776e-06, | |
| "loss": 0.0, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 6.128, | |
| "grad_norm": 0.012622596321264466, | |
| "learning_rate": 7.744e-06, | |
| "loss": 0.0001, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 6.144, | |
| "grad_norm": 0.008790621292549712, | |
| "learning_rate": 7.712e-06, | |
| "loss": 0.0002, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "grad_norm": 0.004938158345308147, | |
| "learning_rate": 7.680000000000001e-06, | |
| "loss": 0.0001, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 6.176, | |
| "grad_norm": 0.018473675290916154, | |
| "learning_rate": 7.648e-06, | |
| "loss": 0.0002, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 6.192, | |
| "grad_norm": 0.022804212260300934, | |
| "learning_rate": 7.616000000000001e-06, | |
| "loss": 0.0002, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 6.208, | |
| "grad_norm": 0.003592326856058541, | |
| "learning_rate": 7.5840000000000006e-06, | |
| "loss": 0.0001, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 6.224, | |
| "grad_norm": 0.047396594119550994, | |
| "learning_rate": 7.552000000000001e-06, | |
| "loss": 0.0001, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "grad_norm": 0.009235389481528054, | |
| "learning_rate": 7.520000000000001e-06, | |
| "loss": 0.0002, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 6.256, | |
| "grad_norm": 0.01434618749682531, | |
| "learning_rate": 7.488000000000001e-06, | |
| "loss": 0.0001, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 6.272, | |
| "grad_norm": 0.0021773822077626793, | |
| "learning_rate": 7.456000000000001e-06, | |
| "loss": 0.0001, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 6.288, | |
| "grad_norm": 0.002663391021728829, | |
| "learning_rate": 7.424e-06, | |
| "loss": 0.0, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 6.304, | |
| "grad_norm": 0.00011747154952013698, | |
| "learning_rate": 7.3920000000000005e-06, | |
| "loss": 0.0, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "grad_norm": 0.0008636079746021932, | |
| "learning_rate": 7.360000000000001e-06, | |
| "loss": 0.0001, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 6.336, | |
| "grad_norm": 0.0026435008452155074, | |
| "learning_rate": 7.328000000000001e-06, | |
| "loss": 0.0, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 6.352, | |
| "grad_norm": 0.0005814556237841476, | |
| "learning_rate": 7.296000000000001e-06, | |
| "loss": 0.0, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 6.368, | |
| "grad_norm": 0.001405603964445953, | |
| "learning_rate": 7.264000000000001e-06, | |
| "loss": 0.0, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 6.384, | |
| "grad_norm": 0.0016513732086724116, | |
| "learning_rate": 7.232e-06, | |
| "loss": 0.0, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "grad_norm": 0.0034893201733840937, | |
| "learning_rate": 7.2000000000000005e-06, | |
| "loss": 0.0, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 6.416, | |
| "grad_norm": 0.00857451483231717, | |
| "learning_rate": 7.168000000000001e-06, | |
| "loss": 0.0, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 6.432, | |
| "grad_norm": 2.4199606884085253e-05, | |
| "learning_rate": 7.136000000000001e-06, | |
| "loss": 0.0, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 6.448, | |
| "grad_norm": 3.250175417169871e-05, | |
| "learning_rate": 7.104000000000001e-06, | |
| "loss": 0.0, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 6.464, | |
| "grad_norm": 0.005732826853229158, | |
| "learning_rate": 7.072000000000001e-06, | |
| "loss": 0.0, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "grad_norm": 0.00010353949349009911, | |
| "learning_rate": 7.04e-06, | |
| "loss": 0.0, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 6.496, | |
| "grad_norm": 0.005592962017534361, | |
| "learning_rate": 7.0080000000000005e-06, | |
| "loss": 0.0, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 6.5120000000000005, | |
| "grad_norm": 1.8109719094913944e-05, | |
| "learning_rate": 6.976000000000001e-06, | |
| "loss": 0.0, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 6.5280000000000005, | |
| "grad_norm": 0.0032198732263617894, | |
| "learning_rate": 6.944000000000001e-06, | |
| "loss": 0.0, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 6.5440000000000005, | |
| "grad_norm": 0.005473202984385111, | |
| "learning_rate": 6.912000000000001e-06, | |
| "loss": 0.0, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 6.5600000000000005, | |
| "grad_norm": 0.005380069417154803, | |
| "learning_rate": 6.88e-06, | |
| "loss": 0.0, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 6.576, | |
| "grad_norm": 0.003232314271766742, | |
| "learning_rate": 6.848e-06, | |
| "loss": 0.0, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 6.592, | |
| "grad_norm": 0.0036696087338221385, | |
| "learning_rate": 6.8160000000000005e-06, | |
| "loss": 0.0, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 6.608, | |
| "grad_norm": 0.0026588039107233487, | |
| "learning_rate": 6.784000000000001e-06, | |
| "loss": 0.0, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 6.624, | |
| "grad_norm": 8.99182338038509e-06, | |
| "learning_rate": 6.752000000000001e-06, | |
| "loss": 0.0, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "grad_norm": 8.563056421425572e-06, | |
| "learning_rate": 6.720000000000001e-06, | |
| "loss": 0.0, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 6.656, | |
| "grad_norm": 0.00037207476131123044, | |
| "learning_rate": 6.688e-06, | |
| "loss": 0.0, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 6.672, | |
| "grad_norm": 0.0007496876124411418, | |
| "learning_rate": 6.656e-06, | |
| "loss": 0.0, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 6.688, | |
| "grad_norm": 0.0031456413963673368, | |
| "learning_rate": 6.6240000000000004e-06, | |
| "loss": 0.0, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 6.704, | |
| "grad_norm": 0.0013961928602198664, | |
| "learning_rate": 6.592000000000001e-06, | |
| "loss": 0.0, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "grad_norm": 0.006857717806312309, | |
| "learning_rate": 6.560000000000001e-06, | |
| "loss": 0.0, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 6.736, | |
| "grad_norm": 0.002786967933254842, | |
| "learning_rate": 6.528000000000001e-06, | |
| "loss": 0.0, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 6.752, | |
| "grad_norm": 5.396561393846106e-06, | |
| "learning_rate": 6.496e-06, | |
| "loss": 0.0, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 6.768, | |
| "grad_norm": 9.46138673561178e-06, | |
| "learning_rate": 6.464e-06, | |
| "loss": 0.0, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 6.784, | |
| "grad_norm": 0.0023837669652299874, | |
| "learning_rate": 6.432e-06, | |
| "loss": 0.0, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "grad_norm": 1.05038503813717e-05, | |
| "learning_rate": 6.4000000000000006e-06, | |
| "loss": 0.0, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 6.816, | |
| "grad_norm": 0.0005776930285178894, | |
| "learning_rate": 6.368000000000001e-06, | |
| "loss": 0.0, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 6.832, | |
| "grad_norm": 9.139296410215242e-06, | |
| "learning_rate": 6.336000000000001e-06, | |
| "loss": 0.0, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 6.848, | |
| "grad_norm": 0.0021852339939074364, | |
| "learning_rate": 6.304e-06, | |
| "loss": 0.0, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 6.864, | |
| "grad_norm": 0.005150959364319729, | |
| "learning_rate": 6.272e-06, | |
| "loss": 0.0, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "grad_norm": 5.175095074364956e-06, | |
| "learning_rate": 6.24e-06, | |
| "loss": 0.0, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 6.896, | |
| "grad_norm": 0.004969801241634378, | |
| "learning_rate": 6.2080000000000005e-06, | |
| "loss": 0.0, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 6.912, | |
| "grad_norm": 0.0008299473866216342, | |
| "learning_rate": 6.176000000000001e-06, | |
| "loss": 0.0, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 6.928, | |
| "grad_norm": 0.0017992643848965695, | |
| "learning_rate": 6.144e-06, | |
| "loss": 0.0, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 6.944, | |
| "grad_norm": 5.259612030448573e-06, | |
| "learning_rate": 6.112e-06, | |
| "loss": 0.0, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "grad_norm": 6.964401320931689e-06, | |
| "learning_rate": 6.08e-06, | |
| "loss": 0.0, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 6.976, | |
| "grad_norm": 5.5934708883321015e-06, | |
| "learning_rate": 6.048e-06, | |
| "loss": 0.0, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 6.992, | |
| "grad_norm": 0.0003940255704147256, | |
| "learning_rate": 6.0160000000000005e-06, | |
| "loss": 0.0, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 7.008, | |
| "grad_norm": 5.435289572651918e-06, | |
| "learning_rate": 5.984000000000001e-06, | |
| "loss": 0.0, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 7.024, | |
| "grad_norm": 0.0042470175422726604, | |
| "learning_rate": 5.952e-06, | |
| "loss": 0.0, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "grad_norm": 7.335645814032098e-06, | |
| "learning_rate": 5.92e-06, | |
| "loss": 0.0, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 7.056, | |
| "grad_norm": 0.0009590486824500321, | |
| "learning_rate": 5.888e-06, | |
| "loss": 0.0, | |
| "step": 22050 | |
| }, | |
| { | |
| "epoch": 7.072, | |
| "grad_norm": 5.906713961463055e-06, | |
| "learning_rate": 5.856e-06, | |
| "loss": 0.0, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 7.088, | |
| "grad_norm": 0.004176688566628433, | |
| "learning_rate": 5.8240000000000005e-06, | |
| "loss": 0.0, | |
| "step": 22150 | |
| }, | |
| { | |
| "epoch": 7.104, | |
| "grad_norm": 0.0032506704819596282, | |
| "learning_rate": 5.792000000000001e-06, | |
| "loss": 0.0, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "grad_norm": 0.0005990950882116851, | |
| "learning_rate": 5.76e-06, | |
| "loss": 0.0, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 7.136, | |
| "grad_norm": 0.002619844073095375, | |
| "learning_rate": 5.728e-06, | |
| "loss": 0.0, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 7.152, | |
| "grad_norm": 0.000133825406762409, | |
| "learning_rate": 5.696e-06, | |
| "loss": 0.0, | |
| "step": 22350 | |
| }, | |
| { | |
| "epoch": 7.168, | |
| "grad_norm": 7.589758778816755e-06, | |
| "learning_rate": 5.664e-06, | |
| "loss": 0.0, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 7.184, | |
| "grad_norm": 0.0037077771258705637, | |
| "learning_rate": 5.6320000000000005e-06, | |
| "loss": 0.0, | |
| "step": 22450 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "grad_norm": 6.45656311225853e-05, | |
| "learning_rate": 5.600000000000001e-06, | |
| "loss": 0.0, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 7.216, | |
| "grad_norm": 3.53352659793062e-06, | |
| "learning_rate": 5.568e-06, | |
| "loss": 0.0, | |
| "step": 22550 | |
| }, | |
| { | |
| "epoch": 7.232, | |
| "grad_norm": 4.956625745019511e-06, | |
| "learning_rate": 5.536e-06, | |
| "loss": 0.0, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 7.248, | |
| "grad_norm": 3.5972044070382584e-06, | |
| "learning_rate": 5.504e-06, | |
| "loss": 0.0, | |
| "step": 22650 | |
| }, | |
| { | |
| "epoch": 7.264, | |
| "grad_norm": 3.952736091142475e-06, | |
| "learning_rate": 5.472e-06, | |
| "loss": 0.0, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "grad_norm": 0.002344121199927973, | |
| "learning_rate": 5.4400000000000004e-06, | |
| "loss": 0.0, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 7.296, | |
| "grad_norm": 0.0014181046917897458, | |
| "learning_rate": 5.408e-06, | |
| "loss": 0.0, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 7.312, | |
| "grad_norm": 3.673993878498047e-06, | |
| "learning_rate": 5.376e-06, | |
| "loss": 0.0, | |
| "step": 22850 | |
| }, | |
| { | |
| "epoch": 7.328, | |
| "grad_norm": 0.0007035424885323912, | |
| "learning_rate": 5.344e-06, | |
| "loss": 0.0, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 7.344, | |
| "grad_norm": 3.4640649140748472e-06, | |
| "learning_rate": 5.312e-06, | |
| "loss": 0.0, | |
| "step": 22950 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "grad_norm": 3.397764375456797e-06, | |
| "learning_rate": 5.28e-06, | |
| "loss": 0.0, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 7.376, | |
| "grad_norm": 6.985809741397173e-06, | |
| "learning_rate": 5.248000000000001e-06, | |
| "loss": 0.0, | |
| "step": 23050 | |
| }, | |
| { | |
| "epoch": 7.392, | |
| "grad_norm": 0.004727778554217135, | |
| "learning_rate": 5.216e-06, | |
| "loss": 0.0, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 7.408, | |
| "grad_norm": 0.00034319356861871504, | |
| "learning_rate": 5.184e-06, | |
| "loss": 0.0, | |
| "step": 23150 | |
| }, | |
| { | |
| "epoch": 7.424, | |
| "grad_norm": 0.002707354453454117, | |
| "learning_rate": 5.152e-06, | |
| "loss": 0.0, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "grad_norm": 0.003741433501999256, | |
| "learning_rate": 5.12e-06, | |
| "loss": 0.0, | |
| "step": 23250 | |
| }, | |
| { | |
| "epoch": 7.456, | |
| "grad_norm": 0.004313866605554443, | |
| "learning_rate": 5.088000000000001e-06, | |
| "loss": 0.0, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 7.4719999999999995, | |
| "grad_norm": 5.048162041463982e-06, | |
| "learning_rate": 5.056000000000001e-06, | |
| "loss": 0.0, | |
| "step": 23350 | |
| }, | |
| { | |
| "epoch": 7.4879999999999995, | |
| "grad_norm": 0.0025475153276506386, | |
| "learning_rate": 5.024e-06, | |
| "loss": 0.0, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 7.504, | |
| "grad_norm": 0.0031833329888314825, | |
| "learning_rate": 4.992e-06, | |
| "loss": 0.0, | |
| "step": 23450 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "grad_norm": 6.705201147750263e-06, | |
| "learning_rate": 4.960000000000001e-06, | |
| "loss": 0.0, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 7.536, | |
| "grad_norm": 5.39106789868296e-06, | |
| "learning_rate": 4.928000000000001e-06, | |
| "loss": 0.0, | |
| "step": 23550 | |
| }, | |
| { | |
| "epoch": 7.552, | |
| "grad_norm": 0.003626501302770346, | |
| "learning_rate": 4.896e-06, | |
| "loss": 0.0, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 7.568, | |
| "grad_norm": 1.0052769842061194e-05, | |
| "learning_rate": 4.864e-06, | |
| "loss": 0.0, | |
| "step": 23650 | |
| }, | |
| { | |
| "epoch": 7.584, | |
| "grad_norm": 5.916920444277189e-06, | |
| "learning_rate": 4.8320000000000005e-06, | |
| "loss": 0.0, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "grad_norm": 0.004523885580121961, | |
| "learning_rate": 4.800000000000001e-06, | |
| "loss": 0.0, | |
| "step": 23750 | |
| }, | |
| { | |
| "epoch": 7.616, | |
| "grad_norm": 0.00013354452021234782, | |
| "learning_rate": 4.768000000000001e-06, | |
| "loss": 0.0, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 7.632, | |
| "grad_norm": 0.0012276488785534887, | |
| "learning_rate": 4.736000000000001e-06, | |
| "loss": 0.0, | |
| "step": 23850 | |
| }, | |
| { | |
| "epoch": 7.648, | |
| "grad_norm": 4.502912527446804e-06, | |
| "learning_rate": 4.704e-06, | |
| "loss": 0.0, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 7.664, | |
| "grad_norm": 0.0006608232019348104, | |
| "learning_rate": 4.672e-06, | |
| "loss": 0.0, | |
| "step": 23950 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "grad_norm": 4.088988845255269e-06, | |
| "learning_rate": 4.6400000000000005e-06, | |
| "loss": 0.0, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 7.696, | |
| "grad_norm": 0.0003390975010046968, | |
| "learning_rate": 4.608000000000001e-06, | |
| "loss": 0.0, | |
| "step": 24050 | |
| }, | |
| { | |
| "epoch": 7.712, | |
| "grad_norm": 0.002508565709210886, | |
| "learning_rate": 4.576000000000001e-06, | |
| "loss": 0.0, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 7.728, | |
| "grad_norm": 0.0015288194430780194, | |
| "learning_rate": 4.544000000000001e-06, | |
| "loss": 0.0, | |
| "step": 24150 | |
| }, | |
| { | |
| "epoch": 7.744, | |
| "grad_norm": 0.001448906925195681, | |
| "learning_rate": 4.512e-06, | |
| "loss": 0.0, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "grad_norm": 0.0021363762770405994, | |
| "learning_rate": 4.48e-06, | |
| "loss": 0.0, | |
| "step": 24250 | |
| }, | |
| { | |
| "epoch": 7.776, | |
| "grad_norm": 0.00031198434311475884, | |
| "learning_rate": 4.4480000000000004e-06, | |
| "loss": 0.0, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 7.792, | |
| "grad_norm": 9.145186445762476e-05, | |
| "learning_rate": 4.416000000000001e-06, | |
| "loss": 0.0001, | |
| "step": 24350 | |
| }, | |
| { | |
| "epoch": 7.808, | |
| "grad_norm": 0.0009402193973337792, | |
| "learning_rate": 4.384000000000001e-06, | |
| "loss": 0.0001, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 7.824, | |
| "grad_norm": 0.00261922858427188, | |
| "learning_rate": 4.352e-06, | |
| "loss": 0.0001, | |
| "step": 24450 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "grad_norm": 0.0007561662124930203, | |
| "learning_rate": 4.32e-06, | |
| "loss": 0.0001, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 7.856, | |
| "grad_norm": 0.0011904195012080216, | |
| "learning_rate": 4.288e-06, | |
| "loss": 0.0001, | |
| "step": 24550 | |
| }, | |
| { | |
| "epoch": 7.872, | |
| "grad_norm": 0.00682322933598015, | |
| "learning_rate": 4.256e-06, | |
| "loss": 0.0, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 7.888, | |
| "grad_norm": 1.4918149837028124e-05, | |
| "learning_rate": 4.2240000000000006e-06, | |
| "loss": 0.0, | |
| "step": 24650 | |
| }, | |
| { | |
| "epoch": 7.904, | |
| "grad_norm": 3.7896845336326056e-05, | |
| "learning_rate": 4.192000000000001e-06, | |
| "loss": 0.0, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "grad_norm": 1.4233998621857135e-05, | |
| "learning_rate": 4.16e-06, | |
| "loss": 0.0, | |
| "step": 24750 | |
| }, | |
| { | |
| "epoch": 7.936, | |
| "grad_norm": 1.3512330101803153e-05, | |
| "learning_rate": 4.128e-06, | |
| "loss": 0.0, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 7.952, | |
| "grad_norm": 0.0017622882875680194, | |
| "learning_rate": 4.096e-06, | |
| "loss": 0.0, | |
| "step": 24850 | |
| }, | |
| { | |
| "epoch": 7.968, | |
| "grad_norm": 0.004063890556109565, | |
| "learning_rate": 4.064e-06, | |
| "loss": 0.0, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 7.984, | |
| "grad_norm": 0.006146752020903372, | |
| "learning_rate": 4.0320000000000005e-06, | |
| "loss": 0.0, | |
| "step": 24950 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.0045951480519814975, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.0, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 8.016, | |
| "grad_norm": 1.105999334127264e-05, | |
| "learning_rate": 3.968e-06, | |
| "loss": 0.0, | |
| "step": 25050 | |
| }, | |
| { | |
| "epoch": 8.032, | |
| "grad_norm": 1.4322006340633164e-05, | |
| "learning_rate": 3.936e-06, | |
| "loss": 0.0, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 8.048, | |
| "grad_norm": 0.00459590439207981, | |
| "learning_rate": 3.904e-06, | |
| "loss": 0.0, | |
| "step": 25150 | |
| }, | |
| { | |
| "epoch": 8.064, | |
| "grad_norm": 6.626585512636433e-06, | |
| "learning_rate": 3.872e-06, | |
| "loss": 0.0, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "grad_norm": 3.062805741448072e-05, | |
| "learning_rate": 3.8400000000000005e-06, | |
| "loss": 0.0, | |
| "step": 25250 | |
| }, | |
| { | |
| "epoch": 8.096, | |
| "grad_norm": 0.00139179287638531, | |
| "learning_rate": 3.8080000000000006e-06, | |
| "loss": 0.0, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 8.112, | |
| "grad_norm": 3.0149777542350257e-05, | |
| "learning_rate": 3.7760000000000004e-06, | |
| "loss": 0.0, | |
| "step": 25350 | |
| }, | |
| { | |
| "epoch": 8.128, | |
| "grad_norm": 0.003113858325217473, | |
| "learning_rate": 3.7440000000000005e-06, | |
| "loss": 0.0, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 8.144, | |
| "grad_norm": 6.947925699992271e-06, | |
| "learning_rate": 3.712e-06, | |
| "loss": 0.0, | |
| "step": 25450 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "grad_norm": 0.002796916584528065, | |
| "learning_rate": 3.6800000000000003e-06, | |
| "loss": 0.0, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 8.176, | |
| "grad_norm": 6.18407640833407e-06, | |
| "learning_rate": 3.6480000000000005e-06, | |
| "loss": 0.0, | |
| "step": 25550 | |
| }, | |
| { | |
| "epoch": 8.192, | |
| "grad_norm": 3.985635768893783e-06, | |
| "learning_rate": 3.616e-06, | |
| "loss": 0.0, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 8.208, | |
| "grad_norm": 4.306104409089461e-06, | |
| "learning_rate": 3.5840000000000003e-06, | |
| "loss": 0.0, | |
| "step": 25650 | |
| }, | |
| { | |
| "epoch": 8.224, | |
| "grad_norm": 1.806410872336378e-05, | |
| "learning_rate": 3.5520000000000005e-06, | |
| "loss": 0.0, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 8.24, | |
| "grad_norm": 3.194958556657671e-05, | |
| "learning_rate": 3.52e-06, | |
| "loss": 0.0, | |
| "step": 25750 | |
| }, | |
| { | |
| "epoch": 8.256, | |
| "grad_norm": 0.0027543054021635484, | |
| "learning_rate": 3.4880000000000003e-06, | |
| "loss": 0.0, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 8.272, | |
| "grad_norm": 0.0005772419217733996, | |
| "learning_rate": 3.4560000000000005e-06, | |
| "loss": 0.0, | |
| "step": 25850 | |
| }, | |
| { | |
| "epoch": 8.288, | |
| "grad_norm": 0.00030919843905252405, | |
| "learning_rate": 3.424e-06, | |
| "loss": 0.0, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 8.304, | |
| "grad_norm": 4.280920759753649e-06, | |
| "learning_rate": 3.3920000000000003e-06, | |
| "loss": 0.0, | |
| "step": 25950 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "grad_norm": 8.10017575098728e-06, | |
| "learning_rate": 3.3600000000000004e-06, | |
| "loss": 0.0, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 8.336, | |
| "grad_norm": 0.0028206230272121986, | |
| "learning_rate": 3.328e-06, | |
| "loss": 0.0, | |
| "step": 26050 | |
| }, | |
| { | |
| "epoch": 8.352, | |
| "grad_norm": 0.002696578509199753, | |
| "learning_rate": 3.2960000000000003e-06, | |
| "loss": 0.0, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 8.368, | |
| "grad_norm": 4.570628348792391e-06, | |
| "learning_rate": 3.2640000000000004e-06, | |
| "loss": 0.0, | |
| "step": 26150 | |
| }, | |
| { | |
| "epoch": 8.384, | |
| "grad_norm": 5.1947235707447225e-06, | |
| "learning_rate": 3.232e-06, | |
| "loss": 0.0, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "grad_norm": 0.002501990498399514, | |
| "learning_rate": 3.2000000000000003e-06, | |
| "loss": 0.0, | |
| "step": 26250 | |
| }, | |
| { | |
| "epoch": 8.416, | |
| "grad_norm": 0.003409989286376495, | |
| "learning_rate": 3.1680000000000004e-06, | |
| "loss": 0.0, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 8.432, | |
| "grad_norm": 0.0026823086545517314, | |
| "learning_rate": 3.136e-06, | |
| "loss": 0.0, | |
| "step": 26350 | |
| }, | |
| { | |
| "epoch": 8.448, | |
| "grad_norm": 0.0022783205269016425, | |
| "learning_rate": 3.1040000000000003e-06, | |
| "loss": 0.0, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 8.464, | |
| "grad_norm": 6.336465247361757e-06, | |
| "learning_rate": 3.072e-06, | |
| "loss": 0.0, | |
| "step": 26450 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "grad_norm": 4.592277322590752e-06, | |
| "learning_rate": 3.04e-06, | |
| "loss": 0.0, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 8.496, | |
| "grad_norm": 0.002440763934157598, | |
| "learning_rate": 3.0080000000000003e-06, | |
| "loss": 0.0, | |
| "step": 26550 | |
| }, | |
| { | |
| "epoch": 8.512, | |
| "grad_norm": 0.0017411545218379125, | |
| "learning_rate": 2.976e-06, | |
| "loss": 0.0, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 8.528, | |
| "grad_norm": 0.0008851555302938333, | |
| "learning_rate": 2.944e-06, | |
| "loss": 0.0, | |
| "step": 26650 | |
| }, | |
| { | |
| "epoch": 8.544, | |
| "grad_norm": 5.758051362572022e-06, | |
| "learning_rate": 2.9120000000000002e-06, | |
| "loss": 0.0, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "grad_norm": 3.791587800895384e-06, | |
| "learning_rate": 2.88e-06, | |
| "loss": 0.0, | |
| "step": 26750 | |
| }, | |
| { | |
| "epoch": 8.576, | |
| "grad_norm": 1.1417223748270065e-05, | |
| "learning_rate": 2.848e-06, | |
| "loss": 0.0, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 8.592, | |
| "grad_norm": 1.93146445830669e-05, | |
| "learning_rate": 2.8160000000000002e-06, | |
| "loss": 0.0, | |
| "step": 26850 | |
| }, | |
| { | |
| "epoch": 8.608, | |
| "grad_norm": 3.0035429272579585e-06, | |
| "learning_rate": 2.784e-06, | |
| "loss": 0.0, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 8.624, | |
| "grad_norm": 3.1422754893000723e-06, | |
| "learning_rate": 2.752e-06, | |
| "loss": 0.0, | |
| "step": 26950 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "grad_norm": 0.0016051837845728594, | |
| "learning_rate": 2.7200000000000002e-06, | |
| "loss": 0.0, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 8.656, | |
| "grad_norm": 3.0331981849054454e-06, | |
| "learning_rate": 2.688e-06, | |
| "loss": 0.0, | |
| "step": 27050 | |
| }, | |
| { | |
| "epoch": 8.672, | |
| "grad_norm": 2.688345145719462e-06, | |
| "learning_rate": 2.656e-06, | |
| "loss": 0.0, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 8.688, | |
| "grad_norm": 0.0018971614745931684, | |
| "learning_rate": 2.6240000000000006e-06, | |
| "loss": 0.0, | |
| "step": 27150 | |
| }, | |
| { | |
| "epoch": 8.704, | |
| "grad_norm": 0.001157850108603949, | |
| "learning_rate": 2.592e-06, | |
| "loss": 0.0, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 8.72, | |
| "grad_norm": 9.438485254012652e-06, | |
| "learning_rate": 2.56e-06, | |
| "loss": 0.0, | |
| "step": 27250 | |
| }, | |
| { | |
| "epoch": 8.736, | |
| "grad_norm": 0.003824595780488627, | |
| "learning_rate": 2.5280000000000006e-06, | |
| "loss": 0.0, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 8.752, | |
| "grad_norm": 5.80867931861633e-06, | |
| "learning_rate": 2.496e-06, | |
| "loss": 0.0, | |
| "step": 27350 | |
| }, | |
| { | |
| "epoch": 8.768, | |
| "grad_norm": 0.00834510993524938, | |
| "learning_rate": 2.4640000000000005e-06, | |
| "loss": 0.0, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 8.784, | |
| "grad_norm": 3.36769367606297e-06, | |
| "learning_rate": 2.432e-06, | |
| "loss": 0.0, | |
| "step": 27450 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "grad_norm": 3.5500421331503815e-06, | |
| "learning_rate": 2.4000000000000003e-06, | |
| "loss": 0.0, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 8.816, | |
| "grad_norm": 0.005803545750156033, | |
| "learning_rate": 2.3680000000000005e-06, | |
| "loss": 0.0, | |
| "step": 27550 | |
| }, | |
| { | |
| "epoch": 8.832, | |
| "grad_norm": 0.003256066741531994, | |
| "learning_rate": 2.336e-06, | |
| "loss": 0.0, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 8.848, | |
| "grad_norm": 2.75992066631196e-06, | |
| "learning_rate": 2.3040000000000003e-06, | |
| "loss": 0.0, | |
| "step": 27650 | |
| }, | |
| { | |
| "epoch": 8.864, | |
| "grad_norm": 0.002876840934498164, | |
| "learning_rate": 2.2720000000000004e-06, | |
| "loss": 0.0, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "grad_norm": 3.6932448968918764e-06, | |
| "learning_rate": 2.24e-06, | |
| "loss": 0.0, | |
| "step": 27750 | |
| }, | |
| { | |
| "epoch": 8.896, | |
| "grad_norm": 0.002516088845243444, | |
| "learning_rate": 2.2080000000000003e-06, | |
| "loss": 0.0, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 8.912, | |
| "grad_norm": 2.7651064988456252e-06, | |
| "learning_rate": 2.176e-06, | |
| "loss": 0.0, | |
| "step": 27850 | |
| }, | |
| { | |
| "epoch": 8.928, | |
| "grad_norm": 4.677689598922778e-06, | |
| "learning_rate": 2.144e-06, | |
| "loss": 0.0, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 8.943999999999999, | |
| "grad_norm": 3.5933917656898116e-06, | |
| "learning_rate": 2.1120000000000003e-06, | |
| "loss": 0.0, | |
| "step": 27950 | |
| }, | |
| { | |
| "epoch": 8.96, | |
| "grad_norm": 0.0026837812666487813, | |
| "learning_rate": 2.08e-06, | |
| "loss": 0.0, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 8.975999999999999, | |
| "grad_norm": 6.660030265311055e-06, | |
| "learning_rate": 2.048e-06, | |
| "loss": 0.0, | |
| "step": 28050 | |
| }, | |
| { | |
| "epoch": 8.992, | |
| "grad_norm": 0.00698615969912155, | |
| "learning_rate": 2.0160000000000003e-06, | |
| "loss": 0.0, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 9.008, | |
| "grad_norm": 6.071643098745315e-06, | |
| "learning_rate": 1.984e-06, | |
| "loss": 0.0, | |
| "step": 28150 | |
| }, | |
| { | |
| "epoch": 9.024, | |
| "grad_norm": 0.002739745016870674, | |
| "learning_rate": 1.952e-06, | |
| "loss": 0.0, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "grad_norm": 0.0018820591441367112, | |
| "learning_rate": 1.9200000000000003e-06, | |
| "loss": 0.0, | |
| "step": 28250 | |
| }, | |
| { | |
| "epoch": 9.056, | |
| "grad_norm": 4.422973643174459e-06, | |
| "learning_rate": 1.8880000000000002e-06, | |
| "loss": 0.0, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 9.072, | |
| "grad_norm": 0.004241223054630402, | |
| "learning_rate": 1.856e-06, | |
| "loss": 0.0, | |
| "step": 28350 | |
| }, | |
| { | |
| "epoch": 9.088, | |
| "grad_norm": 0.0018724180201145385, | |
| "learning_rate": 1.8240000000000002e-06, | |
| "loss": 0.0, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 9.104, | |
| "grad_norm": 3.230264296112646e-06, | |
| "learning_rate": 1.7920000000000002e-06, | |
| "loss": 0.0, | |
| "step": 28450 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "grad_norm": 0.001800105085499999, | |
| "learning_rate": 1.76e-06, | |
| "loss": 0.0, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 9.136, | |
| "grad_norm": 3.4036067894074316e-06, | |
| "learning_rate": 1.7280000000000002e-06, | |
| "loss": 0.0, | |
| "step": 28550 | |
| }, | |
| { | |
| "epoch": 9.152, | |
| "grad_norm": 2.9416031574887065e-06, | |
| "learning_rate": 1.6960000000000002e-06, | |
| "loss": 0.0, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 9.168, | |
| "grad_norm": 0.0074933729947643645, | |
| "learning_rate": 1.664e-06, | |
| "loss": 0.0, | |
| "step": 28650 | |
| }, | |
| { | |
| "epoch": 9.184, | |
| "grad_norm": 2.6324603429806364e-06, | |
| "learning_rate": 1.6320000000000002e-06, | |
| "loss": 0.0, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "grad_norm": 0.004046684231625261, | |
| "learning_rate": 1.6000000000000001e-06, | |
| "loss": 0.0, | |
| "step": 28750 | |
| }, | |
| { | |
| "epoch": 9.216, | |
| "grad_norm": 2.8312469098439607e-06, | |
| "learning_rate": 1.568e-06, | |
| "loss": 0.0, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 9.232, | |
| "grad_norm": 7.031184057455803e-06, | |
| "learning_rate": 1.536e-06, | |
| "loss": 0.0, | |
| "step": 28850 | |
| }, | |
| { | |
| "epoch": 9.248, | |
| "grad_norm": 2.7837343248805205e-06, | |
| "learning_rate": 1.5040000000000001e-06, | |
| "loss": 0.0, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 9.264, | |
| "grad_norm": 2.6096489509437644e-06, | |
| "learning_rate": 1.472e-06, | |
| "loss": 0.0, | |
| "step": 28950 | |
| }, | |
| { | |
| "epoch": 9.28, | |
| "grad_norm": 0.003287527391462962, | |
| "learning_rate": 1.44e-06, | |
| "loss": 0.0, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 9.296, | |
| "grad_norm": 3.568400823063364e-06, | |
| "learning_rate": 1.4080000000000001e-06, | |
| "loss": 0.0, | |
| "step": 29050 | |
| }, | |
| { | |
| "epoch": 9.312, | |
| "grad_norm": 0.001613374504927987, | |
| "learning_rate": 1.376e-06, | |
| "loss": 0.0, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 9.328, | |
| "grad_norm": 0.004032203594194166, | |
| "learning_rate": 1.344e-06, | |
| "loss": 0.0, | |
| "step": 29150 | |
| }, | |
| { | |
| "epoch": 9.344, | |
| "grad_norm": 0.005198815071921098, | |
| "learning_rate": 1.3120000000000003e-06, | |
| "loss": 0.0, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "grad_norm": 2.8504515758333786e-06, | |
| "learning_rate": 1.28e-06, | |
| "loss": 0.0, | |
| "step": 29250 | |
| }, | |
| { | |
| "epoch": 9.376, | |
| "grad_norm": 0.0065733355533144095, | |
| "learning_rate": 1.248e-06, | |
| "loss": 0.0, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 9.392, | |
| "grad_norm": 0.0014655085457166437, | |
| "learning_rate": 1.216e-06, | |
| "loss": 0.0, | |
| "step": 29350 | |
| }, | |
| { | |
| "epoch": 9.408, | |
| "grad_norm": 0.002317977733364113, | |
| "learning_rate": 1.1840000000000002e-06, | |
| "loss": 0.0, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 9.424, | |
| "grad_norm": 3.1712820857845064e-06, | |
| "learning_rate": 1.1520000000000002e-06, | |
| "loss": 0.0, | |
| "step": 29450 | |
| }, | |
| { | |
| "epoch": 9.44, | |
| "grad_norm": 0.0017765942152206172, | |
| "learning_rate": 1.12e-06, | |
| "loss": 0.0, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 9.456, | |
| "grad_norm": 3.298030137729072e-06, | |
| "learning_rate": 1.088e-06, | |
| "loss": 0.0, | |
| "step": 29550 | |
| }, | |
| { | |
| "epoch": 9.472, | |
| "grad_norm": 0.0035002110108688657, | |
| "learning_rate": 1.0560000000000001e-06, | |
| "loss": 0.0, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 9.488, | |
| "grad_norm": 0.003870923448853272, | |
| "learning_rate": 1.024e-06, | |
| "loss": 0.0, | |
| "step": 29650 | |
| }, | |
| { | |
| "epoch": 9.504, | |
| "grad_norm": 0.00203367970856836, | |
| "learning_rate": 9.92e-07, | |
| "loss": 0.0, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "grad_norm": 0.006006505453486269, | |
| "learning_rate": 9.600000000000001e-07, | |
| "loss": 0.0, | |
| "step": 29750 | |
| }, | |
| { | |
| "epoch": 9.536, | |
| "grad_norm": 0.01038968138831676, | |
| "learning_rate": 9.28e-07, | |
| "loss": 0.0, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 9.552, | |
| "grad_norm": 0.0008476742919666878, | |
| "learning_rate": 8.960000000000001e-07, | |
| "loss": 0.0, | |
| "step": 29850 | |
| }, | |
| { | |
| "epoch": 9.568, | |
| "grad_norm": 0.0029285556516275065, | |
| "learning_rate": 8.640000000000001e-07, | |
| "loss": 0.0, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 9.584, | |
| "grad_norm": 3.538940194233791e-06, | |
| "learning_rate": 8.32e-07, | |
| "loss": 0.0, | |
| "step": 29950 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "grad_norm": 2.6317247125415924e-06, | |
| "learning_rate": 8.000000000000001e-07, | |
| "loss": 0.0, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 9.616, | |
| "grad_norm": 3.1723179921373707e-06, | |
| "learning_rate": 7.68e-07, | |
| "loss": 0.0, | |
| "step": 30050 | |
| }, | |
| { | |
| "epoch": 9.632, | |
| "grad_norm": 0.0017773797129791615, | |
| "learning_rate": 7.36e-07, | |
| "loss": 0.0, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 9.648, | |
| "grad_norm": 3.890700637247315e-06, | |
| "learning_rate": 7.040000000000001e-07, | |
| "loss": 0.0, | |
| "step": 30150 | |
| }, | |
| { | |
| "epoch": 9.664, | |
| "grad_norm": 0.0014622567589594895, | |
| "learning_rate": 6.72e-07, | |
| "loss": 0.0, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "grad_norm": 0.006419297008436344, | |
| "learning_rate": 6.4e-07, | |
| "loss": 0.0, | |
| "step": 30250 | |
| }, | |
| { | |
| "epoch": 9.696, | |
| "grad_norm": 3.365707076677938e-06, | |
| "learning_rate": 6.08e-07, | |
| "loss": 0.0, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 9.712, | |
| "grad_norm": 0.00939817499692286, | |
| "learning_rate": 5.760000000000001e-07, | |
| "loss": 0.0, | |
| "step": 30350 | |
| }, | |
| { | |
| "epoch": 9.728, | |
| "grad_norm": 2.7639546967269018e-06, | |
| "learning_rate": 5.44e-07, | |
| "loss": 0.0, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 9.744, | |
| "grad_norm": 3.228059969214847e-06, | |
| "learning_rate": 5.12e-07, | |
| "loss": 0.0, | |
| "step": 30450 | |
| }, | |
| { | |
| "epoch": 9.76, | |
| "grad_norm": 2.9143603801018378e-06, | |
| "learning_rate": 4.800000000000001e-07, | |
| "loss": 0.0, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 9.776, | |
| "grad_norm": 4.472677373176853e-06, | |
| "learning_rate": 4.4800000000000004e-07, | |
| "loss": 0.0, | |
| "step": 30550 | |
| }, | |
| { | |
| "epoch": 9.792, | |
| "grad_norm": 0.003177155397723066, | |
| "learning_rate": 4.16e-07, | |
| "loss": 0.0, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 9.808, | |
| "grad_norm": 2.9639748677203557e-06, | |
| "learning_rate": 3.84e-07, | |
| "loss": 0.0, | |
| "step": 30650 | |
| }, | |
| { | |
| "epoch": 9.824, | |
| "grad_norm": 3.7104760090468066e-06, | |
| "learning_rate": 3.5200000000000003e-07, | |
| "loss": 0.0, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 9.84, | |
| "grad_norm": 0.0024388786980219382, | |
| "learning_rate": 3.2e-07, | |
| "loss": 0.0, | |
| "step": 30750 | |
| }, | |
| { | |
| "epoch": 9.856, | |
| "grad_norm": 0.005259090550834367, | |
| "learning_rate": 2.8800000000000004e-07, | |
| "loss": 0.0, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 9.872, | |
| "grad_norm": 0.0010068931779022919, | |
| "learning_rate": 2.56e-07, | |
| "loss": 0.0, | |
| "step": 30850 | |
| }, | |
| { | |
| "epoch": 9.888, | |
| "grad_norm": 3.036971816568042e-06, | |
| "learning_rate": 2.2400000000000002e-07, | |
| "loss": 0.0, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 9.904, | |
| "grad_norm": 5.944920943941958e-06, | |
| "learning_rate": 1.92e-07, | |
| "loss": 0.0, | |
| "step": 30950 | |
| }, | |
| { | |
| "epoch": 9.92, | |
| "grad_norm": 3.1533133218253755e-06, | |
| "learning_rate": 1.6e-07, | |
| "loss": 0.0, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 9.936, | |
| "grad_norm": 0.0008556866274702537, | |
| "learning_rate": 1.28e-07, | |
| "loss": 0.0, | |
| "step": 31050 | |
| }, | |
| { | |
| "epoch": 9.952, | |
| "grad_norm": 0.0019244948361365065, | |
| "learning_rate": 9.6e-08, | |
| "loss": 0.0, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 9.968, | |
| "grad_norm": 4.299595493855448e-06, | |
| "learning_rate": 6.4e-08, | |
| "loss": 0.0, | |
| "step": 31150 | |
| }, | |
| { | |
| "epoch": 9.984, | |
| "grad_norm": 3.017341374030069e-06, | |
| "learning_rate": 3.2e-08, | |
| "loss": 0.0, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 3.6112124670701524e-06, | |
| "learning_rate": 0.0, | |
| "loss": 0.0, | |
| "step": 31250 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 31250, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.47527639760896e+16, | |
| "train_batch_size": 6, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
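
The dump above is a standard Hugging Face `Trainer` state file: the learning rate decays linearly to 0.0 over the 31250 steps, and the logged loss sits at 0.0 (at the four-decimal logging precision) for most of the final epochs. Below is a minimal sketch, not part of the original log, for loading such a file and locating where the loss flatlined; the local path `trainer_state.json` and the restriction to the fields shown above are assumptions.

```python
# Minimal sketch: summarize a Hugging Face trainer_state.json like the one above.
# Assumption: the dump is saved locally as "trainer_state.json".
import json

with open("trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]  # one dict per logging step (logging_steps = 50)

# Final logged entry -- for this file it should read: step 31250, epoch 10.0, lr 0.0.
last = history[-1]
print(f"final: epoch {last['epoch']:.3f}  step {last['step']}  "
      f"loss {last['loss']:.4f}  lr {last['learning_rate']:.2e}")

# First entry (at the 50-step logging resolution) where the loss was logged as 0.0.
zero = next((h for h in history if h.get("loss") == 0.0), None)
if zero is not None:
    print(f"loss first logged as 0.0 at step {zero['step']} "
          f"(epoch {zero['epoch']:.3f})")
```

Note that a logged loss of 0.0 here only means the value rounded to zero at the logged precision; the small but nonzero `grad_norm` entries show the model was still receiving updates through the end of training.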