{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.05003027134129561,
  "eval_steps": 500,
  "global_step": 909,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 5.503880235566074e-05, "grad_norm": 459.8753356933594, "learning_rate": 1.0000000000000001e-07, "loss": 3.303, "step": 1 },
    { "epoch": 0.00011007760471132149, "grad_norm": 314.2561950683594, "learning_rate": 2.0000000000000002e-07, "loss": 2.8226, "step": 2 },
    { "epoch": 0.0001651164070669822, "grad_norm": 314.1292419433594, "learning_rate": 3.0000000000000004e-07, "loss": 2.8517, "step": 3 },
    { "epoch": 0.00022015520942264297, "grad_norm": 312.4049072265625, "learning_rate": 4.0000000000000003e-07, "loss": 2.6248, "step": 4 },
    { "epoch": 0.0002751940117783037, "grad_norm": 353.7213134765625, "learning_rate": 5.000000000000001e-07, "loss": 2.7883, "step": 5 },
    { "epoch": 0.0003302328141339644, "grad_norm": 278.41668701171875, "learning_rate": 6.000000000000001e-07, "loss": 2.5468, "step": 6 },
    { "epoch": 0.0003852716164896252, "grad_norm": 336.14532470703125, "learning_rate": 7.000000000000001e-07, "loss": 2.7721, "step": 7 },
    { "epoch": 0.00044031041884528595, "grad_norm": 201.19374084472656, "learning_rate": 8.000000000000001e-07, "loss": 2.4873, "step": 8 },
    { "epoch": 0.0004953492212009466, "grad_norm": 184.7027587890625, "learning_rate": 9.000000000000001e-07, "loss": 2.6647, "step": 9 },
    { "epoch": 0.0005503880235566074, "grad_norm": 154.597412109375, "learning_rate": 1.0000000000000002e-06, "loss": 2.602, "step": 10 },
    { "epoch": 0.0006054268259122681, "grad_norm": 40.47785568237305, "learning_rate": 1.1e-06, "loss": 2.6716, "step": 11 },
    { "epoch": 0.0006604656282679288, "grad_norm": 25.338607788085938, "learning_rate": 1.2000000000000002e-06, "loss": 2.2631, "step": 12 },
    { "epoch": 0.0007155044306235897, "grad_norm": 24.976919174194336, "learning_rate": 1.3e-06, "loss": 2.3564, "step": 13 },
    { "epoch": 0.0007705432329792504, "grad_norm": 15.239912033081055, "learning_rate": 1.4000000000000001e-06, "loss": 2.3295, "step": 14 },
    { "epoch": 0.0008255820353349112, "grad_norm": 14.125042915344238, "learning_rate": 1.5e-06, "loss": 2.307, "step": 15 },
    { "epoch": 0.0008806208376905719, "grad_norm": 13.163726806640625, "learning_rate": 1.6000000000000001e-06, "loss": 2.1493, "step": 16 },
    { "epoch": 0.0009356596400462326, "grad_norm": 8.726515769958496, "learning_rate": 1.7000000000000002e-06, "loss": 2.0333, "step": 17 },
    { "epoch": 0.0009906984424018933, "grad_norm": 9.072502136230469, "learning_rate": 1.8000000000000001e-06, "loss": 2.2046, "step": 18 },
    { "epoch": 0.001045737244757554, "grad_norm": 9.412588119506836, "learning_rate": 1.9000000000000002e-06, "loss": 2.2001, "step": 19 },
    { "epoch": 0.0011007760471132147, "grad_norm": 8.67534065246582, "learning_rate": 2.0000000000000003e-06, "loss": 1.7679, "step": 20 },
    { "epoch": 0.0011558148494688755, "grad_norm": 14.015918731689453, "learning_rate": 2.1000000000000002e-06, "loss": 1.9566, "step": 21 },
    { "epoch": 0.0012108536518245362, "grad_norm": 7.9474687576293945, "learning_rate": 2.2e-06, "loss": 1.9085, "step": 22 },
    { "epoch": 0.001265892454180197, "grad_norm": 6.806368350982666, "learning_rate": 2.3000000000000004e-06, "loss": 1.7918, "step": 23 },
    { "epoch": 0.0013209312565358577, "grad_norm": 5.3452582359313965, "learning_rate": 2.4000000000000003e-06, "loss": 1.8321, "step": 24 },
    { "epoch": 0.0013759700588915184, "grad_norm": 8.744244575500488, "learning_rate": 2.5e-06, "loss": 1.6317, "step": 25 },
    { "epoch": 0.0014310088612471794, "grad_norm": 5.304683685302734, "learning_rate": 2.6e-06, "loss": 1.6846, "step": 26 },
    { "epoch": 0.00148604766360284, "grad_norm": 5.650127410888672, "learning_rate": 2.7000000000000004e-06, "loss": 1.7449, "step": 27 },
    { "epoch": 0.0015410864659585008, "grad_norm": 5.479269504547119, "learning_rate": 2.8000000000000003e-06, "loss": 1.8158, "step": 28 },
    { "epoch": 0.0015961252683141616, "grad_norm": 4.873537063598633, "learning_rate": 2.9e-06, "loss": 1.8015, "step": 29 },
    { "epoch": 0.0016511640706698223, "grad_norm": 4.971101760864258, "learning_rate": 3e-06, "loss": 1.9034, "step": 30 },
    { "epoch": 0.001706202873025483, "grad_norm": 4.407571315765381, "learning_rate": 3.1000000000000004e-06, "loss": 1.9037, "step": 31 },
    { "epoch": 0.0017612416753811438, "grad_norm": 4.429073810577393, "learning_rate": 3.2000000000000003e-06, "loss": 1.6812, "step": 32 },
    { "epoch": 0.0018162804777368045, "grad_norm": 5.16085147857666, "learning_rate": 3.3000000000000006e-06, "loss": 1.7627, "step": 33 },
    { "epoch": 0.0018713192800924653, "grad_norm": 4.0805768966674805, "learning_rate": 3.4000000000000005e-06, "loss": 1.6799, "step": 34 },
    { "epoch": 0.001926358082448126, "grad_norm": 4.548702239990234, "learning_rate": 3.5e-06, "loss": 1.7799, "step": 35 },
    { "epoch": 0.0019813968848037865, "grad_norm": 5.181888580322266, "learning_rate": 3.6000000000000003e-06, "loss": 1.8235, "step": 36 },
    { "epoch": 0.0020364356871594475, "grad_norm": 3.9876129627227783, "learning_rate": 3.7e-06, "loss": 1.5999, "step": 37 },
    { "epoch": 0.002091474489515108, "grad_norm": 6.325051307678223, "learning_rate": 3.8000000000000005e-06, "loss": 1.7499, "step": 38 },
    { "epoch": 0.002146513291870769, "grad_norm": 6.199049949645996, "learning_rate": 3.900000000000001e-06, "loss": 1.784, "step": 39 },
    { "epoch": 0.0022015520942264295, "grad_norm": 4.83912992477417, "learning_rate": 4.000000000000001e-06, "loss": 1.8895, "step": 40 },
    { "epoch": 0.0022565908965820904, "grad_norm": 4.515626907348633, "learning_rate": 4.1e-06, "loss": 1.4887, "step": 41 },
    { "epoch": 0.002311629698937751, "grad_norm": 5.032265663146973, "learning_rate": 4.2000000000000004e-06, "loss": 1.7324, "step": 42 },
    { "epoch": 0.002366668501293412, "grad_norm": 4.1879048347473145, "learning_rate": 4.3e-06, "loss": 1.4912, "step": 43 },
    { "epoch": 0.0024217073036490724, "grad_norm": 4.128026485443115, "learning_rate": 4.4e-06, "loss": 1.554, "step": 44 },
    { "epoch": 0.0024767461060047334, "grad_norm": 4.527958393096924, "learning_rate": 4.5e-06, "loss": 1.652, "step": 45 },
    { "epoch": 0.002531784908360394, "grad_norm": 4.8388190269470215, "learning_rate": 4.600000000000001e-06, "loss": 1.6696, "step": 46 },
    { "epoch": 0.002586823710716055, "grad_norm": 4.2088541984558105, "learning_rate": 4.7e-06, "loss": 1.568, "step": 47 },
    { "epoch": 0.0026418625130717154, "grad_norm": 4.789997577667236, "learning_rate": 4.800000000000001e-06, "loss": 1.642, "step": 48 },
    { "epoch": 0.0026969013154273763, "grad_norm": 4.408346652984619, "learning_rate": 4.9000000000000005e-06, "loss": 1.5181, "step": 49 },
    { "epoch": 0.002751940117783037, "grad_norm": 4.572340488433838, "learning_rate": 5e-06, "loss": 1.6698, "step": 50 },
    { "epoch": 0.0028069789201386978, "grad_norm": 4.728564739227295, "learning_rate": 5.1e-06, "loss": 1.5785, "step": 51 },
    { "epoch": 0.0028620177224943587, "grad_norm": 4.449855327606201, "learning_rate": 5.2e-06, "loss": 1.4624, "step": 52 },
    { "epoch": 0.0029170565248500193, "grad_norm": 4.127189636230469, "learning_rate": 5.300000000000001e-06, "loss": 1.6061, "step": 53 },
    { "epoch": 0.00297209532720568, "grad_norm": 4.244532108306885, "learning_rate": 5.400000000000001e-06, "loss": 1.491, "step": 54 },
    { "epoch": 0.0030271341295613407, "grad_norm": 3.437682628631592, "learning_rate": 5.500000000000001e-06, "loss": 1.1967, "step": 55 },
    { "epoch": 0.0030821729319170017, "grad_norm": 3.83516788482666, "learning_rate": 5.600000000000001e-06, "loss": 1.4731, "step": 56 },
    { "epoch": 0.003137211734272662, "grad_norm": 3.9108972549438477, "learning_rate": 5.7e-06, "loss": 1.4393, "step": 57 },
    { "epoch": 0.003192250536628323, "grad_norm": 3.5258419513702393, "learning_rate": 5.8e-06, "loss": 1.4206, "step": 58 },
    { "epoch": 0.0032472893389839837, "grad_norm": 4.124903678894043, "learning_rate": 5.9e-06, "loss": 1.4747, "step": 59 },
    { "epoch": 0.0033023281413396446, "grad_norm": 4.055769920349121, "learning_rate": 6e-06, "loss": 1.4655, "step": 60 },
    { "epoch": 0.003357366943695305, "grad_norm": 3.904837131500244, "learning_rate": 6.1e-06, "loss": 1.5125, "step": 61 },
    { "epoch": 0.003412405746050966, "grad_norm": 3.2904794216156006, "learning_rate": 6.200000000000001e-06, "loss": 1.4596, "step": 62 },
    { "epoch": 0.0034674445484066266, "grad_norm": 3.24053692817688, "learning_rate": 6.300000000000001e-06, "loss": 1.3851, "step": 63 },
    { "epoch": 0.0035224833507622876, "grad_norm": 3.457639217376709, "learning_rate": 6.4000000000000006e-06, "loss": 1.4019, "step": 64 },
    { "epoch": 0.003577522153117948, "grad_norm": 3.073054790496826, "learning_rate": 6.5000000000000004e-06, "loss": 1.2872, "step": 65 },
    { "epoch": 0.003632560955473609, "grad_norm": 2.6726694107055664, "learning_rate": 6.600000000000001e-06, "loss": 1.2361, "step": 66 },
    { "epoch": 0.0036875997578292696, "grad_norm": 2.9378459453582764, "learning_rate": 6.700000000000001e-06, "loss": 1.4452, "step": 67 },
    { "epoch": 0.0037426385601849305, "grad_norm": 2.81107234954834, "learning_rate": 6.800000000000001e-06, "loss": 1.4804, "step": 68 },
    { "epoch": 0.003797677362540591, "grad_norm": 2.60062313079834, "learning_rate": 6.9e-06, "loss": 1.3263, "step": 69 },
    { "epoch": 0.003852716164896252, "grad_norm": 2.5642921924591064, "learning_rate": 7e-06, "loss": 1.2751, "step": 70 },
    { "epoch": 0.0039077549672519125, "grad_norm": 2.3608031272888184, "learning_rate": 7.100000000000001e-06, "loss": 1.2614, "step": 71 },
    { "epoch": 0.003962793769607573, "grad_norm": 2.7201738357543945, "learning_rate": 7.2000000000000005e-06, "loss": 1.5018, "step": 72 },
    { "epoch": 0.004017832571963234, "grad_norm": 2.584726095199585, "learning_rate": 7.3e-06, "loss": 1.3519, "step": 73 },
    { "epoch": 0.004072871374318895, "grad_norm": 1.9693044424057007, "learning_rate": 7.4e-06, "loss": 1.0934, "step": 74 },
    { "epoch": 0.0041279101766745555, "grad_norm": 2.220736503601074, "learning_rate": 7.500000000000001e-06, "loss": 1.4687, "step": 75 },
    { "epoch": 0.004182948979030216, "grad_norm": 2.2629456520080566, "learning_rate": 7.600000000000001e-06, "loss": 1.3328, "step": 76 },
    { "epoch": 0.004237987781385877, "grad_norm": 2.051820993423462, "learning_rate": 7.7e-06, "loss": 1.3058, "step": 77 },
    { "epoch": 0.004293026583741538, "grad_norm": 2.2451820373535156, "learning_rate": 7.800000000000002e-06, "loss": 1.3556, "step": 78 },
    { "epoch": 0.004348065386097198, "grad_norm": 3.13584303855896, "learning_rate": 7.9e-06, "loss": 1.3262, "step": 79 },
    { "epoch": 0.004403104188452859, "grad_norm": 5.024479866027832, "learning_rate": 8.000000000000001e-06, "loss": 1.2103, "step": 80 },
    { "epoch": 0.00445814299080852, "grad_norm": 2.070889711380005, "learning_rate": 8.1e-06, "loss": 1.1994, "step": 81 },
    { "epoch": 0.004513181793164181, "grad_norm": 2.797286033630371, "learning_rate": 8.2e-06, "loss": 1.3075, "step": 82 },
    { "epoch": 0.004568220595519841, "grad_norm": 2.11370849609375, "learning_rate": 8.3e-06, "loss": 1.36, "step": 83 },
    { "epoch": 0.004623259397875502, "grad_norm": 2.5416152477264404, "learning_rate": 8.400000000000001e-06, "loss": 1.3484, "step": 84 },
    { "epoch": 0.004678298200231163, "grad_norm": 2.4702343940734863, "learning_rate": 8.5e-06, "loss": 1.3677, "step": 85 },
    { "epoch": 0.004733337002586824, "grad_norm": 3.670365333557129, "learning_rate": 8.6e-06, "loss": 1.2192, "step": 86 },
    { "epoch": 0.004788375804942484, "grad_norm": 2.282954692840576, "learning_rate": 8.700000000000001e-06, "loss": 1.2982, "step": 87 },
    { "epoch": 0.004843414607298145, "grad_norm": 2.3659238815307617, "learning_rate": 8.8e-06, "loss": 1.3206, "step": 88 },
    { "epoch": 0.004898453409653806, "grad_norm": 4.939981460571289, "learning_rate": 8.900000000000001e-06, "loss": 1.4328, "step": 89 },
    { "epoch": 0.004953492212009467, "grad_norm": 2.335858106613159, "learning_rate": 9e-06, "loss": 1.2603, "step": 90 },
    { "epoch": 0.005008531014365127, "grad_norm": 2.2165043354034424, "learning_rate": 9.100000000000001e-06, "loss": 1.3141, "step": 91 },
    { "epoch": 0.005063569816720788, "grad_norm": 2.7872185707092285, "learning_rate": 9.200000000000002e-06, "loss": 1.3314, "step": 92 },
    { "epoch": 0.005118608619076449, "grad_norm": 2.6353912353515625, "learning_rate": 9.3e-06, "loss": 1.2027, "step": 93 },
    { "epoch": 0.00517364742143211, "grad_norm": 3.2509102821350098, "learning_rate": 9.4e-06, "loss": 1.2316, "step": 94 },
    { "epoch": 0.00522868622378777, "grad_norm": 2.4560611248016357, "learning_rate": 9.5e-06, "loss": 1.1848, "step": 95 },
    { "epoch": 0.005283725026143431, "grad_norm": 2.338151216506958, "learning_rate": 9.600000000000001e-06, "loss": 1.2392, "step": 96 },
    { "epoch": 0.005338763828499092, "grad_norm": 2.231065034866333, "learning_rate": 9.7e-06, "loss": 1.2089, "step": 97 },
    { "epoch": 0.005393802630854753, "grad_norm": 2.278428077697754, "learning_rate": 9.800000000000001e-06, "loss": 1.2267, "step": 98 },
    { "epoch": 0.005448841433210413, "grad_norm": 2.4422810077667236, "learning_rate": 9.9e-06, "loss": 1.2041, "step": 99 },
    { "epoch": 0.005503880235566074, "grad_norm": 2.216248035430908, "learning_rate": 1e-05, "loss": 1.0798, "step": 100 },
    { "epoch": 0.005558919037921735, "grad_norm": 2.3301615715026855, "learning_rate": 9.99999998121067e-06, "loss": 1.3069, "step": 101 },
    { "epoch": 0.0056139578402773956, "grad_norm": 2.315436363220215, "learning_rate": 9.999999924842678e-06, "loss": 1.1589, "step": 102 },
    { "epoch": 0.005668996642633056, "grad_norm": 2.3522140979766846, "learning_rate": 9.999999830896024e-06, "loss": 1.0978, "step": 103 },
    { "epoch": 0.0057240354449887175, "grad_norm": 2.5798308849334717, "learning_rate": 9.99999969937071e-06, "loss": 1.0599, "step": 104 },
    { "epoch": 0.005779074247344378, "grad_norm": 2.456644058227539, "learning_rate": 9.999999530266738e-06, "loss": 1.1682, "step": 105 },
    { "epoch": 0.0058341130497000385, "grad_norm": 2.1559031009674072, "learning_rate": 9.999999323584106e-06, "loss": 1.0631, "step": 106 },
    { "epoch": 0.005889151852055699, "grad_norm": 2.2985048294067383, "learning_rate": 9.99999907932282e-06, "loss": 1.1455, "step": 107 },
    { "epoch": 0.00594419065441136, "grad_norm": 2.596167802810669, "learning_rate": 9.999998797482877e-06, "loss": 1.1686, "step": 108 },
    { "epoch": 0.005999229456767021, "grad_norm": 2.378618001937866, "learning_rate": 9.999998478064283e-06, "loss": 1.2226, "step": 109 },
    { "epoch": 0.0060542682591226814, "grad_norm": 2.228116750717163, "learning_rate": 9.999998121067038e-06, "loss": 1.1396, "step": 110 },
    { "epoch": 0.006109307061478342, "grad_norm": 2.4419472217559814, "learning_rate": 9.999997726491146e-06, "loss": 1.1401, "step": 111 },
    { "epoch": 0.006164345863834003, "grad_norm": 2.0695526599884033, "learning_rate": 9.999997294336608e-06, "loss": 1.1868, "step": 112 },
    { "epoch": 0.006219384666189664, "grad_norm": 2.3170363903045654, "learning_rate": 9.99999682460343e-06, "loss": 1.1172, "step": 113 },
    { "epoch": 0.006274423468545324, "grad_norm": 2.670466184616089, "learning_rate": 9.999996317291615e-06, "loss": 1.2481, "step": 114 },
    { "epoch": 0.006329462270900985, "grad_norm": 2.1214540004730225, "learning_rate": 9.999995772401166e-06, "loss": 0.9994, "step": 115 },
    { "epoch": 0.006384501073256646, "grad_norm": 1.9283969402313232, "learning_rate": 9.999995189932085e-06, "loss": 1.0692, "step": 116 },
    { "epoch": 0.006439539875612307, "grad_norm": 2.2620882987976074, "learning_rate": 9.99999456988438e-06, "loss": 1.0725, "step": 117 },
    { "epoch": 0.006494578677967967, "grad_norm": 2.2121341228485107, "learning_rate": 9.999993912258055e-06, "loss": 1.1328, "step": 118 },
    { "epoch": 0.006549617480323628, "grad_norm": 2.298126220703125, "learning_rate": 9.999993217053113e-06, "loss": 1.1272, "step": 119 },
    { "epoch": 0.006604656282679289, "grad_norm": 1.81593656539917, "learning_rate": 9.99999248426956e-06, "loss": 1.017, "step": 120 },
    { "epoch": 0.00665969508503495, "grad_norm": 2.1174378395080566, "learning_rate": 9.999991713907403e-06, "loss": 1.0557, "step": 121 },
    { "epoch": 0.00671473388739061, "grad_norm": 1.9061017036437988, "learning_rate": 9.999990905966647e-06, "loss": 1.0379, "step": 122 },
    { "epoch": 0.006769772689746271, "grad_norm": 1.912500023841858, "learning_rate": 9.999990060447297e-06, "loss": 1.104, "step": 123 },
    { "epoch": 0.006824811492101932, "grad_norm": 1.9249529838562012, "learning_rate": 9.99998917734936e-06, "loss": 1.0136, "step": 124 },
    { "epoch": 0.006879850294457593, "grad_norm": 1.8504948616027832, "learning_rate": 9.999988256672843e-06, "loss": 0.99, "step": 125 },
    { "epoch": 0.006934889096813253, "grad_norm": 1.720042109489441, "learning_rate": 9.999987298417753e-06, "loss": 1.0666, "step": 126 },
    { "epoch": 0.006989927899168914, "grad_norm": 1.778251051902771, "learning_rate": 9.999986302584097e-06, "loss": 1.0424, "step": 127 },
    { "epoch": 0.007044966701524575, "grad_norm": 1.9485961198806763, "learning_rate": 9.999985269171881e-06, "loss": 1.105, "step": 128 },
    { "epoch": 0.007100005503880236, "grad_norm": 3.0802104473114014, "learning_rate": 9.999984198181114e-06, "loss": 1.1081, "step": 129 },
    { "epoch": 0.007155044306235896, "grad_norm": 1.7476954460144043, "learning_rate": 9.999983089611806e-06, "loss": 0.9677, "step": 130 },
    { "epoch": 0.007210083108591557, "grad_norm": 1.6127299070358276, "learning_rate": 9.999981943463963e-06, "loss": 0.9937, "step": 131 },
    { "epoch": 0.007265121910947218, "grad_norm": 2.1477208137512207, "learning_rate": 9.999980759737594e-06, "loss": 1.0319, "step": 132 },
    { "epoch": 0.007320160713302879, "grad_norm": 1.531163215637207, "learning_rate": 9.999979538432707e-06, "loss": 0.8696, "step": 133 },
    { "epoch": 0.007375199515658539, "grad_norm": 1.8226820230484009, "learning_rate": 9.999978279549313e-06, "loss": 1.2061, "step": 134 },
    { "epoch": 0.0074302383180142, "grad_norm": 1.481895923614502, "learning_rate": 9.99997698308742e-06, "loss": 0.949, "step": 135 },
    { "epoch": 0.007485277120369861, "grad_norm": 1.6715927124023438, "learning_rate": 9.99997564904704e-06, "loss": 1.1579, "step": 136 },
    { "epoch": 0.0075403159227255215, "grad_norm": 1.4235272407531738, "learning_rate": 9.999974277428179e-06, "loss": 1.064, "step": 137 },
    { "epoch": 0.007595354725081182, "grad_norm": 1.3524872064590454, "learning_rate": 9.999972868230852e-06, "loss": 0.9141, "step": 138 },
    { "epoch": 0.007650393527436843, "grad_norm": 1.3741765022277832, "learning_rate": 9.999971421455066e-06, "loss": 1.0256, "step": 139 },
    { "epoch": 0.007705432329792504, "grad_norm": 1.9869598150253296, "learning_rate": 9.999969937100835e-06, "loss": 0.9489, "step": 140 },
    { "epoch": 0.0077604711321481645, "grad_norm": 1.4785465002059937, "learning_rate": 9.999968415168166e-06, "loss": 0.9243, "step": 141 },
    { "epoch": 0.007815509934503825, "grad_norm": 1.5476176738739014, "learning_rate": 9.999966855657074e-06, "loss": 1.178, "step": 142 },
    { "epoch": 0.007870548736859486, "grad_norm": 1.500401258468628, "learning_rate": 9.99996525856757e-06, "loss": 0.9837, "step": 143 },
    { "epoch": 0.007925587539215146, "grad_norm": 1.3777157068252563, "learning_rate": 9.999963623899664e-06, "loss": 1.0732, "step": 144 },
    { "epoch": 0.007980626341570807, "grad_norm": 1.4466841220855713, "learning_rate": 9.99996195165337e-06, "loss": 0.9779, "step": 145 },
    { "epoch": 0.008035665143926469, "grad_norm": 1.5304051637649536, "learning_rate": 9.9999602418287e-06, "loss": 1.196, "step": 146 },
    { "epoch": 0.008090703946282128, "grad_norm": 1.9012362957000732, "learning_rate": 9.99995849442567e-06, "loss": 0.9797, "step": 147 },
    { "epoch": 0.00814574274863779, "grad_norm": 1.430679202079773, "learning_rate": 9.999956709444289e-06, "loss": 0.9869, "step": 148 },
    { "epoch": 0.00820078155099345, "grad_norm": 1.3489817380905151, "learning_rate": 9.99995488688457e-06, "loss": 1.0137, "step": 149 },
    { "epoch": 0.008255820353349111, "grad_norm": 1.1878125667572021, "learning_rate": 9.999953026746531e-06, "loss": 0.9355, "step": 150 },
    { "epoch": 0.008310859155704772, "grad_norm": 1.3481942415237427, "learning_rate": 9.999951129030182e-06, "loss": 1.1235, "step": 151 },
    { "epoch": 0.008365897958060432, "grad_norm": 1.7335314750671387, "learning_rate": 9.999949193735539e-06, "loss": 0.9382, "step": 152 },
    { "epoch": 0.008420936760416093, "grad_norm": 1.2029480934143066, "learning_rate": 9.999947220862615e-06, "loss": 0.9419, "step": 153 },
    { "epoch": 0.008475975562771755, "grad_norm": 1.2104203701019287, "learning_rate": 9.999945210411428e-06, "loss": 0.9196, "step": 154 },
    { "epoch": 0.008531014365127414, "grad_norm": 1.1857126951217651, "learning_rate": 9.999943162381991e-06, "loss": 0.9421, "step": 155 },
    { "epoch": 0.008586053167483076, "grad_norm": 1.115027904510498, "learning_rate": 9.999941076774319e-06, "loss": 0.9634, "step": 156 },
    { "epoch": 0.008641091969838737, "grad_norm": 1.4227553606033325, "learning_rate": 9.999938953588428e-06, "loss": 1.0036, "step": 157 },
    { "epoch": 0.008696130772194397, "grad_norm": 1.2913776636123657, "learning_rate": 9.999936792824334e-06, "loss": 0.9232, "step": 158 },
    { "epoch": 0.008751169574550058, "grad_norm": 1.2817318439483643, "learning_rate": 9.999934594482055e-06, "loss": 0.9691, "step": 159 },
    { "epoch": 0.008806208376905718, "grad_norm": 1.5647841691970825, "learning_rate": 9.999932358561604e-06, "loss": 1.1842, "step": 160 },
    { "epoch": 0.00886124717926138, "grad_norm": 1.368135929107666, "learning_rate": 9.999930085063002e-06, "loss": 1.0873, "step": 161 },
    { "epoch": 0.00891628598161704, "grad_norm": 1.2297240495681763, "learning_rate": 9.999927773986262e-06, "loss": 1.0778, "step": 162 },
    { "epoch": 0.0089713247839727, "grad_norm": 1.0658279657363892, "learning_rate": 9.999925425331405e-06, "loss": 0.9008, "step": 163 },
    { "epoch": 0.009026363586328362, "grad_norm": 1.3484326601028442, "learning_rate": 9.999923039098445e-06, "loss": 1.0664, "step": 164 },
    { "epoch": 0.009081402388684023, "grad_norm": 1.1839075088500977, "learning_rate": 9.999920615287401e-06, "loss": 0.9257, "step": 165 },
    { "epoch": 0.009136441191039683, "grad_norm": 1.2757254838943481, "learning_rate": 9.999918153898295e-06, "loss": 0.9473, "step": 166 },
    { "epoch": 0.009191479993395344, "grad_norm": 1.2414579391479492, "learning_rate": 9.99991565493114e-06, "loss": 1.1091, "step": 167 },
    { "epoch": 0.009246518795751004, "grad_norm": 1.2802611589431763, "learning_rate": 9.999913118385959e-06, "loss": 1.063, "step": 168 },
    { "epoch": 0.009301557598106665, "grad_norm": 1.2055327892303467, "learning_rate": 9.99991054426277e-06, "loss": 0.8, "step": 169 },
    { "epoch": 0.009356596400462327, "grad_norm": 1.0391098260879517, "learning_rate": 9.99990793256159e-06, "loss": 0.8672, "step": 170 },
    { "epoch": 0.009411635202817986, "grad_norm": 1.131536602973938, "learning_rate": 9.99990528328244e-06, "loss": 0.9569, "step": 171 },
    { "epoch": 0.009466674005173648, "grad_norm": 1.164307951927185, "learning_rate": 9.999902596425342e-06, "loss": 0.9999, "step": 172 },
    { "epoch": 0.009521712807529309, "grad_norm": 1.2099504470825195, "learning_rate": 9.999899871990313e-06, "loss": 0.9994, "step": 173 },
    { "epoch": 0.009576751609884969, "grad_norm": 1.7294539213180542, "learning_rate": 9.999897109977376e-06, "loss": 1.0265, "step": 174 },
    { "epoch": 0.00963179041224063, "grad_norm": 1.3009883165359497, "learning_rate": 9.99989431038655e-06, "loss": 0.9022, "step": 175 },
    { "epoch": 0.00968682921459629, "grad_norm": 1.1014611721038818, "learning_rate": 9.999891473217857e-06, "loss": 0.8476, "step": 176 },
    { "epoch": 0.009741868016951951, "grad_norm": 1.2410900592803955, "learning_rate": 9.99988859847132e-06, "loss": 1.0272, "step": 177 },
    { "epoch": 0.009796906819307612, "grad_norm": 1.336348295211792, "learning_rate": 9.999885686146957e-06, "loss": 0.9456, "step": 178 },
    { "epoch": 0.009851945621663272, "grad_norm": 1.2931095361709595, "learning_rate": 9.99988273624479e-06, "loss": 0.9554, "step": 179 },
    { "epoch": 0.009906984424018933, "grad_norm": 1.2647838592529297, "learning_rate": 9.999879748764845e-06, "loss": 1.0394, "step": 180 },
    { "epoch": 0.009962023226374595, "grad_norm": 1.3485127687454224, "learning_rate": 9.99987672370714e-06, "loss": 1.1016, "step": 181 },
    { "epoch": 0.010017062028730254, "grad_norm": 1.110187292098999, "learning_rate": 9.999873661071702e-06, "loss": 0.946, "step": 182 },
    { "epoch": 0.010072100831085916, "grad_norm": 1.0991623401641846, "learning_rate": 9.999870560858551e-06, "loss": 1.0084, "step": 183 },
    { "epoch": 0.010127139633441576, "grad_norm": 1.049804449081421, "learning_rate": 9.999867423067713e-06, "loss": 0.8264, "step": 184 },
    { "epoch": 0.010182178435797237, "grad_norm": 1.0947058200836182, "learning_rate": 9.999864247699207e-06, "loss": 0.8884, "step": 185 },
    { "epoch": 0.010237217238152898, "grad_norm": 1.1147902011871338, "learning_rate": 9.999861034753061e-06, "loss": 0.9657, "step": 186 },
    { "epoch": 0.010292256040508558, "grad_norm": 1.260027527809143, "learning_rate": 9.999857784229298e-06, "loss": 1.0102, "step": 187 },
    { "epoch": 0.01034729484286422, "grad_norm": 1.1275582313537598, "learning_rate": 9.999854496127942e-06, "loss": 1.028, "step": 188 },
    { "epoch": 0.01040233364521988, "grad_norm": 1.1377174854278564, "learning_rate": 9.999851170449018e-06, "loss": 1.032, "step": 189 },
    { "epoch": 0.01045737244757554, "grad_norm": 1.1734225749969482, "learning_rate": 9.999847807192552e-06, "loss": 1.0009, "step": 190 },
    { "epoch": 0.010512411249931202, "grad_norm": 1.1934596300125122, "learning_rate": 9.999844406358565e-06, "loss": 1.0432, "step": 191 },
    { "epoch": 0.010567450052286861, "grad_norm": 1.0638024806976318, "learning_rate": 9.99984096794709e-06, "loss": 0.8651, "step": 192 },
    { "epoch": 0.010622488854642523, "grad_norm": 1.2381829023361206, "learning_rate": 9.999837491958147e-06, "loss": 1.0088, "step": 193 },
    { "epoch": 0.010677527656998184, "grad_norm": 1.030246615409851, "learning_rate": 9.999833978391763e-06, "loss": 0.9488, "step": 194 },
    { "epoch": 0.010732566459353844, "grad_norm": 1.1640657186508179, "learning_rate": 9.999830427247965e-06, "loss": 1.0588, "step": 195 },
    { "epoch": 0.010787605261709505, "grad_norm": 1.0431616306304932, "learning_rate": 9.99982683852678e-06, "loss": 0.8728, "step": 196 },
    { "epoch": 0.010842644064065167, "grad_norm": 1.032263159751892, "learning_rate": 9.999823212228235e-06, "loss": 0.9498, "step": 197 },
    { "epoch": 0.010897682866420826, "grad_norm": 1.1383745670318604, "learning_rate": 9.999819548352358e-06, "loss": 0.9498, "step": 198 },
    { "epoch": 0.010952721668776488, "grad_norm": 1.1324639320373535, "learning_rate": 9.999815846899175e-06, "loss": 1.0432, "step": 199 },
    { "epoch": 0.011007760471132147, "grad_norm": 1.188672661781311, "learning_rate": 9.999812107868714e-06, "loss": 0.982, "step": 200 },
    { "epoch": 0.011062799273487809, "grad_norm": 1.1011098623275757, "learning_rate": 9.999808331261005e-06, "loss": 0.9587, "step": 201 },
    { "epoch": 0.01111783807584347, "grad_norm": 1.1782938241958618, "learning_rate": 9.999804517076073e-06, "loss": 1.0659, "step": 202 },
    { "epoch": 0.01117287687819913, "grad_norm": 1.0520117282867432, "learning_rate": 9.99980066531395e-06, "loss": 1.0056, "step": 203 },
    { "epoch": 0.011227915680554791, "grad_norm": 1.1584919691085815, "learning_rate": 9.999796775974663e-06, "loss": 0.9435, "step": 204 },
    { "epoch": 0.011282954482910452, "grad_norm": 1.2201849222183228, "learning_rate": 9.999792849058242e-06, "loss": 1.0562, "step": 205 },
    { "epoch": 0.011337993285266112, "grad_norm": 1.2985976934432983, "learning_rate": 9.999788884564715e-06, "loss": 1.0126, "step": 206 },
    { "epoch": 0.011393032087621774, "grad_norm": 0.9926307201385498, "learning_rate": 9.999784882494115e-06, "loss": 0.7875, "step": 207 },
    { "epoch": 0.011448070889977435, "grad_norm": 1.103365182876587, "learning_rate": 9.99978084284647e-06, "loss": 0.9833, "step": 208 },
    { "epoch": 0.011503109692333095, "grad_norm": 1.1798462867736816, "learning_rate": 9.99977676562181e-06, "loss": 0.8479, "step": 209 },
    { "epoch": 0.011558148494688756, "grad_norm": 1.2887194156646729, "learning_rate": 9.999772650820168e-06, "loss": 0.9606, "step": 210 },
    { "epoch": 0.011613187297044416, "grad_norm": 1.1120634078979492, "learning_rate": 9.99976849844157e-06, "loss": 0.9604, "step": 211 },
    { "epoch": 0.011668226099400077, "grad_norm": 1.1248979568481445, "learning_rate": 9.999764308486052e-06, "loss": 0.9428, "step": 212 },
    { "epoch": 0.011723264901755738, "grad_norm": 1.274610161781311, "learning_rate": 9.999760080953643e-06, "loss": 0.9044, "step": 213 },
    { "epoch": 0.011778303704111398, "grad_norm": 1.1746865510940552, "learning_rate": 9.999755815844377e-06, "loss": 0.9114, "step": 214 },
    { "epoch": 0.01183334250646706, "grad_norm": 1.2531086206436157, "learning_rate": 9.999751513158282e-06, "loss": 1.0785, "step": 215 },
    { "epoch": 0.01188838130882272, "grad_norm": 1.0789539813995361, "learning_rate": 9.999747172895395e-06, "loss": 0.9794, "step": 216 },
    { "epoch": 0.01194342011117838, "grad_norm": 1.1805329322814941, "learning_rate": 9.999742795055746e-06, "loss": 0.9602, "step": 217 },
    { "epoch": 0.011998458913534042, "grad_norm": 2.309329032897949, "learning_rate": 9.99973837963937e-06, "loss": 0.9482, "step": 218 },
    { "epoch": 0.012053497715889702, "grad_norm": 1.2379088401794434, "learning_rate": 9.999733926646296e-06, "loss": 1.0237, "step": 219 },
    { "epoch": 0.012108536518245363, "grad_norm": 1.1581377983093262, "learning_rate": 9.999729436076562e-06, "loss": 1.0583, "step": 220 },
    { "epoch": 0.012163575320601024, "grad_norm": 1.3006727695465088, "learning_rate": 9.999724907930199e-06, "loss": 0.9581, "step": 221 },
    { "epoch": 0.012218614122956684, "grad_norm": 1.3215982913970947, "learning_rate": 9.999720342207243e-06, "loss": 0.9438, "step": 222 },
    { "epoch": 0.012273652925312345, "grad_norm": 1.1107337474822998, "learning_rate": 9.999715738907727e-06, "loss": 0.9987, "step": 223 },
    { "epoch": 0.012328691727668007, "grad_norm": 1.0745457410812378, "learning_rate": 9.999711098031685e-06, "loss": 0.9637, "step": 224 },
    { "epoch": 0.012383730530023666, "grad_norm": 1.110861897468567, "learning_rate": 9.999706419579154e-06, "loss": 1.0225, "step": 225 },
    { "epoch": 0.012438769332379328, "grad_norm": 1.0755527019500732, "learning_rate": 9.999701703550167e-06, "loss": 1.0204, "step": 226 },
    { "epoch": 0.012493808134734987, "grad_norm": 1.1694976091384888, "learning_rate": 9.99969694994476e-06, "loss": 1.0566, "step": 227 },
    { "epoch": 0.012548846937090649, "grad_norm": 1.455856442451477, "learning_rate": 9.99969215876297e-06, "loss": 0.9397, "step": 228 },
    { "epoch": 0.01260388573944631, "grad_norm": 1.0707073211669922, "learning_rate": 9.99968733000483e-06, "loss": 0.8286, "step": 229 },
    { "epoch": 0.01265892454180197, "grad_norm": 1.189548134803772, "learning_rate": 9.99968246367038e-06, "loss": 0.8762, "step": 230 },
    { "epoch": 0.012713963344157631, "grad_norm": 1.1439214944839478, "learning_rate": 9.999677559759655e-06, "loss": 0.9187, "step": 231 },
    { "epoch": 0.012769002146513293, "grad_norm": 1.2329761981964111, "learning_rate": 9.999672618272691e-06, "loss": 1.0374, "step": 232 },
    { "epoch": 0.012824040948868952, "grad_norm": 1.1545134782791138, "learning_rate": 9.999667639209527e-06, "loss": 0.9343, "step": 233 },
    { "epoch": 0.012879079751224614, "grad_norm": 1.0946775674819946, "learning_rate": 9.999662622570198e-06, "loss": 0.9568, "step": 234 },
    { "epoch": 0.012934118553580273, "grad_norm": 1.2099589109420776, "learning_rate": 9.999657568354743e-06, "loss": 1.0364, "step": 235 },
    { "epoch": 0.012989157355935935, "grad_norm": 1.09062922000885, "learning_rate": 9.999652476563202e-06, "loss": 1.0289, "step": 236 },
    { "epoch": 0.013044196158291596, "grad_norm": 1.154557228088379, "learning_rate": 9.999647347195612e-06, "loss": 0.9925, "step": 237 },
    { "epoch": 0.013099234960647256, "grad_norm": 1.025374174118042, "learning_rate": 9.999642180252008e-06, "loss": 0.9346, "step": 238 },
    { "epoch": 0.013154273763002917, "grad_norm": 1.1473641395568848, "learning_rate": 9.999636975732433e-06, "loss": 1.0244, "step": 239 },
    { "epoch": 0.013209312565358578, "grad_norm": 1.0421240329742432, "learning_rate": 9.999631733636923e-06, "loss": 0.9368, "step": 240 },
    { "epoch": 0.013264351367714238, "grad_norm": 1.1076610088348389, "learning_rate": 9.99962645396552e-06, "loss": 1.0276, "step": 241 },
    { "epoch": 0.0133193901700699, "grad_norm": 1.143559455871582, "learning_rate": 9.999621136718266e-06, "loss": 0.9626, "step": 242 },
    { "epoch": 0.01337442897242556, "grad_norm": 1.0958378314971924, "learning_rate": 9.999615781895195e-06, "loss": 1.0254, "step": 243 },
    { "epoch": 0.01342946777478122, "grad_norm": 1.117688536643982, "learning_rate": 9.99961038949635e-06, "loss": 0.9685, "step": 244 },
    { "epoch": 0.013484506577136882, "grad_norm": 1.1645647287368774, "learning_rate": 9.999604959521771e-06, "loss": 1.0666, "step": 245 },
    { "epoch": 0.013539545379492542, "grad_norm": 1.1238516569137573, "learning_rate": 9.999599491971502e-06, "loss": 1.0252, "step": 246 },
    { "epoch": 0.013594584181848203, "grad_norm": 1.0196914672851562, "learning_rate": 9.999593986845579e-06, "loss": 0.9389, "step": 247 },
    { "epoch": 0.013649622984203864, "grad_norm": 1.0231372117996216, "learning_rate": 9.999588444144049e-06, "loss": 0.8786, "step": 248 },
    { "epoch": 0.013704661786559524, "grad_norm": 1.2504147291183472, "learning_rate": 9.999582863866947e-06, "loss": 1.0969, "step": 249 },
    { "epoch": 0.013759700588915185, "grad_norm": 1.1123549938201904, "learning_rate": 9.99957724601432e-06, "loss": 0.8833, "step": 250 },
    { "epoch": 0.013814739391270847, "grad_norm": 1.1068202257156372, "learning_rate": 9.999571590586208e-06, "loss": 0.9709, "step": 251 },
    { "epoch": 0.013869778193626506, "grad_norm": 0.9891651272773743, "learning_rate": 9.999565897582655e-06, "loss": 0.8598, "step": 252 },
    { "epoch": 0.013924816995982168, "grad_norm": 0.9866491556167603, "learning_rate": 9.999560167003703e-06, "loss": 0.8101, "step": 253 },
    { "epoch": 0.013979855798337828, "grad_norm": 1.0862594842910767, "learning_rate": 9.999554398849396e-06, "loss": 0.9411, "step": 254 },
    { "epoch": 0.014034894600693489, "grad_norm": 1.1898949146270752, "learning_rate": 9.999548593119774e-06, "loss": 0.9548, "step": 255 },
    { "epoch": 0.01408993340304915, "grad_norm": 1.2167880535125732, "learning_rate": 9.999542749814886e-06, "loss": 1.0302, "step": 256 },
    { "epoch": 0.01414497220540481, "grad_norm": 1.0784146785736084, "learning_rate": 9.999536868934771e-06, "loss": 0.8875, "step": 257 },
    { "epoch": 0.014200011007760471, "grad_norm": 1.1128027439117432, "learning_rate": 9.999530950479475e-06, "loss": 0.9498, "step": 258 },
    { "epoch": 0.014255049810116133, "grad_norm": 1.1311595439910889, "learning_rate": 9.999524994449044e-06, "loss": 0.9035, "step": 259 },
    { "epoch": 0.014310088612471792, "grad_norm": 1.225615382194519, "learning_rate": 9.999519000843521e-06, "loss": 1.0104, "step": 260 },
    { "epoch": 0.014365127414827454, "grad_norm": 1.2347793579101562, "learning_rate": 9.99951296966295e-06, "loss": 1.0288, "step": 261 },
    { "epoch": 0.014420166217183113, "grad_norm": 1.1837103366851807, "learning_rate": 9.99950690090738e-06, "loss": 0.9553, "step": 262 },
    { "epoch": 0.014475205019538775, "grad_norm": 1.1985397338867188, "learning_rate": 9.999500794576852e-06, "loss": 0.9561, "step": 263 },
    { "epoch": 0.014530243821894436, "grad_norm": 1.036928415298462, "learning_rate": 9.999494650671418e-06, "loss": 0.8906, "step": 264 },
    { "epoch": 0.014585282624250096, "grad_norm": 1.0797842741012573, "learning_rate": 9.999488469191116e-06, "loss": 0.8975, "step": 265 },
    { "epoch": 0.014640321426605757, "grad_norm": 1.0571156740188599, "learning_rate": 9.999482250136e-06, "loss": 0.9334, "step": 266 },
    { "epoch": 0.014695360228961419, "grad_norm": 1.2065023183822632, "learning_rate": 9.999475993506114e-06, "loss": 0.8986, "step": 267 },
    { "epoch": 0.014750399031317078, "grad_norm": 1.201586127281189, "learning_rate": 9.999469699301502e-06, "loss": 0.9192, "step": 268 },
    { "epoch": 0.01480543783367274, "grad_norm": 1.0470168590545654, "learning_rate": 9.999463367522216e-06, "loss": 0.8604, "step": 269 },
    { "epoch": 0.0148604766360284, "grad_norm": 1.1142147779464722, "learning_rate": 9.9994569981683e-06, "loss": 0.9847, "step": 270 },
    { "epoch": 0.01491551543838406, "grad_norm": 1.0352061986923218, "learning_rate": 9.999450591239805e-06, "loss": 0.8927, "step": 271 },
    { "epoch": 0.014970554240739722, "grad_norm": 1.0353184938430786, "learning_rate": 9.999444146736779e-06, "loss": 0.8435, "step": 272 },
    { "epoch": 0.015025593043095382, "grad_norm": 1.2091951370239258, "learning_rate": 9.999437664659267e-06, "loss": 0.8959, "step": 273 },
    { "epoch": 0.015080631845451043, "grad_norm": 1.006361722946167, "learning_rate": 9.999431145007319e-06, "loss": 0.8579, "step": 274 },
    { "epoch": 0.015135670647806704, "grad_norm": 1.1265509128570557, "learning_rate": 9.999424587780985e-06, "loss": 0.8808, "step": 275 },
    { "epoch": 0.015190709450162364, "grad_norm": 1.060882568359375, "learning_rate": 9.999417992980317e-06, "loss": 1.044, "step": 276 },
    { "epoch": 0.015245748252518026, "grad_norm": 1.0216747522354126, "learning_rate": 9.999411360605358e-06, "loss": 0.7773, "step": 277 },
    { "epoch": 0.015300787054873685, "grad_norm": 1.1382462978363037, "learning_rate": 9.999404690656163e-06, "loss": 0.8954, "step": 278 },
    { "epoch": 0.015355825857229347, "grad_norm": 1.113815188407898, "learning_rate": 9.99939798313278e-06, "loss": 0.8143, "step": 279 },
    { "epoch": 0.015410864659585008, "grad_norm": 1.123530387878418, "learning_rate": 9.99939123803526e-06, "loss": 0.8872, "step": 280 },
    { "epoch": 0.015465903461940668, "grad_norm": 1.0873669385910034, "learning_rate": 9.999384455363656e-06, "loss": 1.008, "step": 281 },
    { "epoch": 0.015520942264296329, "grad_norm": 1.5956637859344482, "learning_rate": 9.999377635118014e-06, "loss": 0.9456, "step": 282 },
    { "epoch": 0.01557598106665199, "grad_norm": 1.1471425294876099, "learning_rate": 9.999370777298389e-06, "loss": 0.9897, "step": 283 },
    { "epoch": 0.01563101986900765, "grad_norm": 0.9960193634033203, "learning_rate": 9.999363881904831e-06, "loss": 0.8196, "step": 284 },
    { "epoch": 0.01568605867136331, "grad_norm": 1.1033951044082642, "learning_rate": 9.999356948937393e-06, "loss": 0.879, "step": 285 },
    { "epoch": 0.015741097473718973, "grad_norm": 1.157765507698059, "learning_rate": 9.999349978396126e-06, "loss": 1.0116, "step": 286 },
    { "epoch": 0.015796136276074634, "grad_norm": 1.0472352504730225, "learning_rate": 9.999342970281084e-06, "loss": 0.8657, "step": 287 },
    { "epoch": 0.015851175078430292, "grad_norm": 1.1346659660339355, "learning_rate": 9.999335924592315e-06, "loss": 0.8482, "step": 288 },
    { "epoch": 0.015906213880785953, "grad_norm": 1.1164487600326538, "learning_rate": 9.999328841329879e-06, "loss": 1.0542, "step": 289 },
    { "epoch": 0.015961252683141615, "grad_norm": 1.1890591382980347, "learning_rate": 9.999321720493825e-06, "loss": 0.9598, "step": 290 },
    { "epoch": 0.016016291485497276, "grad_norm": 1.0419867038726807, "learning_rate": 9.999314562084205e-06, "loss": 0.9548, "step": 291 },
    { "epoch": 0.016071330287852938, "grad_norm": 1.0652042627334595, "learning_rate": 9.999307366101077e-06, "loss": 0.9359, "step": 292 },
    { "epoch": 0.016126369090208596, "grad_norm": 1.0166404247283936, "learning_rate": 9.999300132544492e-06, "loss": 0.9276, "step": 293 },
    { "epoch": 0.016181407892564257, "grad_norm": 1.1638866662979126, "learning_rate": 9.999292861414507e-06, "loss": 0.957, "step": 294 },
    { "epoch": 0.01623644669491992, "grad_norm": 1.5505993366241455, "learning_rate": 9.999285552711173e-06, "loss": 0.9878, "step": 295 },
    { "epoch": 0.01629148549727558, "grad_norm": 1.177262783050537, "learning_rate": 9.999278206434549e-06, "loss": 0.8631, "step": 296 },
    { "epoch": 0.01634652429963124, "grad_norm": 1.8578168153762817, "learning_rate": 9.999270822584687e-06, "loss": 0.9684, "step": 297 },
    { "epoch": 0.0164015631019869, "grad_norm": 1.2617360353469849, "learning_rate": 9.999263401161643e-06, "loss": 1.014, "step": 298 },
    { "epoch": 0.01645660190434256, "grad_norm": 0.9740132689476013, "learning_rate": 9.999255942165475e-06, "loss": 0.8606, "step": 299 },
    { "epoch": 0.016511640706698222, "grad_norm": 0.9821745753288269, "learning_rate": 9.999248445596238e-06, "loss": 0.8241, "step": 300 },
    { "epoch": 0.016566679509053883, "grad_norm": 1.0200445652008057, "learning_rate": 9.999240911453986e-06, "loss": 0.8256, "step": 301 },
    { "epoch": 0.016621718311409545, "grad_norm": 1.4100390672683716, "learning_rate": 9.999233339738779e-06, "loss": 0.9057, "step": 302 },
    { "epoch": 0.016676757113765206, "grad_norm": 1.056544303894043, "learning_rate": 9.99922573045067e-06, "loss": 1.0808, "step": 303 },
    { "epoch": 0.016731795916120864, "grad_norm": 0.9271026253700256, "learning_rate": 9.99921808358972e-06, "loss": 0.878, "step": 304 },
    { "epoch": 0.016786834718476525, "grad_norm": 0.9864157438278198, "learning_rate": 9.999210399155987e-06, "loss": 0.9198, "step": 305 },
    { "epoch": 0.016841873520832187, "grad_norm": 1.093995451927185, "learning_rate": 9.999202677149525e-06, "loss": 0.9794, "step": 306 },
    { "epoch": 0.016896912323187848, "grad_norm": 0.9717912077903748, "learning_rate": 9.999194917570395e-06, "loss": 0.8764, "step": 307 },
    { "epoch": 0.01695195112554351, "grad_norm": 1.0026428699493408, "learning_rate": 9.999187120418653e-06, "loss": 0.8526, "step": 308 },
    { "epoch": 0.017006989927899167, "grad_norm": 1.122870922088623, "learning_rate": 9.999179285694359e-06, "loss": 0.9773, "step": 309 },
    { "epoch": 0.01706202873025483, "grad_norm": 1.0522836446762085, "learning_rate": 9.999171413397572e-06, "loss": 1.0183, "step": 310 },
    { "epoch": 0.01711706753261049, "grad_norm": 0.9303658604621887, "learning_rate": 9.99916350352835e-06, "loss": 0.8402, "step": 311 },
    { "epoch": 0.01717210633496615, "grad_norm": 0.9606096148490906, "learning_rate": 9.999155556086755e-06, "loss": 0.9692, "step": 312 },
    { "epoch": 0.017227145137321813, "grad_norm": 1.176992416381836, "learning_rate": 9.999147571072844e-06, "loss": 0.8172, "step": 313 },
    { "epoch": 0.017282183939677474, "grad_norm": 1.1948801279067993, "learning_rate": 9.999139548486678e-06, "loss": 1.0205, "step": 314 },
    { "epoch": 0.017337222742033132, "grad_norm": 1.0064897537231445, "learning_rate": 9.999131488328318e-06, "loss": 0.9479, "step": 315 },
    { "epoch": 0.017392261544388794, "grad_norm": 1.048242449760437, "learning_rate": 9.999123390597822e-06, "loss": 0.9862, "step": 316 },
    { "epoch": 0.017447300346744455, "grad_norm": 1.12875497341156, "learning_rate": 9.999115255295256e-06, "loss": 0.9743, "step": 317 },
    { "epoch": 0.017502339149100116, "grad_norm": 1.0607460737228394, "learning_rate": 9.999107082420674e-06, "loss": 0.8878, "step": 318 },
    { "epoch": 0.017557377951455778, "grad_norm": 1.1480191946029663, "learning_rate": 9.999098871974144e-06, "loss": 0.8769, "step": 319 },
    { "epoch": 0.017612416753811436, "grad_norm": 1.1150004863739014, "learning_rate": 9.999090623955724e-06, "loss": 0.8615, "step": 320 },
    { "epoch": 0.017667455556167097, "grad_norm": 1.137839913368225, "learning_rate": 9.999082338365478e-06, "loss": 0.9703, "step": 321 },
    { "epoch": 0.01772249435852276, "grad_norm": 1.0883489847183228, "learning_rate": 9.999074015203467e-06, "loss": 0.9273, "step": 322 },
    { "epoch": 0.01777753316087842, "grad_norm": 1.0999557971954346, "learning_rate": 9.999065654469752e-06, "loss": 0.9605, "step": 323 },
    { "epoch": 0.01783257196323408, "grad_norm": 0.9911689758300781, "learning_rate": 9.999057256164401e-06, "loss": 0.9117, "step": 324 },
    { "epoch": 0.01788761076558974, "grad_norm": 1.040933609008789, "learning_rate": 9.999048820287472e-06, "loss": 0.9229, "step": 325 },
    { "epoch": 0.0179426495679454, "grad_norm": 1.4341392517089844, "learning_rate": 9.999040346839031e-06, "loss": 1.0718, "step": 326 },
    { "epoch": 0.017997688370301062, "grad_norm": 1.0246332883834839, "learning_rate": 9.99903183581914e-06, "loss": 0.9617, "step": 327 },
    { "epoch": 0.018052727172656723, "grad_norm": 10.162322998046875, "learning_rate": 9.999023287227863e-06, "loss": 1.0391, "step": 328 },
    { "epoch": 0.018107765975012385, "grad_norm": 1.3370027542114258, "learning_rate": 9.999014701065266e-06, "loss": 1.0211, "step": 329 },
    { "epoch": 0.018162804777368046, "grad_norm": 1.0146219730377197, "learning_rate": 9.999006077331413e-06, "loss": 0.8611, "step": 330 },
    { "epoch": 0.018217843579723704, "grad_norm": 1.0899269580841064, "learning_rate": 9.998997416026368e-06, "loss": 0.9209, "step": 331 },
    { "epoch": 0.018272882382079365, "grad_norm": 1.1343204975128174, "learning_rate": 9.998988717150198e-06, "loss": 0.9405, "step": 332 },
    { "epoch": 0.018327921184435027, "grad_norm": 1.2308380603790283, "learning_rate": 9.998979980702965e-06, "loss": 0.9579, "step": 333 },
    { "epoch": 0.018382959986790688, "grad_norm": 1.1433519124984741, "learning_rate": 9.998971206684737e-06, "loss": 1.0045, "step": 334 },
    { "epoch": 0.01843799878914635, "grad_norm": 1.0585781335830688, "learning_rate": 9.99896239509558e-06, "loss": 0.9171, "step": 335 },
    { "epoch": 0.018493037591502007, "grad_norm": 1.2735164165496826, "learning_rate": 9.99895354593556e-06, "loss": 1.1001, "step": 336 },
    { "epoch": 0.01854807639385767, "grad_norm": 1.2905755043029785, "learning_rate": 9.998944659204744e-06, "loss": 1.0294, "step": 337 },
    { "epoch": 0.01860311519621333, "grad_norm": 1.1442075967788696, "learning_rate": 9.998935734903198e-06, "loss": 0.9385, "step": 338 },
    { "epoch": 0.01865815399856899, "grad_norm": 1.1005232334136963, "learning_rate": 9.998926773030987e-06, "loss": 1.026, "step": 339 },
    { "epoch": 0.018713192800924653, "grad_norm": 1.2770785093307495, "learning_rate": 9.998917773588182e-06, "loss": 1.0015, "step": 340 },
    { "epoch": 0.01876823160328031, "grad_norm": 1.0963070392608643, "learning_rate": 9.998908736574849e-06, "loss": 0.9347, "step": 341 },
    { "epoch": 0.018823270405635972,
| "grad_norm": 1.10364830493927, | |
| "learning_rate": 9.998899661991055e-06, | |
| "loss": 0.869, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.018878309207991634, | |
| "grad_norm": 1.0364975929260254, | |
| "learning_rate": 9.99889054983687e-06, | |
| "loss": 0.9855, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.018933348010347295, | |
| "grad_norm": 1.104702115058899, | |
| "learning_rate": 9.998881400112362e-06, | |
| "loss": 0.9555, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.018988386812702956, | |
| "grad_norm": 0.9957441687583923, | |
| "learning_rate": 9.998872212817599e-06, | |
| "loss": 0.9634, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.019043425615058618, | |
| "grad_norm": 1.262271523475647, | |
| "learning_rate": 9.998862987952651e-06, | |
| "loss": 1.0133, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.019098464417414276, | |
| "grad_norm": 1.2075226306915283, | |
| "learning_rate": 9.998853725517587e-06, | |
| "loss": 1.0588, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.019153503219769937, | |
| "grad_norm": 1.0609898567199707, | |
| "learning_rate": 9.998844425512477e-06, | |
| "loss": 0.9952, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.0192085420221256, | |
| "grad_norm": 1.1930195093154907, | |
| "learning_rate": 9.998835087937389e-06, | |
| "loss": 0.9617, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.01926358082448126, | |
| "grad_norm": 1.2359932661056519, | |
| "learning_rate": 9.998825712792396e-06, | |
| "loss": 0.8768, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.01931861962683692, | |
| "grad_norm": 0.9984115362167358, | |
| "learning_rate": 9.998816300077566e-06, | |
| "loss": 0.8205, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.01937365842919258, | |
| "grad_norm": 1.6853677034378052, | |
| "learning_rate": 9.998806849792972e-06, | |
| "loss": 0.9066, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.01942869723154824, | |
| "grad_norm": 1.2869856357574463, | |
| "learning_rate": 9.998797361938683e-06, | |
| "loss": 1.0054, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.019483736033903902, | |
| "grad_norm": 1.2791584730148315, | |
| "learning_rate": 9.99878783651477e-06, | |
| "loss": 0.7627, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.019538774836259563, | |
| "grad_norm": 1.0795867443084717, | |
| "learning_rate": 9.998778273521307e-06, | |
| "loss": 0.9343, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.019593813638615225, | |
| "grad_norm": 1.0926088094711304, | |
| "learning_rate": 9.998768672958365e-06, | |
| "loss": 0.943, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.019648852440970886, | |
| "grad_norm": 1.0530847311019897, | |
| "learning_rate": 9.998759034826015e-06, | |
| "loss": 0.9656, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.019703891243326544, | |
| "grad_norm": 1.1793400049209595, | |
| "learning_rate": 9.99874935912433e-06, | |
| "loss": 0.9799, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.019758930045682205, | |
| "grad_norm": 1.0726191997528076, | |
| "learning_rate": 9.998739645853383e-06, | |
| "loss": 0.8739, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.019813968848037867, | |
| "grad_norm": 1.0488981008529663, | |
| "learning_rate": 9.998729895013246e-06, | |
| "loss": 0.8986, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.019869007650393528, | |
| "grad_norm": 1.8267477750778198, | |
| "learning_rate": 9.998720106603993e-06, | |
| "loss": 0.9175, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.01992404645274919, | |
| "grad_norm": 0.9868306517601013, | |
| "learning_rate": 9.9987102806257e-06, | |
| "loss": 0.9609, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.019979085255104848, | |
| "grad_norm": 1.0171183347702026, | |
| "learning_rate": 9.998700417078438e-06, | |
| "loss": 0.8904, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.02003412405746051, | |
| "grad_norm": 0.9800812602043152, | |
| "learning_rate": 9.998690515962282e-06, | |
| "loss": 0.8344, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.02008916285981617, | |
| "grad_norm": 1.024707317352295, | |
| "learning_rate": 9.998680577277304e-06, | |
| "loss": 0.9026, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.02014420166217183, | |
| "grad_norm": 1.1056619882583618, | |
| "learning_rate": 9.998670601023584e-06, | |
| "loss": 1.017, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.020199240464527493, | |
| "grad_norm": 1.0555908679962158, | |
| "learning_rate": 9.998660587201191e-06, | |
| "loss": 0.9627, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.02025427926688315, | |
| "grad_norm": 0.9502031803131104, | |
| "learning_rate": 9.998650535810204e-06, | |
| "loss": 0.935, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.020309318069238812, | |
| "grad_norm": 1.0355613231658936, | |
| "learning_rate": 9.998640446850699e-06, | |
| "loss": 0.9946, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.020364356871594474, | |
| "grad_norm": 0.9906355142593384, | |
| "learning_rate": 9.99863032032275e-06, | |
| "loss": 0.9389, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.020419395673950135, | |
| "grad_norm": 0.9483911395072937, | |
| "learning_rate": 9.99862015622643e-06, | |
| "loss": 0.979, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.020474434476305797, | |
| "grad_norm": 0.9769986271858215, | |
| "learning_rate": 9.998609954561822e-06, | |
| "loss": 0.8972, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.020529473278661458, | |
| "grad_norm": 1.1682699918746948, | |
| "learning_rate": 9.998599715329e-06, | |
| "loss": 0.943, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.020584512081017116, | |
| "grad_norm": 1.007912516593933, | |
| "learning_rate": 9.99858943852804e-06, | |
| "loss": 0.8825, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.020639550883372777, | |
| "grad_norm": 0.9788785576820374, | |
| "learning_rate": 9.99857912415902e-06, | |
| "loss": 0.9667, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.02069458968572844, | |
| "grad_norm": 1.0804275274276733, | |
| "learning_rate": 9.998568772222017e-06, | |
| "loss": 1.0026, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.0207496284880841, | |
| "grad_norm": 1.0859237909317017, | |
| "learning_rate": 9.998558382717109e-06, | |
| "loss": 0.9592, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.02080466729043976, | |
| "grad_norm": 1.2925337553024292, | |
| "learning_rate": 9.998547955644373e-06, | |
| "loss": 0.9067, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.02085970609279542, | |
| "grad_norm": 0.9853373765945435, | |
| "learning_rate": 9.99853749100389e-06, | |
| "loss": 0.9538, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.02091474489515108, | |
| "grad_norm": 1.0461076498031616, | |
| "learning_rate": 9.998526988795738e-06, | |
| "loss": 0.9261, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.020969783697506742, | |
| "grad_norm": 1.024559497833252, | |
| "learning_rate": 9.998516449019995e-06, | |
| "loss": 0.9117, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.021024822499862404, | |
| "grad_norm": 1.1474825143814087, | |
| "learning_rate": 9.998505871676739e-06, | |
| "loss": 1.0177, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.021079861302218065, | |
| "grad_norm": 0.9587596654891968, | |
| "learning_rate": 9.998495256766051e-06, | |
| "loss": 0.8809, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.021134900104573723, | |
| "grad_norm": 0.9505122303962708, | |
| "learning_rate": 9.998484604288013e-06, | |
| "loss": 0.9266, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.021189938906929384, | |
| "grad_norm": 0.9625647664070129, | |
| "learning_rate": 9.9984739142427e-06, | |
| "loss": 0.9073, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.021244977709285046, | |
| "grad_norm": 0.9650934338569641, | |
| "learning_rate": 9.998463186630196e-06, | |
| "loss": 0.9042, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.021300016511640707, | |
| "grad_norm": 1.0289491415023804, | |
| "learning_rate": 9.99845242145058e-06, | |
| "loss": 0.929, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.02135505531399637, | |
| "grad_norm": 0.9543869495391846, | |
| "learning_rate": 9.998441618703935e-06, | |
| "loss": 0.9406, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.02141009411635203, | |
| "grad_norm": 0.9276942610740662, | |
| "learning_rate": 9.99843077839034e-06, | |
| "loss": 0.8982, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.021465132918707688, | |
| "grad_norm": 0.9264664053916931, | |
| "learning_rate": 9.998419900509877e-06, | |
| "loss": 0.7255, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.02152017172106335, | |
| "grad_norm": 0.9961187243461609, | |
| "learning_rate": 9.998408985062628e-06, | |
| "loss": 0.9826, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.02157521052341901, | |
| "grad_norm": 0.966596245765686, | |
| "learning_rate": 9.998398032048676e-06, | |
| "loss": 0.8159, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.021630249325774672, | |
| "grad_norm": 1.1336095333099365, | |
| "learning_rate": 9.998387041468102e-06, | |
| "loss": 0.9289, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.021685288128130333, | |
| "grad_norm": 1.0453619956970215, | |
| "learning_rate": 9.998376013320989e-06, | |
| "loss": 0.8816, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.02174032693048599, | |
| "grad_norm": 0.8961821794509888, | |
| "learning_rate": 9.998364947607419e-06, | |
| "loss": 0.871, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.021795365732841653, | |
| "grad_norm": 1.3420332670211792, | |
| "learning_rate": 9.998353844327477e-06, | |
| "loss": 0.9338, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.021850404535197314, | |
| "grad_norm": 0.9635335206985474, | |
| "learning_rate": 9.998342703481246e-06, | |
| "loss": 0.9592, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.021905443337552975, | |
| "grad_norm": 1.3322341442108154, | |
| "learning_rate": 9.998331525068807e-06, | |
| "loss": 1.0974, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.021960482139908637, | |
| "grad_norm": 1.017220377922058, | |
| "learning_rate": 9.998320309090247e-06, | |
| "loss": 0.9827, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.022015520942264295, | |
| "grad_norm": 1.0080329179763794, | |
| "learning_rate": 9.99830905554565e-06, | |
| "loss": 0.877, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.022070559744619956, | |
| "grad_norm": 0.9883211255073547, | |
| "learning_rate": 9.998297764435101e-06, | |
| "loss": 0.9625, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.022125598546975617, | |
| "grad_norm": 1.0948412418365479, | |
| "learning_rate": 9.998286435758684e-06, | |
| "loss": 0.9058, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.02218063734933128, | |
| "grad_norm": 0.9402000308036804, | |
| "learning_rate": 9.998275069516482e-06, | |
| "loss": 0.8882, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.02223567615168694, | |
| "grad_norm": 0.9858806133270264, | |
| "learning_rate": 9.998263665708583e-06, | |
| "loss": 0.9086, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.0222907149540426, | |
| "grad_norm": 1.0556131601333618, | |
| "learning_rate": 9.998252224335073e-06, | |
| "loss": 0.9583, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.02234575375639826, | |
| "grad_norm": 1.092766284942627, | |
| "learning_rate": 9.998240745396037e-06, | |
| "loss": 0.9124, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.02240079255875392, | |
| "grad_norm": 1.1902250051498413, | |
| "learning_rate": 9.998229228891563e-06, | |
| "loss": 1.0566, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.022455831361109582, | |
| "grad_norm": 1.067906141281128, | |
| "learning_rate": 9.998217674821734e-06, | |
| "loss": 0.9823, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.022510870163465244, | |
| "grad_norm": 1.0051710605621338, | |
| "learning_rate": 9.998206083186638e-06, | |
| "loss": 0.9141, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.022565908965820905, | |
| "grad_norm": 1.046412467956543, | |
| "learning_rate": 9.998194453986367e-06, | |
| "loss": 0.9439, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.022620947768176563, | |
| "grad_norm": 1.1103553771972656, | |
| "learning_rate": 9.998182787221e-06, | |
| "loss": 0.9494, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.022675986570532224, | |
| "grad_norm": 1.0508466958999634, | |
| "learning_rate": 9.998171082890632e-06, | |
| "loss": 0.9202, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.022731025372887886, | |
| "grad_norm": 1.1364226341247559, | |
| "learning_rate": 9.998159340995347e-06, | |
| "loss": 0.9859, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.022786064175243547, | |
| "grad_norm": 1.2073607444763184, | |
| "learning_rate": 9.998147561535234e-06, | |
| "loss": 0.8883, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.02284110297759921, | |
| "grad_norm": 1.0657012462615967, | |
| "learning_rate": 9.998135744510384e-06, | |
| "loss": 0.8321, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.02289614177995487, | |
| "grad_norm": 1.0101548433303833, | |
| "learning_rate": 9.998123889920881e-06, | |
| "loss": 0.9374, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.022951180582310528, | |
| "grad_norm": 1.057455062866211, | |
| "learning_rate": 9.998111997766817e-06, | |
| "loss": 0.8831, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.02300621938466619, | |
| "grad_norm": 1.206092357635498, | |
| "learning_rate": 9.998100068048282e-06, | |
| "loss": 0.8812, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.02306125818702185, | |
| "grad_norm": 1.0709773302078247, | |
| "learning_rate": 9.998088100765366e-06, | |
| "loss": 0.9486, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.023116296989377512, | |
| "grad_norm": 1.066469669342041, | |
| "learning_rate": 9.998076095918156e-06, | |
| "loss": 1.0229, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.023171335791733173, | |
| "grad_norm": 1.0443583726882935, | |
| "learning_rate": 9.998064053506744e-06, | |
| "loss": 0.8615, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.02322637459408883, | |
| "grad_norm": 1.103096842765808, | |
| "learning_rate": 9.99805197353122e-06, | |
| "loss": 0.9909, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.023281413396444493, | |
| "grad_norm": 0.9804643392562866, | |
| "learning_rate": 9.998039855991677e-06, | |
| "loss": 0.9214, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.023336452198800154, | |
| "grad_norm": 0.9880676865577698, | |
| "learning_rate": 9.998027700888202e-06, | |
| "loss": 0.9345, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.023391491001155815, | |
| "grad_norm": 0.9633826017379761, | |
| "learning_rate": 9.99801550822089e-06, | |
| "loss": 0.9897, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.023446529803511477, | |
| "grad_norm": 1.0159331560134888, | |
| "learning_rate": 9.998003277989831e-06, | |
| "loss": 0.9385, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.023501568605867135, | |
| "grad_norm": 1.009667158126831, | |
| "learning_rate": 9.99799101019512e-06, | |
| "loss": 0.9013, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.023556607408222796, | |
| "grad_norm": 0.9478578567504883, | |
| "learning_rate": 9.997978704836842e-06, | |
| "loss": 0.8775, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.023611646210578457, | |
| "grad_norm": 1.013181447982788, | |
| "learning_rate": 9.997966361915096e-06, | |
| "loss": 0.8797, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.02366668501293412, | |
| "grad_norm": 1.0337481498718262, | |
| "learning_rate": 9.997953981429974e-06, | |
| "loss": 1.0047, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.02372172381528978, | |
| "grad_norm": 0.9423721432685852, | |
| "learning_rate": 9.997941563381566e-06, | |
| "loss": 0.8639, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.02377676261764544, | |
| "grad_norm": 1.100492000579834, | |
| "learning_rate": 9.997929107769968e-06, | |
| "loss": 1.0022, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.0238318014200011, | |
| "grad_norm": 1.1232364177703857, | |
| "learning_rate": 9.997916614595272e-06, | |
| "loss": 0.9145, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.02388684022235676, | |
| "grad_norm": 0.9466833472251892, | |
| "learning_rate": 9.997904083857572e-06, | |
| "loss": 0.9397, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.023941879024712422, | |
| "grad_norm": 0.9514566659927368, | |
| "learning_rate": 9.997891515556963e-06, | |
| "loss": 0.8025, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.023996917827068084, | |
| "grad_norm": 0.9292222261428833, | |
| "learning_rate": 9.997878909693539e-06, | |
| "loss": 0.7739, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.024051956629423745, | |
| "grad_norm": 1.1049963235855103, | |
| "learning_rate": 9.997866266267397e-06, | |
| "loss": 0.9439, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.024106995431779403, | |
| "grad_norm": 1.0938019752502441, | |
| "learning_rate": 9.997853585278627e-06, | |
| "loss": 0.9479, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.024162034234135064, | |
| "grad_norm": 1.0423611402511597, | |
| "learning_rate": 9.997840866727331e-06, | |
| "loss": 0.9309, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.024217073036490726, | |
| "grad_norm": 1.0584756135940552, | |
| "learning_rate": 9.997828110613598e-06, | |
| "loss": 1.0218, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.024272111838846387, | |
| "grad_norm": 0.9986408948898315, | |
| "learning_rate": 9.997815316937527e-06, | |
| "loss": 0.9734, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.02432715064120205, | |
| "grad_norm": 0.9680983424186707, | |
| "learning_rate": 9.997802485699215e-06, | |
| "loss": 0.9286, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.024382189443557706, | |
| "grad_norm": 1.2231700420379639, | |
| "learning_rate": 9.997789616898757e-06, | |
| "loss": 0.8083, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.024437228245913368, | |
| "grad_norm": 1.0064021348953247, | |
| "learning_rate": 9.99777671053625e-06, | |
| "loss": 0.9161, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.02449226704826903, | |
| "grad_norm": 0.9658541679382324, | |
| "learning_rate": 9.99776376661179e-06, | |
| "loss": 0.8027, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.02454730585062469, | |
| "grad_norm": 0.9440343379974365, | |
| "learning_rate": 9.997750785125477e-06, | |
| "loss": 0.9124, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.024602344652980352, | |
| "grad_norm": 0.998792827129364, | |
| "learning_rate": 9.997737766077404e-06, | |
| "loss": 0.8699, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.024657383455336013, | |
| "grad_norm": 1.430880069732666, | |
| "learning_rate": 9.997724709467676e-06, | |
| "loss": 0.9158, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.02471242225769167, | |
| "grad_norm": 0.9737820029258728, | |
| "learning_rate": 9.997711615296384e-06, | |
| "loss": 0.9496, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.024767461060047333, | |
| "grad_norm": 0.9710075855255127, | |
| "learning_rate": 9.997698483563629e-06, | |
| "loss": 0.8714, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.024822499862402994, | |
| "grad_norm": 1.5286253690719604, | |
| "learning_rate": 9.997685314269511e-06, | |
| "loss": 0.8421, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.024877538664758655, | |
| "grad_norm": 1.0269445180892944, | |
| "learning_rate": 9.99767210741413e-06, | |
| "loss": 1.0131, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.024932577467114317, | |
| "grad_norm": 0.9780508279800415, | |
| "learning_rate": 9.99765886299758e-06, | |
| "loss": 0.9897, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.024987616269469975, | |
| "grad_norm": 0.998332679271698, | |
| "learning_rate": 9.997645581019965e-06, | |
| "loss": 0.9647, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.025042655071825636, | |
| "grad_norm": 1.7062602043151855, | |
| "learning_rate": 9.997632261481383e-06, | |
| "loss": 1.0729, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.025097693874181298, | |
| "grad_norm": 0.9793694615364075, | |
| "learning_rate": 9.997618904381936e-06, | |
| "loss": 0.9556, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.02515273267653696, | |
| "grad_norm": 1.0183895826339722, | |
| "learning_rate": 9.997605509721721e-06, | |
| "loss": 0.9194, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.02520777147889262, | |
| "grad_norm": 1.0288400650024414, | |
| "learning_rate": 9.997592077500844e-06, | |
| "loss": 0.955, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.025262810281248282, | |
| "grad_norm": 0.9551253914833069, | |
| "learning_rate": 9.997578607719401e-06, | |
| "loss": 0.8498, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.02531784908360394, | |
| "grad_norm": 0.9648008942604065, | |
| "learning_rate": 9.997565100377494e-06, | |
| "loss": 0.9306, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.0253728878859596, | |
| "grad_norm": 0.9206677675247192, | |
| "learning_rate": 9.997551555475225e-06, | |
| "loss": 0.7874, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.025427926688315262, | |
| "grad_norm": 1.0479545593261719, | |
| "learning_rate": 9.997537973012698e-06, | |
| "loss": 0.9201, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.025482965490670924, | |
| "grad_norm": 1.0329946279525757, | |
| "learning_rate": 9.997524352990013e-06, | |
| "loss": 0.9577, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.025538004293026585, | |
| "grad_norm": 1.1177828311920166, | |
| "learning_rate": 9.997510695407273e-06, | |
| "loss": 1.0041, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.025593043095382243, | |
| "grad_norm": 1.0351577997207642, | |
| "learning_rate": 9.99749700026458e-06, | |
| "loss": 0.9952, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.025648081897737905, | |
| "grad_norm": 0.905274510383606, | |
| "learning_rate": 9.997483267562035e-06, | |
| "loss": 0.8185, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.025703120700093566, | |
| "grad_norm": 1.0749776363372803, | |
| "learning_rate": 9.997469497299747e-06, | |
| "loss": 1.0611, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.025758159502449227, | |
| "grad_norm": 0.8972223401069641, | |
| "learning_rate": 9.997455689477815e-06, | |
| "loss": 0.8994, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.02581319830480489, | |
| "grad_norm": 1.0669914484024048, | |
| "learning_rate": 9.997441844096342e-06, | |
| "loss": 1.06, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.025868237107160547, | |
| "grad_norm": 1.0431914329528809, | |
| "learning_rate": 9.997427961155435e-06, | |
| "loss": 0.8657, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.025923275909516208, | |
| "grad_norm": 0.9609962701797485, | |
| "learning_rate": 9.997414040655198e-06, | |
| "loss": 0.8864, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.02597831471187187, | |
| "grad_norm": 1.0829721689224243, | |
| "learning_rate": 9.997400082595735e-06, | |
| "loss": 0.9221, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.02603335351422753, | |
| "grad_norm": 0.992082953453064, | |
| "learning_rate": 9.99738608697715e-06, | |
| "loss": 0.8455, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.026088392316583192, | |
| "grad_norm": 1.0486301183700562, | |
| "learning_rate": 9.997372053799547e-06, | |
| "loss": 0.8729, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.026143431118938854, | |
| "grad_norm": 1.0328491926193237, | |
| "learning_rate": 9.997357983063036e-06, | |
| "loss": 0.8788, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.02619846992129451, | |
| "grad_norm": 0.963333249092102, | |
| "learning_rate": 9.997343874767719e-06, | |
| "loss": 0.892, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.026253508723650173, | |
| "grad_norm": 1.1606497764587402, | |
| "learning_rate": 9.997329728913704e-06, | |
| "loss": 0.9984, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.026308547526005834, | |
| "grad_norm": 1.241650104522705, | |
| "learning_rate": 9.997315545501096e-06, | |
| "loss": 0.946, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.026363586328361496, | |
| "grad_norm": 1.008004069328308, | |
| "learning_rate": 9.99730132453e-06, | |
| "loss": 0.849, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.026418625130717157, | |
| "grad_norm": 0.9883478879928589, | |
| "learning_rate": 9.997287066000527e-06, | |
| "loss": 0.9478, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.026473663933072815, | |
| "grad_norm": 1.0224446058273315, | |
| "learning_rate": 9.997272769912783e-06, | |
| "loss": 1.0318, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.026528702735428476, | |
| "grad_norm": 0.9412569403648376, | |
| "learning_rate": 9.997258436266874e-06, | |
| "loss": 0.9119, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.026583741537784138, | |
| "grad_norm": 0.9214537739753723, | |
| "learning_rate": 9.997244065062906e-06, | |
| "loss": 0.8785, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.0266387803401398, | |
| "grad_norm": 1.0015628337860107, | |
| "learning_rate": 9.997229656300991e-06, | |
| "loss": 0.8869, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.02669381914249546, | |
| "grad_norm": 0.8965190052986145, | |
| "learning_rate": 9.997215209981237e-06, | |
| "loss": 0.7009, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.02674885794485112, | |
| "grad_norm": 1.1976135969161987, | |
| "learning_rate": 9.997200726103749e-06, | |
| "loss": 0.9795, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.02680389674720678, | |
| "grad_norm": 0.864780843257904, | |
| "learning_rate": 9.997186204668639e-06, | |
| "loss": 0.7687, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.02685893554956244, | |
| "grad_norm": 0.9946566820144653, | |
| "learning_rate": 9.997171645676013e-06, | |
| "loss": 0.9672, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.026913974351918103, | |
| "grad_norm": 1.043835997581482, | |
| "learning_rate": 9.997157049125985e-06, | |
| "loss": 0.862, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.026969013154273764, | |
| "grad_norm": 0.9697456955909729, | |
| "learning_rate": 9.99714241501866e-06, | |
| "loss": 0.8368, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.027024051956629425, | |
| "grad_norm": 0.9975618124008179, | |
| "learning_rate": 9.997127743354153e-06, | |
| "loss": 0.8739, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.027079090758985083, | |
| "grad_norm": 1.0055313110351562, | |
| "learning_rate": 9.99711303413257e-06, | |
| "loss": 0.9227, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.027134129561340745, | |
| "grad_norm": 1.0418384075164795, | |
| "learning_rate": 9.997098287354024e-06, | |
| "loss": 0.9978, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.027189168363696406, | |
| "grad_norm": 0.8648970723152161, | |
| "learning_rate": 9.997083503018625e-06, | |
| "loss": 0.8363, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.027244207166052067, | |
| "grad_norm": 1.13506019115448, | |
| "learning_rate": 9.997068681126483e-06, | |
| "loss": 0.8851, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.02729924596840773, | |
| "grad_norm": 0.974400520324707, | |
| "learning_rate": 9.997053821677712e-06, | |
| "loss": 0.8533, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.027354284770763387, | |
| "grad_norm": 1.226507544517517, | |
| "learning_rate": 9.997038924672419e-06, | |
| "loss": 0.8586, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.027409323573119048, | |
| "grad_norm": 1.004753589630127, | |
| "learning_rate": 9.997023990110721e-06, | |
| "loss": 0.8974, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.02746436237547471, | |
| "grad_norm": 1.0492571592330933, | |
| "learning_rate": 9.997009017992729e-06, | |
| "loss": 0.8457, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.02751940117783037, | |
| "grad_norm": 1.0068167448043823, | |
| "learning_rate": 9.996994008318554e-06, | |
| "loss": 0.9608, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.027574439980186032, | |
| "grad_norm": 0.9686044454574585, | |
| "learning_rate": 9.996978961088311e-06, | |
| "loss": 0.9041, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.027629478782541694, | |
| "grad_norm": 1.281728744506836, | |
| "learning_rate": 9.99696387630211e-06, | |
| "loss": 0.9739, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.02768451758489735, | |
| "grad_norm": 0.9069758653640747, | |
| "learning_rate": 9.996948753960065e-06, | |
| "loss": 0.8467, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.027739556387253013, | |
| "grad_norm": 1.0337222814559937, | |
| "learning_rate": 9.996933594062293e-06, | |
| "loss": 0.9638, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.027794595189608674, | |
| "grad_norm": 0.9695359468460083, | |
| "learning_rate": 9.996918396608905e-06, | |
| "loss": 0.8986, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.027849633991964336, | |
| "grad_norm": 0.9120615124702454, | |
| "learning_rate": 9.996903161600016e-06, | |
| "loss": 0.9103, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.027904672794319997, | |
| "grad_norm": 0.9736546874046326, | |
| "learning_rate": 9.996887889035741e-06, | |
| "loss": 0.9308, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.027959711596675655, | |
| "grad_norm": 1.0184897184371948, | |
| "learning_rate": 9.996872578916192e-06, | |
| "loss": 0.8978, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.028014750399031316, | |
| "grad_norm": 0.9791838526725769, | |
| "learning_rate": 9.996857231241489e-06, | |
| "loss": 0.8639, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.028069789201386978, | |
| "grad_norm": 1.2985681295394897, | |
| "learning_rate": 9.996841846011742e-06, | |
| "loss": 0.9581, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.02812482800374264, | |
| "grad_norm": 1.0647368431091309, | |
| "learning_rate": 9.996826423227071e-06, | |
| "loss": 1.0565, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.0281798668060983, | |
| "grad_norm": 1.0336421728134155, | |
| "learning_rate": 9.996810962887591e-06, | |
| "loss": 1.008, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.02823490560845396, | |
| "grad_norm": 1.1838933229446411, | |
| "learning_rate": 9.996795464993416e-06, | |
| "loss": 0.8359, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.02828994441080962, | |
| "grad_norm": 0.9898360371589661, | |
| "learning_rate": 9.996779929544663e-06, | |
| "loss": 0.8501, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.02834498321316528, | |
| "grad_norm": 0.9836066365242004, | |
| "learning_rate": 9.99676435654145e-06, | |
| "loss": 0.8795, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.028400022015520943, | |
| "grad_norm": 1.0621601343154907, | |
| "learning_rate": 9.996748745983895e-06, | |
| "loss": 0.8746, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.028455060817876604, | |
| "grad_norm": 1.0082437992095947, | |
| "learning_rate": 9.996733097872113e-06, | |
| "loss": 0.9278, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.028510099620232265, | |
| "grad_norm": 0.9903931617736816, | |
| "learning_rate": 9.996717412206222e-06, | |
| "loss": 0.8264, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.028565138422587923, | |
| "grad_norm": 1.0797243118286133, | |
| "learning_rate": 9.996701688986342e-06, | |
| "loss": 1.0077, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.028620177224943585, | |
| "grad_norm": 1.147133231163025, | |
| "learning_rate": 9.99668592821259e-06, | |
| "loss": 0.9374, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.028675216027299246, | |
| "grad_norm": 0.9993947744369507, | |
| "learning_rate": 9.996670129885082e-06, | |
| "loss": 0.9562, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.028730254829654907, | |
| "grad_norm": 0.8580895066261292, | |
| "learning_rate": 9.99665429400394e-06, | |
| "loss": 0.7985, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.02878529363201057, | |
| "grad_norm": 0.9251388907432556, | |
| "learning_rate": 9.996638420569281e-06, | |
| "loss": 0.7323, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.028840332434366227, | |
| "grad_norm": 1.0010193586349487, | |
| "learning_rate": 9.996622509581227e-06, | |
| "loss": 0.9316, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.028895371236721888, | |
| "grad_norm": 0.9822579026222229, | |
| "learning_rate": 9.996606561039894e-06, | |
| "loss": 0.8978, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.02895041003907755, | |
| "grad_norm": 1.0760595798492432, | |
| "learning_rate": 9.996590574945403e-06, | |
| "loss": 0.9125, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.02900544884143321, | |
| "grad_norm": 1.138869285583496, | |
| "learning_rate": 9.996574551297876e-06, | |
| "loss": 0.8185, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.029060487643788872, | |
| "grad_norm": 1.002994179725647, | |
| "learning_rate": 9.996558490097433e-06, | |
| "loss": 0.9404, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.02911552644614453, | |
| "grad_norm": 0.9550611972808838, | |
| "learning_rate": 9.996542391344194e-06, | |
| "loss": 0.859, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.02917056524850019, | |
| "grad_norm": 0.9236055612564087, | |
| "learning_rate": 9.996526255038277e-06, | |
| "loss": 0.7758, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.029225604050855853, | |
| "grad_norm": 1.103966474533081, | |
| "learning_rate": 9.996510081179808e-06, | |
| "loss": 1.0147, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.029280642853211514, | |
| "grad_norm": 0.9884665012359619, | |
| "learning_rate": 9.996493869768906e-06, | |
| "loss": 0.8784, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.029335681655567176, | |
| "grad_norm": 0.9173223376274109, | |
| "learning_rate": 9.996477620805694e-06, | |
| "loss": 0.8741, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.029390720457922837, | |
| "grad_norm": 0.965548574924469, | |
| "learning_rate": 9.996461334290294e-06, | |
| "loss": 0.8989, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.029445759260278495, | |
| "grad_norm": 0.9939296245574951, | |
| "learning_rate": 9.996445010222828e-06, | |
| "loss": 0.8552, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.029500798062634156, | |
| "grad_norm": 1.0081578493118286, | |
| "learning_rate": 9.996428648603417e-06, | |
| "loss": 0.9138, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.029555836864989818, | |
| "grad_norm": 1.0139487981796265, | |
| "learning_rate": 9.996412249432188e-06, | |
| "loss": 0.9452, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.02961087566734548, | |
| "grad_norm": 0.9463647603988647, | |
| "learning_rate": 9.996395812709262e-06, | |
| "loss": 0.8721, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.02966591446970114, | |
| "grad_norm": 0.9981473684310913, | |
| "learning_rate": 9.99637933843476e-06, | |
| "loss": 0.7791, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.0297209532720568, | |
| "grad_norm": 1.1637190580368042, | |
| "learning_rate": 9.996362826608812e-06, | |
| "loss": 0.8798, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.02977599207441246, | |
| "grad_norm": 2.2887051105499268, | |
| "learning_rate": 9.996346277231536e-06, | |
| "loss": 0.9303, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.02983103087676812, | |
| "grad_norm": 0.9173391461372375, | |
| "learning_rate": 9.99632969030306e-06, | |
| "loss": 0.8627, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.029886069679123783, | |
| "grad_norm": 1.033355474472046, | |
| "learning_rate": 9.996313065823506e-06, | |
| "loss": 0.9906, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.029941108481479444, | |
| "grad_norm": 0.9286639094352722, | |
| "learning_rate": 9.996296403793002e-06, | |
| "loss": 0.7043, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.029996147283835102, | |
| "grad_norm": 0.963238000869751, | |
| "learning_rate": 9.996279704211671e-06, | |
| "loss": 1.0236, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.030051186086190763, | |
| "grad_norm": 1.0275089740753174, | |
| "learning_rate": 9.99626296707964e-06, | |
| "loss": 0.976, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.030106224888546425, | |
| "grad_norm": 1.0944674015045166, | |
| "learning_rate": 9.996246192397032e-06, | |
| "loss": 0.9209, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.030161263690902086, | |
| "grad_norm": 0.9620945453643799, | |
| "learning_rate": 9.996229380163976e-06, | |
| "loss": 0.8973, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.030216302493257748, | |
| "grad_norm": 1.032549500465393, | |
| "learning_rate": 9.996212530380597e-06, | |
| "loss": 0.892, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.03027134129561341, | |
| "grad_norm": 1.0433719158172607, | |
| "learning_rate": 9.996195643047023e-06, | |
| "loss": 0.8428, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.030326380097969067, | |
| "grad_norm": 1.1541085243225098, | |
| "learning_rate": 9.996178718163378e-06, | |
| "loss": 0.9084, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.03038141890032473, | |
| "grad_norm": 0.9386873245239258, | |
| "learning_rate": 9.996161755729793e-06, | |
| "loss": 0.9246, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.03043645770268039, | |
| "grad_norm": 1.092236042022705, | |
| "learning_rate": 9.996144755746393e-06, | |
| "loss": 0.8419, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.03049149650503605, | |
| "grad_norm": 0.9517606496810913, | |
| "learning_rate": 9.996127718213306e-06, | |
| "loss": 0.9002, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.030546535307391712, | |
| "grad_norm": 0.965972900390625, | |
| "learning_rate": 9.996110643130661e-06, | |
| "loss": 0.9197, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.03060157410974737, | |
| "grad_norm": 0.9396095275878906, | |
| "learning_rate": 9.996093530498586e-06, | |
| "loss": 0.8686, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.030656612912103032, | |
| "grad_norm": 1.0154120922088623, | |
| "learning_rate": 9.99607638031721e-06, | |
| "loss": 0.9773, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.030711651714458693, | |
| "grad_norm": 1.3572301864624023, | |
| "learning_rate": 9.99605919258666e-06, | |
| "loss": 0.911, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.030766690516814355, | |
| "grad_norm": 0.968278169631958, | |
| "learning_rate": 9.996041967307066e-06, | |
| "loss": 0.7704, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.030821729319170016, | |
| "grad_norm": 0.9867869019508362, | |
| "learning_rate": 9.99602470447856e-06, | |
| "loss": 0.873, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.030876768121525677, | |
| "grad_norm": 1.056450605392456, | |
| "learning_rate": 9.996007404101269e-06, | |
| "loss": 0.941, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.030931806923881335, | |
| "grad_norm": 1.0419799089431763, | |
| "learning_rate": 9.995990066175321e-06, | |
| "loss": 0.957, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.030986845726236997, | |
| "grad_norm": 0.9789314866065979, | |
| "learning_rate": 9.995972690700852e-06, | |
| "loss": 0.9229, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.031041884528592658, | |
| "grad_norm": 0.917783796787262, | |
| "learning_rate": 9.995955277677989e-06, | |
| "loss": 0.8186, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.03109692333094832, | |
| "grad_norm": 1.0231432914733887, | |
| "learning_rate": 9.995937827106863e-06, | |
| "loss": 0.8624, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.03115196213330398, | |
| "grad_norm": 0.9552083015441895, | |
| "learning_rate": 9.995920338987605e-06, | |
| "loss": 0.7967, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.03120700093565964, | |
| "grad_norm": 0.9441083669662476, | |
| "learning_rate": 9.995902813320349e-06, | |
| "loss": 0.8471, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.0312620397380153, | |
| "grad_norm": 1.0025299787521362, | |
| "learning_rate": 9.995885250105223e-06, | |
| "loss": 0.8646, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.03131707854037096, | |
| "grad_norm": 0.8997280597686768, | |
| "learning_rate": 9.99586764934236e-06, | |
| "loss": 0.8736, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.03137211734272662, | |
| "grad_norm": 0.9090663194656372, | |
| "learning_rate": 9.995850011031896e-06, | |
| "loss": 0.8548, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.031427156145082284, | |
| "grad_norm": 0.9641294479370117, | |
| "learning_rate": 9.995832335173959e-06, | |
| "loss": 0.8667, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.031482194947437946, | |
| "grad_norm": 0.9165804982185364, | |
| "learning_rate": 9.995814621768682e-06, | |
| "loss": 0.803, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.03153723374979361, | |
| "grad_norm": 0.9672492742538452, | |
| "learning_rate": 9.995796870816202e-06, | |
| "loss": 0.8335, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.03159227255214927, | |
| "grad_norm": 0.9359404444694519, | |
| "learning_rate": 9.995779082316648e-06, | |
| "loss": 0.8294, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.03164731135450492, | |
| "grad_norm": 0.926925003528595, | |
| "learning_rate": 9.995761256270157e-06, | |
| "loss": 0.7714, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.031702350156860584, | |
| "grad_norm": 1.1848629713058472, | |
| "learning_rate": 9.995743392676862e-06, | |
| "loss": 0.8925, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.031757388959216246, | |
| "grad_norm": 0.9624786972999573, | |
| "learning_rate": 9.995725491536897e-06, | |
| "loss": 0.9292, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.03181242776157191, | |
| "grad_norm": 0.9479736089706421, | |
| "learning_rate": 9.995707552850396e-06, | |
| "loss": 0.8797, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.03186746656392757, | |
| "grad_norm": 0.9551546573638916, | |
| "learning_rate": 9.995689576617494e-06, | |
| "loss": 0.8793, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.03192250536628323, | |
| "grad_norm": 0.9210056662559509, | |
| "learning_rate": 9.995671562838325e-06, | |
| "loss": 0.9714, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.03197754416863889, | |
| "grad_norm": 1.063117504119873, | |
| "learning_rate": 9.995653511513029e-06, | |
| "loss": 0.9608, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.03203258297099455, | |
| "grad_norm": 0.9426459670066833, | |
| "learning_rate": 9.995635422641736e-06, | |
| "loss": 0.9102, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.032087621773350214, | |
| "grad_norm": 1.0176693201065063, | |
| "learning_rate": 9.995617296224584e-06, | |
| "loss": 0.9109, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.032142660575705875, | |
| "grad_norm": 0.9457042217254639, | |
| "learning_rate": 9.995599132261711e-06, | |
| "loss": 0.9017, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.03219769937806154, | |
| "grad_norm": 1.5851638317108154, | |
| "learning_rate": 9.995580930753252e-06, | |
| "loss": 0.967, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.03225273818041719, | |
| "grad_norm": 0.9961487054824829, | |
| "learning_rate": 9.995562691699345e-06, | |
| "loss": 0.9396, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.03230777698277285, | |
| "grad_norm": 0.9892112016677856, | |
| "learning_rate": 9.995544415100125e-06, | |
| "loss": 0.9058, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.032362815785128514, | |
| "grad_norm": 0.9052272439002991, | |
| "learning_rate": 9.99552610095573e-06, | |
| "loss": 0.9194, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.032417854587484175, | |
| "grad_norm": 0.8381399512290955, | |
| "learning_rate": 9.995507749266297e-06, | |
| "loss": 0.7465, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.03247289338983984, | |
| "grad_norm": 1.018964171409607, | |
| "learning_rate": 9.995489360031969e-06, | |
| "loss": 0.841, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.0325279321921955, | |
| "grad_norm": 0.908311128616333, | |
| "learning_rate": 9.995470933252876e-06, | |
| "loss": 0.8592, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.03258297099455116, | |
| "grad_norm": 1.2986040115356445, | |
| "learning_rate": 9.995452468929162e-06, | |
| "loss": 0.8341, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.03263800979690682, | |
| "grad_norm": 1.6565190553665161, | |
| "learning_rate": 9.995433967060966e-06, | |
| "loss": 0.8681, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.03269304859926248, | |
| "grad_norm": 0.9725674390792847, | |
| "learning_rate": 9.995415427648423e-06, | |
| "loss": 0.8449, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.032748087401618144, | |
| "grad_norm": 0.8683852553367615, | |
| "learning_rate": 9.995396850691677e-06, | |
| "loss": 0.8478, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.0328031262039738, | |
| "grad_norm": 0.9912856817245483, | |
| "learning_rate": 9.995378236190862e-06, | |
| "loss": 0.8912, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.03285816500632946, | |
| "grad_norm": 0.9396800398826599, | |
| "learning_rate": 9.995359584146125e-06, | |
| "loss": 0.856, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.03291320380868512, | |
| "grad_norm": 1.385006308555603, | |
| "learning_rate": 9.995340894557601e-06, | |
| "loss": 0.9633, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.03296824261104078, | |
| "grad_norm": 0.8982875943183899, | |
| "learning_rate": 9.995322167425433e-06, | |
| "loss": 0.9244, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.033023281413396444, | |
| "grad_norm": 0.8981022834777832, | |
| "learning_rate": 9.995303402749759e-06, | |
| "loss": 0.8854, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.033078320215752105, | |
| "grad_norm": 0.9917197227478027, | |
| "learning_rate": 9.995284600530724e-06, | |
| "loss": 1.0086, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.033133359018107766, | |
| "grad_norm": 1.0540626049041748, | |
| "learning_rate": 9.995265760768464e-06, | |
| "loss": 1.0022, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.03318839782046343, | |
| "grad_norm": 0.9523479342460632, | |
| "learning_rate": 9.995246883463126e-06, | |
| "loss": 0.9893, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.03324343662281909, | |
| "grad_norm": 0.9824770092964172, | |
| "learning_rate": 9.99522796861485e-06, | |
| "loss": 0.8385, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.03329847542517475, | |
| "grad_norm": 1.0968893766403198, | |
| "learning_rate": 9.995209016223776e-06, | |
| "loss": 1.0109, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.03335351422753041, | |
| "grad_norm": 0.9115625023841858, | |
| "learning_rate": 9.995190026290049e-06, | |
| "loss": 0.8656, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.033408553029886066, | |
| "grad_norm": 0.9795814156532288, | |
| "learning_rate": 9.99517099881381e-06, | |
| "loss": 0.8941, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.03346359183224173, | |
| "grad_norm": 0.9317291378974915, | |
| "learning_rate": 9.995151933795204e-06, | |
| "loss": 0.7819, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.03351863063459739, | |
| "grad_norm": 0.9936283230781555, | |
| "learning_rate": 9.995132831234373e-06, | |
| "loss": 0.8674, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.03357366943695305, | |
| "grad_norm": 0.9872812032699585, | |
| "learning_rate": 9.995113691131462e-06, | |
| "loss": 0.9038, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.03362870823930871, | |
| "grad_norm": 0.9516895413398743, | |
| "learning_rate": 9.995094513486611e-06, | |
| "loss": 0.9038, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.03368374704166437, | |
| "grad_norm": 1.090579867362976, | |
| "learning_rate": 9.995075298299968e-06, | |
| "loss": 0.9587, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.033738785844020035, | |
| "grad_norm": 1.021398663520813, | |
| "learning_rate": 9.995056045571677e-06, | |
| "loss": 0.9569, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.033793824646375696, | |
| "grad_norm": 1.009657382965088, | |
| "learning_rate": 9.99503675530188e-06, | |
| "loss": 0.8346, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.03384886344873136, | |
| "grad_norm": 1.0478712320327759, | |
| "learning_rate": 9.995017427490725e-06, | |
| "loss": 1.0566, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.03390390225108702, | |
| "grad_norm": 1.1391830444335938, | |
| "learning_rate": 9.994998062138355e-06, | |
| "loss": 1.0727, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.03395894105344268, | |
| "grad_norm": 1.0172302722930908, | |
| "learning_rate": 9.994978659244918e-06, | |
| "loss": 0.7869, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.034013979855798335, | |
| "grad_norm": 1.0532630681991577, | |
| "learning_rate": 9.994959218810558e-06, | |
| "loss": 0.8626, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.034069018658153996, | |
| "grad_norm": 0.8300478458404541, | |
| "learning_rate": 9.99493974083542e-06, | |
| "loss": 0.8166, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.03412405746050966, | |
| "grad_norm": 1.0613664388656616, | |
| "learning_rate": 9.994920225319656e-06, | |
| "loss": 0.8899, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.03417909626286532, | |
| "grad_norm": 0.9827042818069458, | |
| "learning_rate": 9.994900672263406e-06, | |
| "loss": 0.8243, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.03423413506522098, | |
| "grad_norm": 0.8790082931518555, | |
| "learning_rate": 9.994881081666818e-06, | |
| "loss": 0.8153, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.03428917386757664, | |
| "grad_norm": 1.033378005027771, | |
| "learning_rate": 9.994861453530044e-06, | |
| "loss": 0.8916, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.0343442126699323, | |
| "grad_norm": 0.9547238349914551, | |
| "learning_rate": 9.994841787853227e-06, | |
| "loss": 0.9141, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.034399251472287964, | |
| "grad_norm": 0.9606438279151917, | |
| "learning_rate": 9.994822084636514e-06, | |
| "loss": 0.9435, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.034454290274643626, | |
| "grad_norm": 0.8461503982543945, | |
| "learning_rate": 9.994802343880059e-06, | |
| "loss": 0.7914, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.03450932907699929, | |
| "grad_norm": 1.144538402557373, | |
| "learning_rate": 9.994782565584004e-06, | |
| "loss": 0.8025, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.03456436787935495, | |
| "grad_norm": 1.0099962949752808, | |
| "learning_rate": 9.994762749748502e-06, | |
| "loss": 0.9607, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.0346194066817106, | |
| "grad_norm": 0.9822041988372803, | |
| "learning_rate": 9.9947428963737e-06, | |
| "loss": 0.9216, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.034674445484066264, | |
| "grad_norm": 0.9056866765022278, | |
| "learning_rate": 9.994723005459746e-06, | |
| "loss": 0.7913, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.034729484286421926, | |
| "grad_norm": 1.0099287033081055, | |
| "learning_rate": 9.994703077006792e-06, | |
| "loss": 0.9937, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.03478452308877759, | |
| "grad_norm": 0.9559167623519897, | |
| "learning_rate": 9.994683111014984e-06, | |
| "loss": 0.9774, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.03483956189113325, | |
| "grad_norm": 1.0359059572219849, | |
| "learning_rate": 9.994663107484478e-06, | |
| "loss": 0.9062, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.03489460069348891, | |
| "grad_norm": 0.8803057074546814, | |
| "learning_rate": 9.99464306641542e-06, | |
| "loss": 0.9638, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.03494963949584457, | |
| "grad_norm": 1.0926579236984253, | |
| "learning_rate": 9.994622987807962e-06, | |
| "loss": 1.0467, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.03500467829820023, | |
| "grad_norm": 1.0051401853561401, | |
| "learning_rate": 9.994602871662253e-06, | |
| "loss": 0.8717, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.035059717100555894, | |
| "grad_norm": 1.2007508277893066, | |
| "learning_rate": 9.994582717978448e-06, | |
| "loss": 0.8004, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.035114755902911556, | |
| "grad_norm": 0.8826266527175903, | |
| "learning_rate": 9.994562526756695e-06, | |
| "loss": 0.8888, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.03516979470526721, | |
| "grad_norm": 0.9953717589378357, | |
| "learning_rate": 9.994542297997147e-06, | |
| "loss": 0.8999, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.03522483350762287, | |
| "grad_norm": 1.0203614234924316, | |
| "learning_rate": 9.994522031699958e-06, | |
| "loss": 0.8241, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.03527987230997853, | |
| "grad_norm": 0.8760203719139099, | |
| "learning_rate": 9.994501727865276e-06, | |
| "loss": 0.7893, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.035334911112334194, | |
| "grad_norm": 1.024888277053833, | |
| "learning_rate": 9.994481386493257e-06, | |
| "loss": 0.9865, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.035389949914689856, | |
| "grad_norm": 0.907454788684845, | |
| "learning_rate": 9.994461007584052e-06, | |
| "loss": 0.891, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.03544498871704552, | |
| "grad_norm": 1.0400965213775635, | |
| "learning_rate": 9.994440591137816e-06, | |
| "loss": 0.9345, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.03550002751940118, | |
| "grad_norm": 0.9816616177558899, | |
| "learning_rate": 9.9944201371547e-06, | |
| "loss": 0.91, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.03555506632175684, | |
| "grad_norm": 1.0528117418289185, | |
| "learning_rate": 9.99439964563486e-06, | |
| "loss": 0.952, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.0356101051241125, | |
| "grad_norm": 0.9802080988883972, | |
| "learning_rate": 9.99437911657845e-06, | |
| "loss": 0.9392, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.03566514392646816, | |
| "grad_norm": 0.9580393433570862, | |
| "learning_rate": 9.994358549985623e-06, | |
| "loss": 0.874, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.035720182728823824, | |
| "grad_norm": 0.8935576677322388, | |
| "learning_rate": 9.994337945856533e-06, | |
| "loss": 0.8435, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.03577522153117948, | |
| "grad_norm": 1.009699821472168, | |
| "learning_rate": 9.994317304191337e-06, | |
| "loss": 0.9436, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.03583026033353514, | |
| "grad_norm": 0.9126121401786804, | |
| "learning_rate": 9.994296624990188e-06, | |
| "loss": 0.8424, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.0358852991358908, | |
| "grad_norm": 0.9555553197860718, | |
| "learning_rate": 9.994275908253243e-06, | |
| "loss": 0.93, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.03594033793824646, | |
| "grad_norm": 0.8359857797622681, | |
| "learning_rate": 9.994255153980658e-06, | |
| "loss": 0.6326, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.035995376740602124, | |
| "grad_norm": 0.8918783664703369, | |
| "learning_rate": 9.994234362172587e-06, | |
| "loss": 0.8287, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.036050415542957785, | |
| "grad_norm": 0.9878549575805664, | |
| "learning_rate": 9.994213532829188e-06, | |
| "loss": 0.8841, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.03610545434531345, | |
| "grad_norm": 0.9504040479660034, | |
| "learning_rate": 9.994192665950617e-06, | |
| "loss": 1.0182, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.03616049314766911, | |
| "grad_norm": 0.9531422257423401, | |
| "learning_rate": 9.99417176153703e-06, | |
| "loss": 0.8504, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.03621553195002477, | |
| "grad_norm": 0.9580292105674744, | |
| "learning_rate": 9.994150819588587e-06, | |
| "loss": 0.8048, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.03627057075238043, | |
| "grad_norm": 0.9786819815635681, | |
| "learning_rate": 9.99412984010544e-06, | |
| "loss": 0.9124, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.03632560955473609, | |
| "grad_norm": 0.9733422994613647, | |
| "learning_rate": 9.994108823087751e-06, | |
| "loss": 0.8868, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.03638064835709175, | |
| "grad_norm": 1.093173623085022, | |
| "learning_rate": 9.994087768535679e-06, | |
| "loss": 0.9428, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.03643568715944741, | |
| "grad_norm": 0.9067148566246033, | |
| "learning_rate": 9.994066676449378e-06, | |
| "loss": 0.8838, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.03649072596180307, | |
| "grad_norm": 0.9509521722793579, | |
| "learning_rate": 9.99404554682901e-06, | |
| "loss": 0.9034, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.03654576476415873, | |
| "grad_norm": 0.9523824453353882, | |
| "learning_rate": 9.994024379674731e-06, | |
| "loss": 0.9623, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.03660080356651439, | |
| "grad_norm": 0.987276554107666, | |
| "learning_rate": 9.994003174986703e-06, | |
| "loss": 0.8817, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.036655842368870054, | |
| "grad_norm": 0.9500744342803955, | |
| "learning_rate": 9.993981932765083e-06, | |
| "loss": 0.9742, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.036710881171225715, | |
| "grad_norm": 0.9420705437660217, | |
| "learning_rate": 9.993960653010034e-06, | |
| "loss": 0.9657, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.036765919973581376, | |
| "grad_norm": 0.9443248510360718, | |
| "learning_rate": 9.99393933572171e-06, | |
| "loss": 0.8468, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.03682095877593704, | |
| "grad_norm": 0.9666558504104614, | |
| "learning_rate": 9.993917980900276e-06, | |
| "loss": 0.9871, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.0368759975782927, | |
| "grad_norm": 1.0236201286315918, | |
| "learning_rate": 9.993896588545892e-06, | |
| "loss": 0.9814, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.03693103638064836, | |
| "grad_norm": 1.016190528869629, | |
| "learning_rate": 9.993875158658716e-06, | |
| "loss": 1.0156, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.036986075183004015, | |
| "grad_norm": 0.9296661019325256, | |
| "learning_rate": 9.993853691238913e-06, | |
| "loss": 0.7956, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.037041113985359676, | |
| "grad_norm": 0.9276684522628784, | |
| "learning_rate": 9.993832186286643e-06, | |
| "loss": 0.9253, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.03709615278771534, | |
| "grad_norm": 0.8588787913322449, | |
| "learning_rate": 9.993810643802065e-06, | |
| "loss": 0.7878, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.037151191590071, | |
| "grad_norm": 0.9955212473869324, | |
| "learning_rate": 9.993789063785344e-06, | |
| "loss": 0.8711, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.03720623039242666, | |
| "grad_norm": 0.925578236579895, | |
| "learning_rate": 9.993767446236642e-06, | |
| "loss": 0.9431, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.03726126919478232, | |
| "grad_norm": 0.9610552787780762, | |
| "learning_rate": 9.99374579115612e-06, | |
| "loss": 0.887, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.03731630799713798, | |
| "grad_norm": 1.0052428245544434, | |
| "learning_rate": 9.99372409854394e-06, | |
| "loss": 0.8751, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.037371346799493645, | |
| "grad_norm": 0.9503066539764404, | |
| "learning_rate": 9.99370236840027e-06, | |
| "loss": 0.8556, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.037426385601849306, | |
| "grad_norm": 2.426232099533081, | |
| "learning_rate": 9.993680600725266e-06, | |
| "loss": 0.9077, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.03748142440420497, | |
| "grad_norm": 0.9119723439216614, | |
| "learning_rate": 9.993658795519096e-06, | |
| "loss": 0.8575, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.03753646320656062, | |
| "grad_norm": 0.9688286781311035, | |
| "learning_rate": 9.993636952781923e-06, | |
| "loss": 0.8921, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.03759150200891628, | |
| "grad_norm": 1.030013084411621, | |
| "learning_rate": 9.993615072513913e-06, | |
| "loss": 0.8622, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.037646540811271945, | |
| "grad_norm": 1.055187463760376, | |
| "learning_rate": 9.993593154715228e-06, | |
| "loss": 0.9251, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.037701579613627606, | |
| "grad_norm": 1.0518591403961182, | |
| "learning_rate": 9.993571199386032e-06, | |
| "loss": 0.9575, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.03775661841598327, | |
| "grad_norm": 0.9232666492462158, | |
| "learning_rate": 9.993549206526495e-06, | |
| "loss": 0.8522, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.03781165721833893, | |
| "grad_norm": 1.0212332010269165, | |
| "learning_rate": 9.993527176136775e-06, | |
| "loss": 0.9358, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.03786669602069459, | |
| "grad_norm": 0.9137141108512878, | |
| "learning_rate": 9.993505108217045e-06, | |
| "loss": 0.8561, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.03792173482305025, | |
| "grad_norm": 1.0069375038146973, | |
| "learning_rate": 9.993483002767465e-06, | |
| "loss": 0.8274, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.03797677362540591, | |
| "grad_norm": 0.9820672869682312, | |
| "learning_rate": 9.993460859788204e-06, | |
| "loss": 0.907, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.038031812427761574, | |
| "grad_norm": 1.0042002201080322, | |
| "learning_rate": 9.993438679279428e-06, | |
| "loss": 0.9263, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.038086851230117236, | |
| "grad_norm": 0.9733695983886719, | |
| "learning_rate": 9.993416461241304e-06, | |
| "loss": 0.8455, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.03814189003247289, | |
| "grad_norm": 0.9106015563011169, | |
| "learning_rate": 9.993394205673996e-06, | |
| "loss": 0.8469, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.03819692883482855, | |
| "grad_norm": 0.9802660346031189, | |
| "learning_rate": 9.993371912577677e-06, | |
| "loss": 0.8662, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.03825196763718421, | |
| "grad_norm": 0.9183964729309082, | |
| "learning_rate": 9.99334958195251e-06, | |
| "loss": 0.8968, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.038307006439539874, | |
| "grad_norm": 0.9572185277938843, | |
| "learning_rate": 9.993327213798663e-06, | |
| "loss": 0.953, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.038362045241895536, | |
| "grad_norm": 1.4480071067810059, | |
| "learning_rate": 9.993304808116307e-06, | |
| "loss": 1.1131, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.0384170840442512, | |
| "grad_norm": 0.9297361969947815, | |
| "learning_rate": 9.993282364905607e-06, | |
| "loss": 0.884, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.03847212284660686, | |
| "grad_norm": 0.9400073885917664, | |
| "learning_rate": 9.993259884166735e-06, | |
| "loss": 0.932, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.03852716164896252, | |
| "grad_norm": 0.9231798052787781, | |
| "learning_rate": 9.993237365899858e-06, | |
| "loss": 0.8981, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.03858220045131818, | |
| "grad_norm": 0.8233712911605835, | |
| "learning_rate": 9.993214810105144e-06, | |
| "loss": 0.8218, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.03863723925367384, | |
| "grad_norm": 1.0997854471206665, | |
| "learning_rate": 9.993192216782768e-06, | |
| "loss": 0.9298, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.038692278056029504, | |
| "grad_norm": 0.9570802450180054, | |
| "learning_rate": 9.993169585932893e-06, | |
| "loss": 0.7815, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.03874731685838516, | |
| "grad_norm": 0.9913730025291443, | |
| "learning_rate": 9.993146917555692e-06, | |
| "loss": 0.9621, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.03880235566074082, | |
| "grad_norm": 1.088767409324646, | |
| "learning_rate": 9.993124211651334e-06, | |
| "loss": 0.9295, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.03885739446309648, | |
| "grad_norm": 0.8199124336242676, | |
| "learning_rate": 9.993101468219995e-06, | |
| "loss": 0.7613, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.03891243326545214, | |
| "grad_norm": 1.112566351890564, | |
| "learning_rate": 9.99307868726184e-06, | |
| "loss": 0.791, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.038967472067807804, | |
| "grad_norm": 0.9372578859329224, | |
| "learning_rate": 9.99305586877704e-06, | |
| "loss": 0.8567, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.039022510870163465, | |
| "grad_norm": 1.0167721509933472, | |
| "learning_rate": 9.99303301276577e-06, | |
| "loss": 0.9787, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.03907754967251913, | |
| "grad_norm": 1.3526856899261475, | |
| "learning_rate": 9.993010119228202e-06, | |
| "loss": 1.2215, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.03913258847487479, | |
| "grad_norm": 0.8819016814231873, | |
| "learning_rate": 9.992987188164505e-06, | |
| "loss": 0.7736, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.03918762727723045, | |
| "grad_norm": 1.0033677816390991, | |
| "learning_rate": 9.992964219574852e-06, | |
| "loss": 0.9919, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.03924266607958611, | |
| "grad_norm": 0.894926130771637, | |
| "learning_rate": 9.992941213459417e-06, | |
| "loss": 0.9058, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.03929770488194177, | |
| "grad_norm": 0.9481377005577087, | |
| "learning_rate": 9.992918169818373e-06, | |
| "loss": 0.8436, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.03935274368429743, | |
| "grad_norm": 0.9312933087348938, | |
| "learning_rate": 9.992895088651893e-06, | |
| "loss": 0.8869, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.03940778248665309, | |
| "grad_norm": 0.9765705466270447, | |
| "learning_rate": 9.99287196996015e-06, | |
| "loss": 0.9512, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.03946282128900875, | |
| "grad_norm": 0.9610235691070557, | |
| "learning_rate": 9.992848813743317e-06, | |
| "loss": 0.8005, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.03951786009136441, | |
| "grad_norm": 1.102995753288269, | |
| "learning_rate": 9.99282562000157e-06, | |
| "loss": 0.8017, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.03957289889372007, | |
| "grad_norm": 1.023317575454712, | |
| "learning_rate": 9.99280238873508e-06, | |
| "loss": 0.911, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.039627937696075734, | |
| "grad_norm": 1.0531049966812134, | |
| "learning_rate": 9.992779119944025e-06, | |
| "loss": 0.8562, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.039682976498431395, | |
| "grad_norm": 0.918250322341919, | |
| "learning_rate": 9.992755813628579e-06, | |
| "loss": 0.92, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.039738015300787057, | |
| "grad_norm": 0.8508251309394836, | |
| "learning_rate": 9.992732469788915e-06, | |
| "loss": 0.7347, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.03979305410314272, | |
| "grad_norm": 0.9184926152229309, | |
| "learning_rate": 9.992709088425211e-06, | |
| "loss": 0.8732, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.03984809290549838, | |
| "grad_norm": 1.1613929271697998, | |
| "learning_rate": 9.992685669537643e-06, | |
| "loss": 0.9522, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.039903131707854034, | |
| "grad_norm": 1.091513752937317, | |
| "learning_rate": 9.992662213126386e-06, | |
| "loss": 0.9646, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.039958170510209695, | |
| "grad_norm": 1.057803750038147, | |
| "learning_rate": 9.992638719191615e-06, | |
| "loss": 0.7032, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.040013209312565357, | |
| "grad_norm": 0.8771823644638062, | |
| "learning_rate": 9.992615187733508e-06, | |
| "loss": 0.8577, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.04006824811492102, | |
| "grad_norm": 0.9471028447151184, | |
| "learning_rate": 9.992591618752244e-06, | |
| "loss": 0.9057, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.04012328691727668, | |
| "grad_norm": 0.9547705054283142, | |
| "learning_rate": 9.992568012247995e-06, | |
| "loss": 0.9549, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.04017832571963234, | |
| "grad_norm": 0.8862974047660828, | |
| "learning_rate": 9.992544368220941e-06, | |
| "loss": 0.8593, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.040233364521988, | |
| "grad_norm": 0.906334400177002, | |
| "learning_rate": 9.992520686671261e-06, | |
| "loss": 0.8832, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.04028840332434366, | |
| "grad_norm": 1.07270085811615, | |
| "learning_rate": 9.992496967599133e-06, | |
| "loss": 0.9409, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.040343442126699325, | |
| "grad_norm": 0.9026005268096924, | |
| "learning_rate": 9.992473211004734e-06, | |
| "loss": 0.8326, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.040398480929054986, | |
| "grad_norm": 0.9762942790985107, | |
| "learning_rate": 9.992449416888241e-06, | |
| "loss": 0.9048, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.04045351973141065, | |
| "grad_norm": 0.9658033847808838, | |
| "learning_rate": 9.992425585249837e-06, | |
| "loss": 0.9219, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.0405085585337663, | |
| "grad_norm": 0.8909044861793518, | |
| "learning_rate": 9.992401716089698e-06, | |
| "loss": 0.8564, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.04056359733612196, | |
| "grad_norm": 1.0387929677963257, | |
| "learning_rate": 9.992377809408001e-06, | |
| "loss": 0.9533, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.040618636138477625, | |
| "grad_norm": 0.9044275879859924, | |
| "learning_rate": 9.99235386520493e-06, | |
| "loss": 0.8508, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.040673674940833286, | |
| "grad_norm": 1.019377589225769, | |
| "learning_rate": 9.992329883480667e-06, | |
| "loss": 0.8684, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.04072871374318895, | |
| "grad_norm": 0.9394627213478088, | |
| "learning_rate": 9.992305864235385e-06, | |
| "loss": 0.7665, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.04078375254554461, | |
| "grad_norm": 0.8652323484420776, | |
| "learning_rate": 9.99228180746927e-06, | |
| "loss": 0.8576, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.04083879134790027, | |
| "grad_norm": 0.9347619414329529, | |
| "learning_rate": 9.992257713182502e-06, | |
| "loss": 0.9586, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.04089383015025593, | |
| "grad_norm": 0.9510203003883362, | |
| "learning_rate": 9.99223358137526e-06, | |
| "loss": 0.9092, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.04094886895261159, | |
| "grad_norm": 0.8242866396903992, | |
| "learning_rate": 9.992209412047729e-06, | |
| "loss": 0.6997, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.041003907754967255, | |
| "grad_norm": 0.8842730522155762, | |
| "learning_rate": 9.992185205200087e-06, | |
| "loss": 0.8873, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.041058946557322916, | |
| "grad_norm": 1.0813730955123901, | |
| "learning_rate": 9.992160960832518e-06, | |
| "loss": 1.0162, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.04111398535967857, | |
| "grad_norm": 1.1276283264160156, | |
| "learning_rate": 9.9921366789452e-06, | |
| "loss": 1.0004, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.04116902416203423, | |
| "grad_norm": 0.8810326457023621, | |
| "learning_rate": 9.992112359538323e-06, | |
| "loss": 0.7823, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.04122406296438989, | |
| "grad_norm": 0.9939407110214233, | |
| "learning_rate": 9.992088002612066e-06, | |
| "loss": 1.0016, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.041279101766745555, | |
| "grad_norm": 1.0963523387908936, | |
| "learning_rate": 9.99206360816661e-06, | |
| "loss": 0.9252, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.041334140569101216, | |
| "grad_norm": 1.1346478462219238, | |
| "learning_rate": 9.99203917620214e-06, | |
| "loss": 0.9608, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.04138917937145688, | |
| "grad_norm": 1.0108580589294434, | |
| "learning_rate": 9.992014706718841e-06, | |
| "loss": 0.9179, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.04144421817381254, | |
| "grad_norm": 0.897293210029602, | |
| "learning_rate": 9.991990199716894e-06, | |
| "loss": 0.9295, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.0414992569761682, | |
| "grad_norm": 1.0152363777160645, | |
| "learning_rate": 9.991965655196488e-06, | |
| "loss": 0.8467, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.04155429577852386, | |
| "grad_norm": 0.8655388355255127, | |
| "learning_rate": 9.9919410731578e-06, | |
| "loss": 0.796, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.04160933458087952, | |
| "grad_norm": 1.0140331983566284, | |
| "learning_rate": 9.991916453601023e-06, | |
| "loss": 0.8444, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.041664373383235184, | |
| "grad_norm": 0.9387341141700745, | |
| "learning_rate": 9.991891796526338e-06, | |
| "loss": 0.8669, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.04171941218559084, | |
| "grad_norm": 0.9395696520805359, | |
| "learning_rate": 9.991867101933928e-06, | |
| "loss": 0.8376, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.0417744509879465, | |
| "grad_norm": 1.0856634378433228, | |
| "learning_rate": 9.991842369823983e-06, | |
| "loss": 0.9271, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.04182948979030216, | |
| "grad_norm": 0.8777190446853638, | |
| "learning_rate": 9.991817600196687e-06, | |
| "loss": 0.9197, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.04188452859265782, | |
| "grad_norm": 0.9639917016029358, | |
| "learning_rate": 9.991792793052225e-06, | |
| "loss": 0.8835, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.041939567395013484, | |
| "grad_norm": 0.9384773969650269, | |
| "learning_rate": 9.991767948390785e-06, | |
| "loss": 0.8403, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.041994606197369146, | |
| "grad_norm": 0.8987650275230408, | |
| "learning_rate": 9.991743066212554e-06, | |
| "loss": 0.7948, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.04204964499972481, | |
| "grad_norm": 1.0545049905776978, | |
| "learning_rate": 9.991718146517717e-06, | |
| "loss": 0.9359, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.04210468380208047, | |
| "grad_norm": 0.9840022325515747, | |
| "learning_rate": 9.991693189306463e-06, | |
| "loss": 0.9188, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.04215972260443613, | |
| "grad_norm": 0.8769927620887756, | |
| "learning_rate": 9.991668194578981e-06, | |
| "loss": 0.8647, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.04221476140679179, | |
| "grad_norm": 0.9268791675567627, | |
| "learning_rate": 9.991643162335455e-06, | |
| "loss": 0.897, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.042269800209147446, | |
| "grad_norm": 0.9316747784614563, | |
| "learning_rate": 9.991618092576075e-06, | |
| "loss": 0.9341, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.04232483901150311, | |
| "grad_norm": 0.8348364233970642, | |
| "learning_rate": 9.991592985301031e-06, | |
| "loss": 0.7528, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.04237987781385877, | |
| "grad_norm": 0.9139068126678467, | |
| "learning_rate": 9.99156784051051e-06, | |
| "loss": 0.8596, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.04243491661621443, | |
| "grad_norm": 0.9403928518295288, | |
| "learning_rate": 9.991542658204701e-06, | |
| "loss": 0.974, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.04248995541857009, | |
| "grad_norm": 0.993549108505249, | |
| "learning_rate": 9.991517438383793e-06, | |
| "loss": 0.9479, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.04254499422092575, | |
| "grad_norm": 0.8494916558265686, | |
| "learning_rate": 9.991492181047975e-06, | |
| "loss": 0.9149, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.042600033023281414, | |
| "grad_norm": 1.0351910591125488, | |
| "learning_rate": 9.991466886197441e-06, | |
| "loss": 0.9552, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.042655071825637075, | |
| "grad_norm": 0.916829526424408, | |
| "learning_rate": 9.991441553832375e-06, | |
| "loss": 0.8781, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.04271011062799274, | |
| "grad_norm": 1.113476276397705, | |
| "learning_rate": 9.991416183952972e-06, | |
| "loss": 0.8137, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.0427651494303484, | |
| "grad_norm": 1.1608171463012695, | |
| "learning_rate": 9.991390776559421e-06, | |
| "loss": 1.0045, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.04282018823270406, | |
| "grad_norm": 1.0045493841171265, | |
| "learning_rate": 9.991365331651913e-06, | |
| "loss": 0.8813, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.042875227035059714, | |
| "grad_norm": 0.918820858001709, | |
| "learning_rate": 9.991339849230639e-06, | |
| "loss": 0.9198, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.042930265837415375, | |
| "grad_norm": 0.9875735640525818, | |
| "learning_rate": 9.991314329295792e-06, | |
| "loss": 0.8665, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.04298530463977104, | |
| "grad_norm": 0.873768150806427, | |
| "learning_rate": 9.991288771847561e-06, | |
| "loss": 0.8606, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.0430403434421267, | |
| "grad_norm": 0.8892746567726135, | |
| "learning_rate": 9.991263176886139e-06, | |
| "loss": 0.9011, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.04309538224448236, | |
| "grad_norm": 1.097734808921814, | |
| "learning_rate": 9.99123754441172e-06, | |
| "loss": 1.009, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.04315042104683802, | |
| "grad_norm": 1.0065964460372925, | |
| "learning_rate": 9.991211874424497e-06, | |
| "loss": 0.9492, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.04320545984919368, | |
| "grad_norm": 1.0791678428649902, | |
| "learning_rate": 9.99118616692466e-06, | |
| "loss": 1.0142, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.043260498651549344, | |
| "grad_norm": 0.9454777836799622, | |
| "learning_rate": 9.991160421912404e-06, | |
| "loss": 0.8058, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.043315537453905005, | |
| "grad_norm": 0.9448156952857971, | |
| "learning_rate": 9.991134639387922e-06, | |
| "loss": 0.8184, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.043370576256260666, | |
| "grad_norm": 0.9636550545692444, | |
| "learning_rate": 9.99110881935141e-06, | |
| "loss": 0.8606, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.04342561505861633, | |
| "grad_norm": 0.9933613538742065, | |
| "learning_rate": 9.991082961803058e-06, | |
| "loss": 0.9449, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.04348065386097198, | |
| "grad_norm": 0.8906797170639038, | |
| "learning_rate": 9.991057066743065e-06, | |
| "loss": 0.8053, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.043535692663327644, | |
| "grad_norm": 1.0393906831741333, | |
| "learning_rate": 9.991031134171621e-06, | |
| "loss": 0.8487, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.043590731465683305, | |
| "grad_norm": 1.0618231296539307, | |
| "learning_rate": 9.991005164088923e-06, | |
| "loss": 0.9847, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.043645770268038966, | |
| "grad_norm": 0.9525149464607239, | |
| "learning_rate": 9.990979156495167e-06, | |
| "loss": 0.9318, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.04370080907039463, | |
| "grad_norm": 0.9430851936340332, | |
| "learning_rate": 9.990953111390546e-06, | |
| "loss": 0.8483, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.04375584787275029, | |
| "grad_norm": 0.9259672164916992, | |
| "learning_rate": 9.99092702877526e-06, | |
| "loss": 0.9365, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.04381088667510595, | |
| "grad_norm": 0.942609965801239, | |
| "learning_rate": 9.9909009086495e-06, | |
| "loss": 0.8408, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.04386592547746161, | |
| "grad_norm": 0.939255952835083, | |
| "learning_rate": 9.990874751013467e-06, | |
| "loss": 0.8749, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.04392096427981727, | |
| "grad_norm": 1.1701711416244507, | |
| "learning_rate": 9.990848555867353e-06, | |
| "loss": 0.9312, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.043976003082172935, | |
| "grad_norm": 1.0441124439239502, | |
| "learning_rate": 9.990822323211358e-06, | |
| "loss": 0.8618, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.04403104188452859, | |
| "grad_norm": 0.9601489305496216, | |
| "learning_rate": 9.990796053045679e-06, | |
| "loss": 0.9569, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.04408608068688425, | |
| "grad_norm": 0.9394032955169678, | |
| "learning_rate": 9.990769745370513e-06, | |
| "loss": 0.846, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.04414111948923991, | |
| "grad_norm": 0.9631348252296448, | |
| "learning_rate": 9.990743400186056e-06, | |
| "loss": 0.8754, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.04419615829159557, | |
| "grad_norm": 0.9234963059425354, | |
| "learning_rate": 9.990717017492508e-06, | |
| "loss": 0.8613, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.044251197093951235, | |
| "grad_norm": 0.9169090390205383, | |
| "learning_rate": 9.990690597290069e-06, | |
| "loss": 0.8867, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.044306235896306896, | |
| "grad_norm": 1.0194867849349976, | |
| "learning_rate": 9.990664139578933e-06, | |
| "loss": 0.8675, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.04436127469866256, | |
| "grad_norm": 1.3226114511489868, | |
| "learning_rate": 9.990637644359302e-06, | |
| "loss": 0.997, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.04441631350101822, | |
| "grad_norm": 0.8904317617416382, | |
| "learning_rate": 9.990611111631374e-06, | |
| "loss": 0.7274, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.04447135230337388, | |
| "grad_norm": 0.8909007906913757, | |
| "learning_rate": 9.99058454139535e-06, | |
| "loss": 0.8141, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.04452639110572954, | |
| "grad_norm": 1.004015564918518, | |
| "learning_rate": 9.990557933651429e-06, | |
| "loss": 0.9883, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.0445814299080852, | |
| "grad_norm": 1.1215732097625732, | |
| "learning_rate": 9.990531288399807e-06, | |
| "loss": 0.9355, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.04463646871044086, | |
| "grad_norm": 1.0545012950897217, | |
| "learning_rate": 9.99050460564069e-06, | |
| "loss": 0.9532, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.04469150751279652, | |
| "grad_norm": 0.9608867168426514, | |
| "learning_rate": 9.990477885374277e-06, | |
| "loss": 0.9363, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.04474654631515218, | |
| "grad_norm": 0.8750461935997009, | |
| "learning_rate": 9.990451127600766e-06, | |
| "loss": 0.7343, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.04480158511750784, | |
| "grad_norm": 0.891740620136261, | |
| "learning_rate": 9.99042433232036e-06, | |
| "loss": 0.8541, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.0448566239198635, | |
| "grad_norm": 1.1520029306411743, | |
| "learning_rate": 9.990397499533264e-06, | |
| "loss": 0.7696, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.044911662722219164, | |
| "grad_norm": 0.9526278972625732, | |
| "learning_rate": 9.990370629239673e-06, | |
| "loss": 0.8953, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.044966701524574826, | |
| "grad_norm": 0.9218434691429138, | |
| "learning_rate": 9.990343721439795e-06, | |
| "loss": 0.8198, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.04502174032693049, | |
| "grad_norm": 0.8502745628356934, | |
| "learning_rate": 9.990316776133827e-06, | |
| "loss": 0.8035, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.04507677912928615, | |
| "grad_norm": 0.8861565589904785, | |
| "learning_rate": 9.990289793321975e-06, | |
| "loss": 0.8626, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.04513181793164181, | |
| "grad_norm": 1.1113256216049194, | |
| "learning_rate": 9.99026277300444e-06, | |
| "loss": 0.9363, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.04518685673399747, | |
| "grad_norm": 0.9984708428382874, | |
| "learning_rate": 9.990235715181426e-06, | |
| "loss": 1.0376, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.045241895536353126, | |
| "grad_norm": 0.9026711583137512, | |
| "learning_rate": 9.990208619853137e-06, | |
| "loss": 0.9079, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.04529693433870879, | |
| "grad_norm": 0.8724965453147888, | |
| "learning_rate": 9.990181487019775e-06, | |
| "loss": 0.8665, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.04535197314106445, | |
| "grad_norm": 0.8923047780990601, | |
| "learning_rate": 9.990154316681543e-06, | |
| "loss": 0.7779, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.04540701194342011, | |
| "grad_norm": 0.9024640321731567, | |
| "learning_rate": 9.99012710883865e-06, | |
| "loss": 0.8859, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.04546205074577577, | |
| "grad_norm": 0.9245888590812683, | |
| "learning_rate": 9.990099863491296e-06, | |
| "loss": 0.8501, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.04551708954813143, | |
| "grad_norm": 0.9257050156593323, | |
| "learning_rate": 9.990072580639687e-06, | |
| "loss": 0.9561, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.045572128350487094, | |
| "grad_norm": 0.995610773563385, | |
| "learning_rate": 9.99004526028403e-06, | |
| "loss": 0.917, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.045627167152842756, | |
| "grad_norm": 0.9524009823799133, | |
| "learning_rate": 9.990017902424525e-06, | |
| "loss": 0.9184, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.04568220595519842, | |
| "grad_norm": 0.9264503121376038, | |
| "learning_rate": 9.989990507061385e-06, | |
| "loss": 0.8615, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.04573724475755408, | |
| "grad_norm": 1.0068570375442505, | |
| "learning_rate": 9.989963074194809e-06, | |
| "loss": 0.8331, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.04579228355990974, | |
| "grad_norm": 0.9295952320098877, | |
| "learning_rate": 9.989935603825009e-06, | |
| "loss": 0.8387, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.045847322362265394, | |
| "grad_norm": 1.0408827066421509, | |
| "learning_rate": 9.989908095952186e-06, | |
| "loss": 0.9686, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.045902361164621056, | |
| "grad_norm": 0.8874136209487915, | |
| "learning_rate": 9.989880550576551e-06, | |
| "loss": 0.815, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.04595739996697672, | |
| "grad_norm": 0.9898836016654968, | |
| "learning_rate": 9.989852967698311e-06, | |
| "loss": 0.9458, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.04601243876933238, | |
| "grad_norm": 0.9828970432281494, | |
| "learning_rate": 9.989825347317668e-06, | |
| "loss": 0.7922, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.04606747757168804, | |
| "grad_norm": 1.025447964668274, | |
| "learning_rate": 9.989797689434836e-06, | |
| "loss": 0.9349, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.0461225163740437, | |
| "grad_norm": 0.8623831272125244, | |
| "learning_rate": 9.98976999405002e-06, | |
| "loss": 0.8786, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.04617755517639936, | |
| "grad_norm": 0.9614997506141663, | |
| "learning_rate": 9.98974226116343e-06, | |
| "loss": 0.7885, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.046232593978755024, | |
| "grad_norm": 1.0207616090774536, | |
| "learning_rate": 9.989714490775269e-06, | |
| "loss": 0.9786, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.046287632781110685, | |
| "grad_norm": 0.8509595990180969, | |
| "learning_rate": 9.98968668288575e-06, | |
| "loss": 0.7312, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.04634267158346635, | |
| "grad_norm": 0.9822607040405273, | |
| "learning_rate": 9.989658837495084e-06, | |
| "loss": 0.952, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.046397710385822, | |
| "grad_norm": 1.0058252811431885, | |
| "learning_rate": 9.989630954603477e-06, | |
| "loss": 0.8811, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.04645274918817766, | |
| "grad_norm": 1.0146985054016113, | |
| "learning_rate": 9.989603034211139e-06, | |
| "loss": 0.9051, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.046507787990533324, | |
| "grad_norm": 0.8976503610610962, | |
| "learning_rate": 9.98957507631828e-06, | |
| "loss": 0.879, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.046562826792888985, | |
| "grad_norm": 0.8791939616203308, | |
| "learning_rate": 9.989547080925111e-06, | |
| "loss": 0.8944, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.04661786559524465, | |
| "grad_norm": 0.8530884981155396, | |
| "learning_rate": 9.989519048031842e-06, | |
| "loss": 0.9029, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.04667290439760031, | |
| "grad_norm": 0.9621617197990417, | |
| "learning_rate": 9.989490977638683e-06, | |
| "loss": 0.8374, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.04672794319995597, | |
| "grad_norm": 0.9629075527191162, | |
| "learning_rate": 9.989462869745845e-06, | |
| "loss": 0.9032, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.04678298200231163, | |
| "grad_norm": 1.3256126642227173, | |
| "learning_rate": 9.989434724353541e-06, | |
| "loss": 0.9748, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.04683802080466729, | |
| "grad_norm": 1.0230494737625122, | |
| "learning_rate": 9.989406541461979e-06, | |
| "loss": 0.9752, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.046893059607022954, | |
| "grad_norm": 0.8454533219337463, | |
| "learning_rate": 9.989378321071375e-06, | |
| "loss": 0.8426, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.046948098409378615, | |
| "grad_norm": 0.9995863437652588, | |
| "learning_rate": 9.989350063181939e-06, | |
| "loss": 0.9955, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.04700313721173427, | |
| "grad_norm": 0.8956604599952698, | |
| "learning_rate": 9.989321767793883e-06, | |
| "loss": 0.9024, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.04705817601408993, | |
| "grad_norm": 1.0123292207717896, | |
| "learning_rate": 9.989293434907419e-06, | |
| "loss": 0.7856, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.04711321481644559, | |
| "grad_norm": 0.814577043056488, | |
| "learning_rate": 9.989265064522762e-06, | |
| "loss": 0.8377, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.047168253618801254, | |
| "grad_norm": 1.1571552753448486, | |
| "learning_rate": 9.989236656640125e-06, | |
| "loss": 0.8562, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.047223292421156915, | |
| "grad_norm": 0.9681577682495117, | |
| "learning_rate": 9.98920821125972e-06, | |
| "loss": 0.8473, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.047278331223512576, | |
| "grad_norm": 0.9680121541023254, | |
| "learning_rate": 9.989179728381761e-06, | |
| "loss": 0.9811, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.04733337002586824, | |
| "grad_norm": 0.985477089881897, | |
| "learning_rate": 9.989151208006464e-06, | |
| "loss": 0.6994, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.0473884088282239, | |
| "grad_norm": 0.8612962365150452, | |
| "learning_rate": 9.98912265013404e-06, | |
| "loss": 0.7667, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.04744344763057956, | |
| "grad_norm": 0.8884604573249817, | |
| "learning_rate": 9.989094054764708e-06, | |
| "loss": 0.8382, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.04749848643293522, | |
| "grad_norm": 1.036881923675537, | |
| "learning_rate": 9.989065421898681e-06, | |
| "loss": 0.8748, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.04755352523529088, | |
| "grad_norm": 0.9954493045806885, | |
| "learning_rate": 9.989036751536171e-06, | |
| "loss": 0.9174, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.04760856403764654, | |
| "grad_norm": 0.9984694123268127, | |
| "learning_rate": 9.989008043677399e-06, | |
| "loss": 0.7636, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.0476636028400022, | |
| "grad_norm": 1.0412588119506836, | |
| "learning_rate": 9.988979298322576e-06, | |
| "loss": 0.773, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.04771864164235786, | |
| "grad_norm": 0.8034874796867371, | |
| "learning_rate": 9.98895051547192e-06, | |
| "loss": 0.7914, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.04777368044471352, | |
| "grad_norm": 0.8983979225158691, | |
| "learning_rate": 9.988921695125648e-06, | |
| "loss": 0.7292, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.04782871924706918, | |
| "grad_norm": 0.9445077776908875, | |
| "learning_rate": 9.988892837283976e-06, | |
| "loss": 0.8263, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.047883758049424845, | |
| "grad_norm": 1.0753306150436401, | |
| "learning_rate": 9.988863941947121e-06, | |
| "loss": 1.1122, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.047938796851780506, | |
| "grad_norm": 1.0091484785079956, | |
| "learning_rate": 9.9888350091153e-06, | |
| "loss": 0.9276, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.04799383565413617, | |
| "grad_norm": 1.0977306365966797, | |
| "learning_rate": 9.988806038788732e-06, | |
| "loss": 0.854, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.04804887445649183, | |
| "grad_norm": 1.0285007953643799, | |
| "learning_rate": 9.988777030967632e-06, | |
| "loss": 0.9441, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.04810391325884749, | |
| "grad_norm": 0.8973976373672485, | |
| "learning_rate": 9.988747985652218e-06, | |
| "loss": 0.786, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.04815895206120315, | |
| "grad_norm": 0.9809553623199463, | |
| "learning_rate": 9.98871890284271e-06, | |
| "loss": 0.9042, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.048213990863558806, | |
| "grad_norm": 0.8514279723167419, | |
| "learning_rate": 9.988689782539326e-06, | |
| "loss": 0.7874, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.04826902966591447, | |
| "grad_norm": 0.8299674391746521, | |
| "learning_rate": 9.988660624742286e-06, | |
| "loss": 0.8704, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.04832406846827013, | |
| "grad_norm": 0.9862462282180786, | |
| "learning_rate": 9.988631429451809e-06, | |
| "loss": 0.9963, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.04837910727062579, | |
| "grad_norm": 0.9041131734848022, | |
| "learning_rate": 9.988602196668111e-06, | |
| "loss": 0.9207, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.04843414607298145, | |
| "grad_norm": 0.8597276210784912, | |
| "learning_rate": 9.988572926391416e-06, | |
| "loss": 0.8226, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.04848918487533711, | |
| "grad_norm": 0.9494329690933228, | |
| "learning_rate": 9.988543618621941e-06, | |
| "loss": 0.8834, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.048544223677692774, | |
| "grad_norm": 0.9129118323326111, | |
| "learning_rate": 9.98851427335991e-06, | |
| "loss": 0.7819, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.048599262480048436, | |
| "grad_norm": 0.9145999550819397, | |
| "learning_rate": 9.988484890605539e-06, | |
| "loss": 0.885, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.0486543012824041, | |
| "grad_norm": 1.0115307569503784, | |
| "learning_rate": 9.98845547035905e-06, | |
| "loss": 0.8347, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.04870934008475976, | |
| "grad_norm": 1.1372706890106201, | |
| "learning_rate": 9.988426012620667e-06, | |
| "loss": 0.944, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.04876437888711541, | |
| "grad_norm": 0.9502811431884766, | |
| "learning_rate": 9.98839651739061e-06, | |
| "loss": 0.9054, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.048819417689471074, | |
| "grad_norm": 0.9612823128700256, | |
| "learning_rate": 9.988366984669097e-06, | |
| "loss": 0.8796, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.048874456491826736, | |
| "grad_norm": 0.9551461935043335, | |
| "learning_rate": 9.988337414456355e-06, | |
| "loss": 0.8769, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.0489294952941824, | |
| "grad_norm": 0.8554086089134216, | |
| "learning_rate": 9.988307806752603e-06, | |
| "loss": 0.892, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.04898453409653806, | |
| "grad_norm": 0.8418886661529541, | |
| "learning_rate": 9.988278161558067e-06, | |
| "loss": 0.7568, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.04903957289889372, | |
| "grad_norm": 1.4780360460281372, | |
| "learning_rate": 9.988248478872967e-06, | |
| "loss": 0.9126, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.04909461170124938, | |
| "grad_norm": 0.8236714005470276, | |
| "learning_rate": 9.988218758697526e-06, | |
| "loss": 0.7317, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.04914965050360504, | |
| "grad_norm": 0.8777141571044922, | |
| "learning_rate": 9.988189001031968e-06, | |
| "loss": 0.7989, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.049204689305960704, | |
| "grad_norm": 1.0235031843185425, | |
| "learning_rate": 9.988159205876516e-06, | |
| "loss": 0.8335, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.049259728108316365, | |
| "grad_norm": 0.9340357184410095, | |
| "learning_rate": 9.988129373231395e-06, | |
| "loss": 0.8129, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.04931476691067203, | |
| "grad_norm": 1.7686667442321777, | |
| "learning_rate": 9.98809950309683e-06, | |
| "loss": 0.9792, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.04936980571302768, | |
| "grad_norm": 0.9252369403839111, | |
| "learning_rate": 9.988069595473044e-06, | |
| "loss": 0.8671, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.04942484451538334, | |
| "grad_norm": 0.9989960789680481, | |
| "learning_rate": 9.988039650360262e-06, | |
| "loss": 0.9245, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.049479883317739004, | |
| "grad_norm": 1.062912106513977, | |
| "learning_rate": 9.98800966775871e-06, | |
| "loss": 0.9146, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.049534922120094665, | |
| "grad_norm": 0.8698169589042664, | |
| "learning_rate": 9.98797964766861e-06, | |
| "loss": 0.8606, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.04958996092245033, | |
| "grad_norm": 1.6754224300384521, | |
| "learning_rate": 9.98794959009019e-06, | |
| "loss": 0.9236, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.04964499972480599, | |
| "grad_norm": 1.084174394607544, | |
| "learning_rate": 9.98791949502368e-06, | |
| "loss": 0.9252, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.04970003852716165, | |
| "grad_norm": 0.9866724610328674, | |
| "learning_rate": 9.987889362469301e-06, | |
| "loss": 0.9096, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.04975507732951731, | |
| "grad_norm": 0.8814040422439575, | |
| "learning_rate": 9.987859192427279e-06, | |
| "loss": 0.8475, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.04981011613187297, | |
| "grad_norm": 0.8796457052230835, | |
| "learning_rate": 9.987828984897843e-06, | |
| "loss": 0.8478, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.049865154934228634, | |
| "grad_norm": 1.0541884899139404, | |
| "learning_rate": 9.98779873988122e-06, | |
| "loss": 0.9799, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.049920193736584295, | |
| "grad_norm": 0.91409832239151, | |
| "learning_rate": 9.987768457377636e-06, | |
| "loss": 0.8701, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.04997523253893995, | |
| "grad_norm": 1.0120370388031006, | |
| "learning_rate": 9.98773813738732e-06, | |
| "loss": 0.8417, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.05003027134129561, | |
| "grad_norm": 1.7744206190109253, | |
| "learning_rate": 9.987707779910499e-06, | |
| "loss": 0.9263, | |
| "step": 909 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 36338, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 909, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.682514714121994e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
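
The block above is a Hugging Face `Trainer` state file (`trainer_state.json`): with `"logging_steps": 1` and `"save_steps": 909`, the 909 entries in `log_history` cover every optimizer step up to the first saved checkpoint (`global_step` 909, roughly 2.5% of the 36,338-step run, epoch ~0.05 of 2). Below is a minimal, self-contained sketch of how such a file can be summarized offline; the on-disk filename, the 50-step smoothing window, and the 3x spike threshold are illustrative assumptions, not values taken from the file.

```python
# Minimal sketch for inspecting a trainer_state.json like the one above.
# Assumes the JSON is saved to disk as "trainer_state.json" (filename is an
# assumption; the Hugging Face Trainer writes this file into each checkpoint
# directory, e.g. checkpoint-909/).
import json
from statistics import mean

with open("trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]  # one dict per logged step
losses = [h["loss"] for h in history if "loss" in h]
grads = [(h["step"], h["grad_norm"]) for h in history if "grad_norm" in h]

# Overall progress as recorded in the file.
print(f"step {state['global_step']} / {state['max_steps']}, "
      f"epoch {state['epoch']:.4f} / {state['num_train_epochs']}")

# Smooth the per-step loss with a trailing window to see the trend through
# the step-to-step noise (the window size is an arbitrary choice).
window = 50
print(f"mean loss over last {window} steps: {mean(losses[-window:]):.4f}")

# Flag gradient-norm spikes relative to a recent baseline; the 3x threshold
# is a heuristic, not anything stored in the file.
baseline = mean(g for _, g in grads[-200:])
spikes = [(s, g) for s, g in grads if g > 3 * baseline]
print(f"grad_norm spikes (> {3 * baseline:.2f}): {spikes[-5:]}")
```

Reading the state this way needs no training framework at all: everything shown (loss curve, gradient-norm spikes such as the early warmup steps, schedule position) is recoverable from the JSON itself.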