| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9671179883945842, |
| "eval_steps": 500, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00048355899419729207, |
| "grad_norm": 1.1121258735656738, |
| "learning_rate": 0.0, |
| "loss": 0.8143, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0009671179883945841, |
| "grad_norm": 1.0534573793411255, |
| "learning_rate": 5.0000000000000004e-08, |
| "loss": 0.7758, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0014506769825918763, |
| "grad_norm": 1.1191091537475586, |
| "learning_rate": 1.0000000000000001e-07, |
| "loss": 0.7958, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0019342359767891683, |
| "grad_norm": 1.0339659452438354, |
| "learning_rate": 1.5000000000000002e-07, |
| "loss": 0.74, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0024177949709864605, |
| "grad_norm": 1.097814917564392, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 0.7844, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0029013539651837525, |
| "grad_norm": 1.0650779008865356, |
| "learning_rate": 2.5000000000000004e-07, |
| "loss": 0.7331, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0033849129593810446, |
| "grad_norm": 1.609460473060608, |
| "learning_rate": 3.0000000000000004e-07, |
| "loss": 0.7787, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0038684719535783366, |
| "grad_norm": 1.2944858074188232, |
| "learning_rate": 3.5000000000000004e-07, |
| "loss": 0.7915, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.004352030947775629, |
| "grad_norm": 1.1526292562484741, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 0.803, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.004835589941972921, |
| "grad_norm": 1.0605498552322388, |
| "learning_rate": 4.5000000000000003e-07, |
| "loss": 0.7525, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.005319148936170213, |
| "grad_norm": 1.080625057220459, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 0.7566, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.005802707930367505, |
| "grad_norm": 1.035576581954956, |
| "learning_rate": 5.5e-07, |
| "loss": 0.7419, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.006286266924564797, |
| "grad_norm": 1.111180305480957, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 0.795, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.006769825918762089, |
| "grad_norm": 1.0369406938552856, |
| "learning_rate": 6.5e-07, |
| "loss": 0.7713, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.007253384912959381, |
| "grad_norm": 1.0771433115005493, |
| "learning_rate": 7.000000000000001e-07, |
| "loss": 0.7698, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.007736943907156673, |
| "grad_norm": 1.0160727500915527, |
| "learning_rate": 7.5e-07, |
| "loss": 0.78, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.008220502901353965, |
| "grad_norm": 0.992279052734375, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 0.7704, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.008704061895551257, |
| "grad_norm": 1.0474507808685303, |
| "learning_rate": 8.500000000000001e-07, |
| "loss": 0.7584, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.00918762088974855, |
| "grad_norm": 1.024119257926941, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 0.7809, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.009671179883945842, |
| "grad_norm": 0.9913584589958191, |
| "learning_rate": 9.500000000000001e-07, |
| "loss": 0.7973, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.010154738878143133, |
| "grad_norm": 0.9856535196304321, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 0.7498, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.010638297872340425, |
| "grad_norm": 1.2484019994735718, |
| "learning_rate": 1.0500000000000001e-06, |
| "loss": 0.7672, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.011121856866537718, |
| "grad_norm": 0.8720110058784485, |
| "learning_rate": 1.1e-06, |
| "loss": 0.7411, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01160541586073501, |
| "grad_norm": 1.005557894706726, |
| "learning_rate": 1.1500000000000002e-06, |
| "loss": 0.7659, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.012088974854932301, |
| "grad_norm": 0.8640859723091125, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 0.7328, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.012572533849129593, |
| "grad_norm": 0.8791433572769165, |
| "learning_rate": 1.25e-06, |
| "loss": 0.7549, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.013056092843326886, |
| "grad_norm": 0.8279618620872498, |
| "learning_rate": 1.3e-06, |
| "loss": 0.7252, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.013539651837524178, |
| "grad_norm": 0.8029568195343018, |
| "learning_rate": 1.3500000000000002e-06, |
| "loss": 0.7296, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.01402321083172147, |
| "grad_norm": 0.8325296640396118, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 0.7522, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.014506769825918761, |
| "grad_norm": 0.9094786643981934, |
| "learning_rate": 1.45e-06, |
| "loss": 0.7436, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.014990328820116054, |
| "grad_norm": 0.7980680465698242, |
| "learning_rate": 1.5e-06, |
| "loss": 0.7018, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.015473887814313346, |
| "grad_norm": 0.7666404247283936, |
| "learning_rate": 1.5500000000000002e-06, |
| "loss": 0.7499, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.015957446808510637, |
| "grad_norm": 0.7892335653305054, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 0.7287, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.01644100580270793, |
| "grad_norm": 0.7180586457252502, |
| "learning_rate": 1.6500000000000003e-06, |
| "loss": 0.7548, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.016924564796905222, |
| "grad_norm": 0.6945351958274841, |
| "learning_rate": 1.7000000000000002e-06, |
| "loss": 0.7032, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.017408123791102514, |
| "grad_norm": 0.752153754234314, |
| "learning_rate": 1.75e-06, |
| "loss": 0.7249, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.017891682785299807, |
| "grad_norm": 0.6585816144943237, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 0.7217, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.0183752417794971, |
| "grad_norm": 0.6001906991004944, |
| "learning_rate": 1.85e-06, |
| "loss": 0.7099, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.018858800773694392, |
| "grad_norm": 0.5837531089782715, |
| "learning_rate": 1.9000000000000002e-06, |
| "loss": 0.678, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.019342359767891684, |
| "grad_norm": 0.5784347057342529, |
| "learning_rate": 1.9500000000000004e-06, |
| "loss": 0.7126, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.019825918762088973, |
| "grad_norm": 0.574742317199707, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.702, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.020309477756286266, |
| "grad_norm": 0.6225464940071106, |
| "learning_rate": 2.05e-06, |
| "loss": 0.6716, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.020793036750483558, |
| "grad_norm": 0.5501518845558167, |
| "learning_rate": 2.1000000000000002e-06, |
| "loss": 0.7047, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.02127659574468085, |
| "grad_norm": 0.5282008051872253, |
| "learning_rate": 2.15e-06, |
| "loss": 0.6777, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.021760154738878143, |
| "grad_norm": 0.5413048267364502, |
| "learning_rate": 2.2e-06, |
| "loss": 0.7094, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.022243713733075435, |
| "grad_norm": 0.511887788772583, |
| "learning_rate": 2.25e-06, |
| "loss": 0.6785, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.022727272727272728, |
| "grad_norm": 0.544187068939209, |
| "learning_rate": 2.3000000000000004e-06, |
| "loss": 0.6744, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.02321083172147002, |
| "grad_norm": 0.5064325928688049, |
| "learning_rate": 2.35e-06, |
| "loss": 0.6593, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.023694390715667313, |
| "grad_norm": 0.5299221873283386, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 0.645, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.024177949709864602, |
| "grad_norm": 0.5178957581520081, |
| "learning_rate": 2.4500000000000003e-06, |
| "loss": 0.6697, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.024661508704061894, |
| "grad_norm": 0.5200456976890564, |
| "learning_rate": 2.5e-06, |
| "loss": 0.6692, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.025145067698259187, |
| "grad_norm": 0.4843021333217621, |
| "learning_rate": 2.55e-06, |
| "loss": 0.6505, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.02562862669245648, |
| "grad_norm": 0.4847952127456665, |
| "learning_rate": 2.6e-06, |
| "loss": 0.6427, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.02611218568665377, |
| "grad_norm": 0.49867674708366394, |
| "learning_rate": 2.6500000000000005e-06, |
| "loss": 0.677, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.026595744680851064, |
| "grad_norm": 0.5223937630653381, |
| "learning_rate": 2.7000000000000004e-06, |
| "loss": 0.6494, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.027079303675048357, |
| "grad_norm": 0.4924434721469879, |
| "learning_rate": 2.7500000000000004e-06, |
| "loss": 0.642, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.02756286266924565, |
| "grad_norm": 0.5522122383117676, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 0.6709, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.02804642166344294, |
| "grad_norm": 0.49998044967651367, |
| "learning_rate": 2.85e-06, |
| "loss": 0.6633, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.02852998065764023, |
| "grad_norm": 0.6035799980163574, |
| "learning_rate": 2.9e-06, |
| "loss": 0.6305, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.029013539651837523, |
| "grad_norm": 0.7769137620925903, |
| "learning_rate": 2.95e-06, |
| "loss": 0.6307, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.029497098646034815, |
| "grad_norm": 0.4490588307380676, |
| "learning_rate": 3e-06, |
| "loss": 0.6334, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.029980657640232108, |
| "grad_norm": 0.42361530661582947, |
| "learning_rate": 3.05e-06, |
| "loss": 0.6053, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.0304642166344294, |
| "grad_norm": 0.4436582326889038, |
| "learning_rate": 3.1000000000000004e-06, |
| "loss": 0.6293, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.030947775628626693, |
| "grad_norm": 0.4686850607395172, |
| "learning_rate": 3.1500000000000003e-06, |
| "loss": 0.6568, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.031431334622823985, |
| "grad_norm": 0.44556960463523865, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 0.6248, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.031914893617021274, |
| "grad_norm": 0.4263205826282501, |
| "learning_rate": 3.2500000000000002e-06, |
| "loss": 0.6153, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.03239845261121857, |
| "grad_norm": 0.4737732708454132, |
| "learning_rate": 3.3000000000000006e-06, |
| "loss": 0.6566, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.03288201160541586, |
| "grad_norm": 0.4620726704597473, |
| "learning_rate": 3.3500000000000005e-06, |
| "loss": 0.6119, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.033365570599613155, |
| "grad_norm": 0.46939659118652344, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 0.6154, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.033849129593810444, |
| "grad_norm": 0.4302070736885071, |
| "learning_rate": 3.45e-06, |
| "loss": 0.6399, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03433268858800774, |
| "grad_norm": 0.4119694232940674, |
| "learning_rate": 3.5e-06, |
| "loss": 0.6211, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.03481624758220503, |
| "grad_norm": 0.4920046925544739, |
| "learning_rate": 3.5500000000000003e-06, |
| "loss": 0.6381, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.03529980657640232, |
| "grad_norm": 0.51338130235672, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 0.6203, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.035783365570599614, |
| "grad_norm": 0.4448227882385254, |
| "learning_rate": 3.65e-06, |
| "loss": 0.6157, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.0362669245647969, |
| "grad_norm": 0.6254576444625854, |
| "learning_rate": 3.7e-06, |
| "loss": 0.6427, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.0367504835589942, |
| "grad_norm": 0.4658832848072052, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 0.6257, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.03723404255319149, |
| "grad_norm": 0.41669055819511414, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 0.6104, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.037717601547388784, |
| "grad_norm": 0.4414327144622803, |
| "learning_rate": 3.85e-06, |
| "loss": 0.5986, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.03820116054158607, |
| "grad_norm": 0.4977372884750366, |
| "learning_rate": 3.900000000000001e-06, |
| "loss": 0.6349, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.03868471953578337, |
| "grad_norm": 0.4130990505218506, |
| "learning_rate": 3.95e-06, |
| "loss": 0.5956, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03916827852998066, |
| "grad_norm": 0.42152485251426697, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.6196, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.039651837524177946, |
| "grad_norm": 0.4156739115715027, |
| "learning_rate": 4.05e-06, |
| "loss": 0.6064, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.04013539651837524, |
| "grad_norm": 0.4026014804840088, |
| "learning_rate": 4.1e-06, |
| "loss": 0.6334, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.04061895551257253, |
| "grad_norm": 1.8549836874008179, |
| "learning_rate": 4.15e-06, |
| "loss": 0.6143, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.04110251450676983, |
| "grad_norm": 0.39867880940437317, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 0.5951, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.041586073500967116, |
| "grad_norm": 0.45038288831710815, |
| "learning_rate": 4.25e-06, |
| "loss": 0.5865, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.04206963249516441, |
| "grad_norm": 0.43607354164123535, |
| "learning_rate": 4.3e-06, |
| "loss": 0.6164, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.0425531914893617, |
| "grad_norm": 0.46121928095817566, |
| "learning_rate": 4.350000000000001e-06, |
| "loss": 0.5702, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.043036750483559, |
| "grad_norm": 2.1593496799468994, |
| "learning_rate": 4.4e-06, |
| "loss": 0.6221, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.043520309477756286, |
| "grad_norm": 0.4066154360771179, |
| "learning_rate": 4.450000000000001e-06, |
| "loss": 0.6203, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.044003868471953575, |
| "grad_norm": 0.5225070118904114, |
| "learning_rate": 4.5e-06, |
| "loss": 0.5805, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.04448742746615087, |
| "grad_norm": 0.5109372138977051, |
| "learning_rate": 4.5500000000000005e-06, |
| "loss": 0.5914, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.04497098646034816, |
| "grad_norm": 0.42148903012275696, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 0.5955, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.045454545454545456, |
| "grad_norm": 0.40933647751808167, |
| "learning_rate": 4.65e-06, |
| "loss": 0.6053, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.045938104448742745, |
| "grad_norm": 0.6771563291549683, |
| "learning_rate": 4.7e-06, |
| "loss": 0.5938, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.04642166344294004, |
| "grad_norm": 0.4065110683441162, |
| "learning_rate": 4.75e-06, |
| "loss": 0.6004, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.04690522243713733, |
| "grad_norm": 0.4085659682750702, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 0.5791, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.047388781431334626, |
| "grad_norm": 0.40749242901802063, |
| "learning_rate": 4.85e-06, |
| "loss": 0.5911, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.047872340425531915, |
| "grad_norm": 0.402582049369812, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 0.6085, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.048355899419729204, |
| "grad_norm": 0.6223399043083191, |
| "learning_rate": 4.95e-06, |
| "loss": 0.5811, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0488394584139265, |
| "grad_norm": 0.7025216221809387, |
| "learning_rate": 5e-06, |
| "loss": 0.5763, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.04932301740812379, |
| "grad_norm": 0.4087183475494385, |
| "learning_rate": 4.99999991856056e-06, |
| "loss": 0.5868, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.049806576402321084, |
| "grad_norm": 0.408280611038208, |
| "learning_rate": 4.999999674242244e-06, |
| "loss": 0.5668, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.05029013539651837, |
| "grad_norm": 0.4054011106491089, |
| "learning_rate": 4.9999992670450685e-06, |
| "loss": 0.5912, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.05077369439071567, |
| "grad_norm": 0.3877287209033966, |
| "learning_rate": 4.99999869696906e-06, |
| "loss": 0.5739, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.05125725338491296, |
| "grad_norm": 0.4031350314617157, |
| "learning_rate": 4.999997964014256e-06, |
| "loss": 0.5982, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.051740812379110254, |
| "grad_norm": 0.43434491753578186, |
| "learning_rate": 4.999997068180702e-06, |
| "loss": 0.5815, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.05222437137330754, |
| "grad_norm": 0.4342786371707916, |
| "learning_rate": 4.99999600946846e-06, |
| "loss": 0.5841, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.05270793036750483, |
| "grad_norm": 0.43785515427589417, |
| "learning_rate": 4.999994787877597e-06, |
| "loss": 0.5584, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.05319148936170213, |
| "grad_norm": 0.38336819410324097, |
| "learning_rate": 4.999993403408192e-06, |
| "loss": 0.5736, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.05367504835589942, |
| "grad_norm": 0.43652138113975525, |
| "learning_rate": 4.999991856060336e-06, |
| "loss": 0.5764, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.05415860735009671, |
| "grad_norm": 0.4337776303291321, |
| "learning_rate": 4.999990145834131e-06, |
| "loss": 0.5784, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.054642166344294, |
| "grad_norm": 0.43474823236465454, |
| "learning_rate": 4.999988272729685e-06, |
| "loss": 0.5687, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.0551257253384913, |
| "grad_norm": 0.4115881025791168, |
| "learning_rate": 4.999986236747124e-06, |
| "loss": 0.5504, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.05560928433268859, |
| "grad_norm": 1.258635401725769, |
| "learning_rate": 4.999984037886578e-06, |
| "loss": 0.5809, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.05609284332688588, |
| "grad_norm": 0.46500417590141296, |
| "learning_rate": 4.999981676148191e-06, |
| "loss": 0.5855, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.05657640232108317, |
| "grad_norm": 0.479522705078125, |
| "learning_rate": 4.999979151532119e-06, |
| "loss": 0.6054, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.05705996131528046, |
| "grad_norm": 0.39936313033103943, |
| "learning_rate": 4.999976464038522e-06, |
| "loss": 0.5517, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.05754352030947776, |
| "grad_norm": 0.597406268119812, |
| "learning_rate": 4.999973613667578e-06, |
| "loss": 0.5743, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.058027079303675046, |
| "grad_norm": 0.6071715354919434, |
| "learning_rate": 4.999970600419474e-06, |
| "loss": 0.5463, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.05851063829787234, |
| "grad_norm": 0.4146825671195984, |
| "learning_rate": 4.999967424294403e-06, |
| "loss": 0.5753, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.05899419729206963, |
| "grad_norm": 0.48899951577186584, |
| "learning_rate": 4.999964085292573e-06, |
| "loss": 0.5582, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.059477756286266927, |
| "grad_norm": 0.4463071823120117, |
| "learning_rate": 4.999960583414204e-06, |
| "loss": 0.5761, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.059961315280464215, |
| "grad_norm": 0.4876002073287964, |
| "learning_rate": 4.999956918659521e-06, |
| "loss": 0.5842, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.06044487427466151, |
| "grad_norm": 0.6326982975006104, |
| "learning_rate": 4.999953091028764e-06, |
| "loss": 0.5683, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.0609284332688588, |
| "grad_norm": 0.45847585797309875, |
| "learning_rate": 4.999949100522183e-06, |
| "loss": 0.5895, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.06141199226305609, |
| "grad_norm": 0.45837166905403137, |
| "learning_rate": 4.9999449471400364e-06, |
| "loss": 0.5595, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.061895551257253385, |
| "grad_norm": 0.404506653547287, |
| "learning_rate": 4.999940630882597e-06, |
| "loss": 0.5798, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.062379110251450674, |
| "grad_norm": 0.38707059621810913, |
| "learning_rate": 4.999936151750143e-06, |
| "loss": 0.5825, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.06286266924564797, |
| "grad_norm": 0.4148559868335724, |
| "learning_rate": 4.99993150974297e-06, |
| "loss": 0.5782, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.06334622823984526, |
| "grad_norm": 0.44726136326789856, |
| "learning_rate": 4.999926704861377e-06, |
| "loss": 0.5842, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.06382978723404255, |
| "grad_norm": 0.4477657973766327, |
| "learning_rate": 4.999921737105678e-06, |
| "loss": 0.5649, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.06431334622823985, |
| "grad_norm": 0.45821887254714966, |
| "learning_rate": 4.999916606476199e-06, |
| "loss": 0.5944, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.06479690522243714, |
| "grad_norm": 0.4118689298629761, |
| "learning_rate": 4.999911312973271e-06, |
| "loss": 0.5617, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.06528046421663443, |
| "grad_norm": 0.8030620813369751, |
| "learning_rate": 4.999905856597241e-06, |
| "loss": 0.5754, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.06576402321083172, |
| "grad_norm": 0.42012104392051697, |
| "learning_rate": 4.999900237348463e-06, |
| "loss": 0.5944, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.06624758220502901, |
| "grad_norm": 0.3971441388130188, |
| "learning_rate": 4.999894455227304e-06, |
| "loss": 0.5429, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.06673114119922631, |
| "grad_norm": 0.44027745723724365, |
| "learning_rate": 4.999888510234141e-06, |
| "loss": 0.5817, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0672147001934236, |
| "grad_norm": 0.4141297936439514, |
| "learning_rate": 4.999882402369361e-06, |
| "loss": 0.588, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.06769825918762089, |
| "grad_norm": 0.4220448434352875, |
| "learning_rate": 4.999876131633361e-06, |
| "loss": 0.5588, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.06818181818181818, |
| "grad_norm": 0.47313711047172546, |
| "learning_rate": 4.999869698026551e-06, |
| "loss": 0.5783, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.06866537717601548, |
| "grad_norm": 0.8543782234191895, |
| "learning_rate": 4.99986310154935e-06, |
| "loss": 0.5635, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.06914893617021277, |
| "grad_norm": 0.4882458448410034, |
| "learning_rate": 4.999856342202187e-06, |
| "loss": 0.5568, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.06963249516441006, |
| "grad_norm": 0.395766943693161, |
| "learning_rate": 4.999849419985502e-06, |
| "loss": 0.5607, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.07011605415860735, |
| "grad_norm": 0.4293517768383026, |
| "learning_rate": 4.999842334899748e-06, |
| "loss": 0.5755, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.07059961315280464, |
| "grad_norm": 0.41062092781066895, |
| "learning_rate": 4.999835086945384e-06, |
| "loss": 0.5569, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.07108317214700194, |
| "grad_norm": 0.40252384543418884, |
| "learning_rate": 4.999827676122884e-06, |
| "loss": 0.5454, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.07156673114119923, |
| "grad_norm": 0.4043247699737549, |
| "learning_rate": 4.999820102432731e-06, |
| "loss": 0.5663, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.07205029013539652, |
| "grad_norm": 0.45598798990249634, |
| "learning_rate": 4.999812365875417e-06, |
| "loss": 0.5611, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.0725338491295938, |
| "grad_norm": 0.46978089213371277, |
| "learning_rate": 4.999804466451446e-06, |
| "loss": 0.5454, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.07301740812379111, |
| "grad_norm": 0.40247493982315063, |
| "learning_rate": 4.999796404161335e-06, |
| "loss": 0.55, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.0735009671179884, |
| "grad_norm": 0.3953741192817688, |
| "learning_rate": 4.999788179005608e-06, |
| "loss": 0.5682, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.07398452611218569, |
| "grad_norm": 0.41548067331314087, |
| "learning_rate": 4.999779790984799e-06, |
| "loss": 0.5694, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.07446808510638298, |
| "grad_norm": 0.4155537188053131, |
| "learning_rate": 4.999771240099457e-06, |
| "loss": 0.5759, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.07495164410058026, |
| "grad_norm": 0.46990489959716797, |
| "learning_rate": 4.999762526350138e-06, |
| "loss": 0.5706, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.07543520309477757, |
| "grad_norm": 0.4070248007774353, |
| "learning_rate": 4.999753649737411e-06, |
| "loss": 0.5639, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.07591876208897486, |
| "grad_norm": 0.4163321554660797, |
| "learning_rate": 4.999744610261852e-06, |
| "loss": 0.543, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.07640232108317214, |
| "grad_norm": 0.42817747592926025, |
| "learning_rate": 4.999735407924052e-06, |
| "loss": 0.546, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.07688588007736943, |
| "grad_norm": 0.44839778542518616, |
| "learning_rate": 4.9997260427246096e-06, |
| "loss": 0.5531, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.07736943907156674, |
| "grad_norm": 0.48207932710647583, |
| "learning_rate": 4.999716514664135e-06, |
| "loss": 0.545, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.07785299806576403, |
| "grad_norm": 0.38995426893234253, |
| "learning_rate": 4.999706823743248e-06, |
| "loss": 0.5368, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.07833655705996131, |
| "grad_norm": 0.42563074827194214, |
| "learning_rate": 4.999696969962583e-06, |
| "loss": 0.5669, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.0788201160541586, |
| "grad_norm": 0.42152607440948486, |
| "learning_rate": 4.999686953322779e-06, |
| "loss": 0.5815, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.07930367504835589, |
| "grad_norm": 0.4055645167827606, |
| "learning_rate": 4.999676773824489e-06, |
| "loss": 0.5711, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.0797872340425532, |
| "grad_norm": 0.4749949872493744, |
| "learning_rate": 4.9996664314683775e-06, |
| "loss": 0.5445, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.08027079303675048, |
| "grad_norm": 0.5488531589508057, |
| "learning_rate": 4.999655926255118e-06, |
| "loss": 0.5615, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.08075435203094777, |
| "grad_norm": 0.42545777559280396, |
| "learning_rate": 4.999645258185394e-06, |
| "loss": 0.5536, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.08123791102514506, |
| "grad_norm": 0.4722123444080353, |
| "learning_rate": 4.999634427259902e-06, |
| "loss": 0.5566, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.08172147001934237, |
| "grad_norm": 0.5403354167938232, |
| "learning_rate": 4.999623433479346e-06, |
| "loss": 0.5179, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.08220502901353965, |
| "grad_norm": 0.40221697092056274, |
| "learning_rate": 4.999612276844444e-06, |
| "loss": 0.5404, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.08268858800773694, |
| "grad_norm": 0.4005393981933594, |
| "learning_rate": 4.999600957355921e-06, |
| "loss": 0.5391, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.08317214700193423, |
| "grad_norm": 0.4574699103832245, |
| "learning_rate": 4.999589475014516e-06, |
| "loss": 0.5431, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.08365570599613152, |
| "grad_norm": 0.4481971561908722, |
| "learning_rate": 4.999577829820976e-06, |
| "loss": 0.5574, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.08413926499032882, |
| "grad_norm": 0.44044750928878784, |
| "learning_rate": 4.999566021776061e-06, |
| "loss": 0.531, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.08462282398452611, |
| "grad_norm": 0.4010845124721527, |
| "learning_rate": 4.9995540508805385e-06, |
| "loss": 0.5431, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.0851063829787234, |
| "grad_norm": 0.48778480291366577, |
| "learning_rate": 4.99954191713519e-06, |
| "loss": 0.5458, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.08558994197292069, |
| "grad_norm": 0.4292055070400238, |
| "learning_rate": 4.999529620540805e-06, |
| "loss": 0.5227, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.086073500967118, |
| "grad_norm": 0.488142728805542, |
| "learning_rate": 4.999517161098186e-06, |
| "loss": 0.5262, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.08655705996131528, |
| "grad_norm": 0.41607528924942017, |
| "learning_rate": 4.9995045388081434e-06, |
| "loss": 0.5653, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.08704061895551257, |
| "grad_norm": 0.41551366448402405, |
| "learning_rate": 4.999491753671501e-06, |
| "loss": 0.5367, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.08752417794970986, |
| "grad_norm": 0.4160298705101013, |
| "learning_rate": 4.999478805689089e-06, |
| "loss": 0.5446, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.08800773694390715, |
| "grad_norm": 0.38679638504981995, |
| "learning_rate": 4.999465694861754e-06, |
| "loss": 0.5566, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.08849129593810445, |
| "grad_norm": 0.3910638093948364, |
| "learning_rate": 4.999452421190348e-06, |
| "loss": 0.5369, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.08897485493230174, |
| "grad_norm": 0.40391838550567627, |
| "learning_rate": 4.999438984675737e-06, |
| "loss": 0.525, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.08945841392649903, |
| "grad_norm": 0.40901172161102295, |
| "learning_rate": 4.999425385318797e-06, |
| "loss": 0.5518, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.08994197292069632, |
| "grad_norm": 0.44393712282180786, |
| "learning_rate": 4.999411623120413e-06, |
| "loss": 0.5416, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.09042553191489362, |
| "grad_norm": 0.4262126386165619, |
| "learning_rate": 4.999397698081482e-06, |
| "loss": 0.5384, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.09090909090909091, |
| "grad_norm": 0.4574022591114044, |
| "learning_rate": 4.9993836102029105e-06, |
| "loss": 0.5625, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.0913926499032882, |
| "grad_norm": 0.42913538217544556, |
| "learning_rate": 4.999369359485617e-06, |
| "loss": 0.5402, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.09187620889748549, |
| "grad_norm": 0.39854881167411804, |
| "learning_rate": 4.99935494593053e-06, |
| "loss": 0.5571, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.09235976789168278, |
| "grad_norm": 0.4160971939563751, |
| "learning_rate": 4.999340369538588e-06, |
| "loss": 0.555, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.09284332688588008, |
| "grad_norm": 0.39643704891204834, |
| "learning_rate": 4.999325630310741e-06, |
| "loss": 0.5252, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.09332688588007737, |
| "grad_norm": 0.4330281615257263, |
| "learning_rate": 4.999310728247952e-06, |
| "loss": 0.5372, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.09381044487427466, |
| "grad_norm": 0.37870827317237854, |
| "learning_rate": 4.999295663351186e-06, |
| "loss": 0.5402, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.09429400386847195, |
| "grad_norm": 0.4608426094055176, |
| "learning_rate": 4.99928043562143e-06, |
| "loss": 0.5278, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.09477756286266925, |
| "grad_norm": 0.43381285667419434, |
| "learning_rate": 4.9992650450596725e-06, |
| "loss": 0.5624, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.09526112185686654, |
| "grad_norm": 0.414285272359848, |
| "learning_rate": 4.999249491666918e-06, |
| "loss": 0.5531, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.09574468085106383, |
| "grad_norm": 0.46436765789985657, |
| "learning_rate": 4.9992337754441796e-06, |
| "loss": 0.5305, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.09622823984526112, |
| "grad_norm": 0.5019548535346985, |
| "learning_rate": 4.999217896392481e-06, |
| "loss": 0.5429, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.09671179883945841, |
| "grad_norm": 1.2389442920684814, |
| "learning_rate": 4.999201854512857e-06, |
| "loss": 0.5556, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09719535783365571, |
| "grad_norm": 0.42270204424858093, |
| "learning_rate": 4.999185649806352e-06, |
| "loss": 0.5333, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.097678916827853, |
| "grad_norm": 0.40609827637672424, |
| "learning_rate": 4.999169282274023e-06, |
| "loss": 0.5577, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.09816247582205029, |
| "grad_norm": 0.44571995735168457, |
| "learning_rate": 4.999152751916936e-06, |
| "loss": 0.5368, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.09864603481624758, |
| "grad_norm": 0.3832789361476898, |
| "learning_rate": 4.999136058736167e-06, |
| "loss": 0.5423, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.09912959381044488, |
| "grad_norm": 0.4100008010864258, |
| "learning_rate": 4.999119202732805e-06, |
| "loss": 0.539, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.09961315280464217, |
| "grad_norm": 0.41159990429878235, |
| "learning_rate": 4.999102183907947e-06, |
| "loss": 0.5448, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.10009671179883946, |
| "grad_norm": 0.39501649141311646, |
| "learning_rate": 4.999085002262701e-06, |
| "loss": 0.5319, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.10058027079303675, |
| "grad_norm": 0.4287145733833313, |
| "learning_rate": 4.99906765779819e-06, |
| "loss": 0.5261, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.10106382978723404, |
| "grad_norm": 0.45831891894340515, |
| "learning_rate": 4.999050150515541e-06, |
| "loss": 0.5393, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.10154738878143134, |
| "grad_norm": 0.9308096170425415, |
| "learning_rate": 4.999032480415894e-06, |
| "loss": 0.5128, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.10203094777562863, |
| "grad_norm": 0.4070744514465332, |
| "learning_rate": 4.999014647500403e-06, |
| "loss": 0.5284, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.10251450676982592, |
| "grad_norm": 0.5258062481880188, |
| "learning_rate": 4.998996651770228e-06, |
| "loss": 0.5461, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.1029980657640232, |
| "grad_norm": 0.43539905548095703, |
| "learning_rate": 4.998978493226542e-06, |
| "loss": 0.5179, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.10348162475822051, |
| "grad_norm": 0.4096452593803406, |
| "learning_rate": 4.9989601718705275e-06, |
| "loss": 0.5055, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.1039651837524178, |
| "grad_norm": 0.40248751640319824, |
| "learning_rate": 4.998941687703379e-06, |
| "loss": 0.5424, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.10444874274661509, |
| "grad_norm": 0.40936747193336487, |
| "learning_rate": 4.9989230407263e-06, |
| "loss": 0.5297, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.10493230174081238, |
| "grad_norm": 0.402515709400177, |
| "learning_rate": 4.998904230940506e-06, |
| "loss": 0.5415, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.10541586073500966, |
| "grad_norm": 0.3962743878364563, |
| "learning_rate": 4.998885258347223e-06, |
| "loss": 0.5175, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.10589941972920697, |
| "grad_norm": 0.39665189385414124, |
| "learning_rate": 4.998866122947685e-06, |
| "loss": 0.5459, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.10638297872340426, |
| "grad_norm": 0.3833273649215698, |
| "learning_rate": 4.99884682474314e-06, |
| "loss": 0.5417, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.10686653771760155, |
| "grad_norm": 0.3995283842086792, |
| "learning_rate": 4.998827363734846e-06, |
| "loss": 0.5301, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.10735009671179883, |
| "grad_norm": 0.438424676656723, |
| "learning_rate": 4.998807739924071e-06, |
| "loss": 0.5189, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.10783365570599614, |
| "grad_norm": 0.38158780336380005, |
| "learning_rate": 4.998787953312091e-06, |
| "loss": 0.5216, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.10831721470019343, |
| "grad_norm": 0.4054715633392334, |
| "learning_rate": 4.998768003900198e-06, |
| "loss": 0.5203, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.10880077369439071, |
| "grad_norm": 0.41561102867126465, |
| "learning_rate": 4.99874789168969e-06, |
| "loss": 0.5418, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.109284332688588, |
| "grad_norm": 0.42851102352142334, |
| "learning_rate": 4.998727616681879e-06, |
| "loss": 0.5279, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.10976789168278529, |
| "grad_norm": 0.3681640923023224, |
| "learning_rate": 4.998707178878084e-06, |
| "loss": 0.5106, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.1102514506769826, |
| "grad_norm": 0.5779820084571838, |
| "learning_rate": 4.998686578279638e-06, |
| "loss": 0.553, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.11073500967117988, |
| "grad_norm": 0.41628775000572205, |
| "learning_rate": 4.998665814887883e-06, |
| "loss": 0.542, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.11121856866537717, |
| "grad_norm": 0.47802796959877014, |
| "learning_rate": 4.998644888704171e-06, |
| "loss": 0.5051, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.11170212765957446, |
| "grad_norm": 0.41832587122917175, |
| "learning_rate": 4.998623799729865e-06, |
| "loss": 0.5403, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.11218568665377177, |
| "grad_norm": 0.4051823019981384, |
| "learning_rate": 4.99860254796634e-06, |
| "loss": 0.5311, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.11266924564796905, |
| "grad_norm": 0.40647706389427185, |
| "learning_rate": 4.998581133414981e-06, |
| "loss": 0.5128, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.11315280464216634, |
| "grad_norm": 0.4021758437156677, |
| "learning_rate": 4.998559556077182e-06, |
| "loss": 0.5264, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.11363636363636363, |
| "grad_norm": 0.47185152769088745, |
| "learning_rate": 4.99853781595435e-06, |
| "loss": 0.5353, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.11411992263056092, |
| "grad_norm": 0.4166802763938904, |
| "learning_rate": 4.9985159130479e-06, |
| "loss": 0.5138, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.11460348162475822, |
| "grad_norm": 0.4907703399658203, |
| "learning_rate": 4.99849384735926e-06, |
| "loss": 0.543, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.11508704061895551, |
| "grad_norm": 0.6601998805999756, |
| "learning_rate": 4.998471618889867e-06, |
| "loss": 0.5431, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.1155705996131528, |
| "grad_norm": 0.412659615278244, |
| "learning_rate": 4.99844922764117e-06, |
| "loss": 0.5267, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.11605415860735009, |
| "grad_norm": 0.42117926478385925, |
| "learning_rate": 4.998426673614627e-06, |
| "loss": 0.5254, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.1165377176015474, |
| "grad_norm": 0.3980475068092346, |
| "learning_rate": 4.998403956811708e-06, |
| "loss": 0.5077, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.11702127659574468, |
| "grad_norm": 0.4634224474430084, |
| "learning_rate": 4.998381077233894e-06, |
| "loss": 0.5169, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.11750483558994197, |
| "grad_norm": 0.4475371837615967, |
| "learning_rate": 4.998358034882673e-06, |
| "loss": 0.5394, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.11798839458413926, |
| "grad_norm": 0.4416722059249878, |
| "learning_rate": 4.998334829759548e-06, |
| "loss": 0.5314, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.11847195357833655, |
| "grad_norm": 0.42282068729400635, |
| "learning_rate": 4.998311461866031e-06, |
| "loss": 0.5262, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.11895551257253385, |
| "grad_norm": 0.41945409774780273, |
| "learning_rate": 4.998287931203643e-06, |
| "loss": 0.5116, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.11943907156673114, |
| "grad_norm": 0.423809677362442, |
| "learning_rate": 4.99826423777392e-06, |
| "loss": 0.5103, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.11992263056092843, |
| "grad_norm": 0.4222816228866577, |
| "learning_rate": 4.998240381578403e-06, |
| "loss": 0.5519, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.12040618955512572, |
| "grad_norm": 0.46973058581352234, |
| "learning_rate": 4.998216362618646e-06, |
| "loss": 0.5448, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.12088974854932302, |
| "grad_norm": 0.4195155203342438, |
| "learning_rate": 4.998192180896217e-06, |
| "loss": 0.5339, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.12137330754352031, |
| "grad_norm": 0.47963947057724, |
| "learning_rate": 4.998167836412688e-06, |
| "loss": 0.5365, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.1218568665377176, |
| "grad_norm": 0.4399716556072235, |
| "learning_rate": 4.998143329169646e-06, |
| "loss": 0.5273, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.12234042553191489, |
| "grad_norm": 0.436796635389328, |
| "learning_rate": 4.998118659168689e-06, |
| "loss": 0.5317, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.12282398452611218, |
| "grad_norm": 0.5795308351516724, |
| "learning_rate": 4.998093826411423e-06, |
| "loss": 0.5165, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.12330754352030948, |
| "grad_norm": 1.2130300998687744, |
| "learning_rate": 4.998068830899466e-06, |
| "loss": 0.5611, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.12379110251450677, |
| "grad_norm": 0.44857141375541687, |
| "learning_rate": 4.998043672634448e-06, |
| "loss": 0.5224, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.12427466150870406, |
| "grad_norm": 0.4253701865673065, |
| "learning_rate": 4.998018351618007e-06, |
| "loss": 0.527, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.12475822050290135, |
| "grad_norm": 0.41572824120521545, |
| "learning_rate": 4.9979928678517915e-06, |
| "loss": 0.5286, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.12524177949709864, |
| "grad_norm": 0.41825610399246216, |
| "learning_rate": 4.997967221337463e-06, |
| "loss": 0.5312, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.12572533849129594, |
| "grad_norm": 0.45931923389434814, |
| "learning_rate": 4.997941412076693e-06, |
| "loss": 0.5417, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.12620889748549324, |
| "grad_norm": 0.4032868444919586, |
| "learning_rate": 4.997915440071162e-06, |
| "loss": 0.5221, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.12669245647969052, |
| "grad_norm": 0.8206331133842468, |
| "learning_rate": 4.997889305322563e-06, |
| "loss": 0.4896, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.12717601547388782, |
| "grad_norm": 0.4170459806919098, |
| "learning_rate": 4.997863007832597e-06, |
| "loss": 0.532, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.1276595744680851, |
| "grad_norm": 0.41096487641334534, |
| "learning_rate": 4.99783654760298e-06, |
| "loss": 0.5476, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.1281431334622824, |
| "grad_norm": 0.40561696887016296, |
| "learning_rate": 4.997809924635434e-06, |
| "loss": 0.5031, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.1286266924564797, |
| "grad_norm": 0.4312552809715271, |
| "learning_rate": 4.997783138931693e-06, |
| "loss": 0.5423, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.12911025145067698, |
| "grad_norm": 0.43831390142440796, |
| "learning_rate": 4.997756190493505e-06, |
| "loss": 0.5207, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.12959381044487428, |
| "grad_norm": 0.4064142107963562, |
| "learning_rate": 4.997729079322622e-06, |
| "loss": 0.5351, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.13007736943907156, |
| "grad_norm": 0.5566521286964417, |
| "learning_rate": 4.997701805420813e-06, |
| "loss": 0.5231, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.13056092843326886, |
| "grad_norm": 0.5654227137565613, |
| "learning_rate": 4.997674368789854e-06, |
| "loss": 0.5102, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.13104448742746616, |
| "grad_norm": 0.42063838243484497, |
| "learning_rate": 4.997646769431532e-06, |
| "loss": 0.5284, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.13152804642166344, |
| "grad_norm": 0.6299861073493958, |
| "learning_rate": 4.997619007347647e-06, |
| "loss": 0.5365, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.13201160541586074, |
| "grad_norm": 0.4093479514122009, |
| "learning_rate": 4.997591082540006e-06, |
| "loss": 0.5297, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.13249516441005801, |
| "grad_norm": 0.3970275819301605, |
| "learning_rate": 4.997562995010429e-06, |
| "loss": 0.5091, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.13297872340425532, |
| "grad_norm": 0.4937737286090851, |
| "learning_rate": 4.9975347447607455e-06, |
| "loss": 0.5269, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.13346228239845262, |
| "grad_norm": 0.3909159004688263, |
| "learning_rate": 4.997506331792796e-06, |
| "loss": 0.5098, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.1339458413926499, |
| "grad_norm": 0.4457818269729614, |
| "learning_rate": 4.997477756108433e-06, |
| "loss": 0.5359, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.1344294003868472, |
| "grad_norm": 0.451669842004776, |
| "learning_rate": 4.997449017709517e-06, |
| "loss": 0.534, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.1349129593810445, |
| "grad_norm": 0.6622608304023743, |
| "learning_rate": 4.997420116597921e-06, |
| "loss": 0.5034, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.13539651837524178, |
| "grad_norm": 1.008179783821106, |
| "learning_rate": 4.997391052775526e-06, |
| "loss": 0.5117, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.13588007736943908, |
| "grad_norm": 0.4382878541946411, |
| "learning_rate": 4.997361826244229e-06, |
| "loss": 0.5219, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.13636363636363635, |
| "grad_norm": 0.45464539527893066, |
| "learning_rate": 4.997332437005932e-06, |
| "loss": 0.5447, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.13684719535783366, |
| "grad_norm": 0.5995938181877136, |
| "learning_rate": 4.99730288506255e-06, |
| "loss": 0.5292, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.13733075435203096, |
| "grad_norm": 0.4235019385814667, |
| "learning_rate": 4.997273170416007e-06, |
| "loss": 0.5098, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.13781431334622823, |
| "grad_norm": 0.45099836587905884, |
| "learning_rate": 4.997243293068242e-06, |
| "loss": 0.5151, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.13829787234042554, |
| "grad_norm": 0.8173574209213257, |
| "learning_rate": 4.997213253021198e-06, |
| "loss": 0.542, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.1387814313346228, |
| "grad_norm": 0.42866086959838867, |
| "learning_rate": 4.997183050276836e-06, |
| "loss": 0.5198, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.13926499032882012, |
| "grad_norm": 0.42806968092918396, |
| "learning_rate": 4.997152684837121e-06, |
| "loss": 0.5303, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.13974854932301742, |
| "grad_norm": 0.48591941595077515, |
| "learning_rate": 4.997122156704032e-06, |
| "loss": 0.5193, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.1402321083172147, |
| "grad_norm": 0.4372103214263916, |
| "learning_rate": 4.997091465879559e-06, |
| "loss": 0.5227, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.140715667311412, |
| "grad_norm": 0.4667884409427643, |
| "learning_rate": 4.9970606123656995e-06, |
| "loss": 0.5288, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.14119922630560927, |
| "grad_norm": 0.44642403721809387, |
| "learning_rate": 4.997029596164466e-06, |
| "loss": 0.5187, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.14168278529980657, |
| "grad_norm": 0.3939391076564789, |
| "learning_rate": 4.996998417277877e-06, |
| "loss": 0.5142, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.14216634429400388, |
| "grad_norm": 4.678069114685059, |
| "learning_rate": 4.996967075707965e-06, |
| "loss": 0.5002, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.14264990328820115, |
| "grad_norm": 0.5047649145126343, |
| "learning_rate": 4.996935571456773e-06, |
| "loss": 0.5261, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.14313346228239845, |
| "grad_norm": 0.42343783378601074, |
| "learning_rate": 4.9969039045263515e-06, |
| "loss": 0.5199, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.14361702127659576, |
| "grad_norm": 0.4126646816730499, |
| "learning_rate": 4.996872074918765e-06, |
| "loss": 0.5279, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.14410058027079303, |
| "grad_norm": 0.412338525056839, |
| "learning_rate": 4.996840082636087e-06, |
| "loss": 0.5256, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.14458413926499034, |
| "grad_norm": 0.41773638129234314, |
| "learning_rate": 4.996807927680401e-06, |
| "loss": 0.5316, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.1450676982591876, |
| "grad_norm": 0.4320884644985199, |
| "learning_rate": 4.996775610053803e-06, |
| "loss": 0.5026, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1455512572533849, |
| "grad_norm": 0.4417104721069336, |
| "learning_rate": 4.996743129758398e-06, |
| "loss": 0.5238, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.14603481624758222, |
| "grad_norm": 0.427249938249588, |
| "learning_rate": 4.9967104867963025e-06, |
| "loss": 0.5088, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.1465183752417795, |
| "grad_norm": 0.500415027141571, |
| "learning_rate": 4.9966776811696435e-06, |
| "loss": 0.5269, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.1470019342359768, |
| "grad_norm": 0.4268072545528412, |
| "learning_rate": 4.996644712880557e-06, |
| "loss": 0.5211, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.14748549323017407, |
| "grad_norm": 0.4673326909542084, |
| "learning_rate": 4.9966115819311926e-06, |
| "loss": 0.4972, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.14796905222437137, |
| "grad_norm": 0.43386024236679077, |
| "learning_rate": 4.996578288323708e-06, |
| "loss": 0.5025, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.14845261121856868, |
| "grad_norm": 0.42369264364242554, |
| "learning_rate": 4.996544832060272e-06, |
| "loss": 0.5318, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.14893617021276595, |
| "grad_norm": 0.4846673607826233, |
| "learning_rate": 4.996511213143065e-06, |
| "loss": 0.533, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.14941972920696325, |
| "grad_norm": 0.448446661233902, |
| "learning_rate": 4.996477431574277e-06, |
| "loss": 0.5445, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.14990328820116053, |
| "grad_norm": 0.409821480512619, |
| "learning_rate": 4.996443487356109e-06, |
| "loss": 0.4791, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.15038684719535783, |
| "grad_norm": 0.43006864190101624, |
| "learning_rate": 4.9964093804907724e-06, |
| "loss": 0.5241, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.15087040618955513, |
| "grad_norm": 0.4141005873680115, |
| "learning_rate": 4.99637511098049e-06, |
| "loss": 0.5248, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.1513539651837524, |
| "grad_norm": 0.4346350133419037, |
| "learning_rate": 4.996340678827493e-06, |
| "loss": 0.5144, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.1518375241779497, |
| "grad_norm": 0.41779354214668274, |
| "learning_rate": 4.996306084034026e-06, |
| "loss": 0.5277, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.15232108317214701, |
| "grad_norm": 0.4917933940887451, |
| "learning_rate": 4.996271326602342e-06, |
| "loss": 0.5368, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.1528046421663443, |
| "grad_norm": 0.5210337042808533, |
| "learning_rate": 4.996236406534707e-06, |
| "loss": 0.5445, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.1532882011605416, |
| "grad_norm": 0.43788838386535645, |
| "learning_rate": 4.996201323833394e-06, |
| "loss": 0.5169, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.15377176015473887, |
| "grad_norm": 0.4166138470172882, |
| "learning_rate": 4.996166078500691e-06, |
| "loss": 0.4993, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.15425531914893617, |
| "grad_norm": 0.4216192662715912, |
| "learning_rate": 4.9961306705388925e-06, |
| "loss": 0.5269, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.15473887814313347, |
| "grad_norm": 0.40549808740615845, |
| "learning_rate": 4.996095099950307e-06, |
| "loss": 0.5338, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.15522243713733075, |
| "grad_norm": 0.4312421381473541, |
| "learning_rate": 4.9960593667372495e-06, |
| "loss": 0.5076, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.15570599613152805, |
| "grad_norm": 0.4673048257827759, |
| "learning_rate": 4.99602347090205e-06, |
| "loss": 0.5189, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.15618955512572533, |
| "grad_norm": 0.41923579573631287, |
| "learning_rate": 4.995987412447047e-06, |
| "loss": 0.5354, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.15667311411992263, |
| "grad_norm": 0.548323392868042, |
| "learning_rate": 4.995951191374589e-06, |
| "loss": 0.5126, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.15715667311411993, |
| "grad_norm": 0.5140036940574646, |
| "learning_rate": 4.995914807687037e-06, |
| "loss": 0.5273, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.1576402321083172, |
| "grad_norm": 0.4467466473579407, |
| "learning_rate": 4.99587826138676e-06, |
| "loss": 0.5321, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.1581237911025145, |
| "grad_norm": 0.41909724473953247, |
| "learning_rate": 4.9958415524761406e-06, |
| "loss": 0.5176, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.15860735009671179, |
| "grad_norm": 0.44327664375305176, |
| "learning_rate": 4.995804680957569e-06, |
| "loss": 0.5159, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.1590909090909091, |
| "grad_norm": 0.39020073413848877, |
| "learning_rate": 4.9957676468334485e-06, |
| "loss": 0.523, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.1595744680851064, |
| "grad_norm": 0.5223175287246704, |
| "learning_rate": 4.995730450106191e-06, |
| "loss": 0.4969, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.16005802707930367, |
| "grad_norm": 0.48410263657569885, |
| "learning_rate": 4.995693090778222e-06, |
| "loss": 0.4925, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.16054158607350097, |
| "grad_norm": 0.41017135977745056, |
| "learning_rate": 4.995655568851973e-06, |
| "loss": 0.4897, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.16102514506769827, |
| "grad_norm": 0.4232168197631836, |
| "learning_rate": 4.995617884329889e-06, |
| "loss": 0.5311, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.16150870406189555, |
| "grad_norm": 0.4976195693016052, |
| "learning_rate": 4.995580037214427e-06, |
| "loss": 0.5191, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.16199226305609285, |
| "grad_norm": 0.5347289443016052, |
| "learning_rate": 4.99554202750805e-06, |
| "loss": 0.4973, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.16247582205029013, |
| "grad_norm": 0.4547256827354431, |
| "learning_rate": 4.995503855213237e-06, |
| "loss": 0.5302, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.16295938104448743, |
| "grad_norm": 0.4252434968948364, |
| "learning_rate": 4.995465520332474e-06, |
| "loss": 0.4983, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.16344294003868473, |
| "grad_norm": 0.4182872474193573, |
| "learning_rate": 4.995427022868259e-06, |
| "loss": 0.5161, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.163926499032882, |
| "grad_norm": 0.42835015058517456, |
| "learning_rate": 4.9953883628231e-06, |
| "loss": 0.5101, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.1644100580270793, |
| "grad_norm": 0.43284621834754944, |
| "learning_rate": 4.995349540199514e-06, |
| "loss": 0.513, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.16489361702127658, |
| "grad_norm": 0.4314822852611542, |
| "learning_rate": 4.995310555000033e-06, |
| "loss": 0.5203, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.1653771760154739, |
| "grad_norm": 0.4304635524749756, |
| "learning_rate": 4.995271407227195e-06, |
| "loss": 0.5221, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.1658607350096712, |
| "grad_norm": 0.4306887686252594, |
| "learning_rate": 4.995232096883552e-06, |
| "loss": 0.516, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.16634429400386846, |
| "grad_norm": 0.4424187242984772, |
| "learning_rate": 4.995192623971664e-06, |
| "loss": 0.5118, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.16682785299806577, |
| "grad_norm": 0.4274136424064636, |
| "learning_rate": 4.995152988494103e-06, |
| "loss": 0.5032, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.16731141199226304, |
| "grad_norm": 0.49120795726776123, |
| "learning_rate": 4.995113190453452e-06, |
| "loss": 0.5176, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.16779497098646035, |
| "grad_norm": 0.4164622724056244, |
| "learning_rate": 4.995073229852303e-06, |
| "loss": 0.5168, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.16827852998065765, |
| "grad_norm": 0.44215700030326843, |
| "learning_rate": 4.995033106693261e-06, |
| "loss": 0.5171, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.16876208897485492, |
| "grad_norm": 0.5706837773323059, |
| "learning_rate": 4.994992820978937e-06, |
| "loss": 0.5117, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.16924564796905223, |
| "grad_norm": 0.9634613394737244, |
| "learning_rate": 4.99495237271196e-06, |
| "loss": 0.4944, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.16972920696324953, |
| "grad_norm": 0.4841616153717041, |
| "learning_rate": 4.9949117618949615e-06, |
| "loss": 0.53, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.1702127659574468, |
| "grad_norm": 0.43846601247787476, |
| "learning_rate": 4.994870988530589e-06, |
| "loss": 0.5102, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.1706963249516441, |
| "grad_norm": 0.42763012647628784, |
| "learning_rate": 4.994830052621499e-06, |
| "loss": 0.5339, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.17117988394584138, |
| "grad_norm": 0.44781923294067383, |
| "learning_rate": 4.994788954170357e-06, |
| "loss": 0.5201, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.17166344294003869, |
| "grad_norm": 0.4358518421649933, |
| "learning_rate": 4.994747693179844e-06, |
| "loss": 0.5129, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.172147001934236, |
| "grad_norm": 0.4187730848789215, |
| "learning_rate": 4.994706269652644e-06, |
| "loss": 0.5057, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.17263056092843326, |
| "grad_norm": 0.42493316531181335, |
| "learning_rate": 4.994664683591459e-06, |
| "loss": 0.522, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.17311411992263057, |
| "grad_norm": 0.4245206117630005, |
| "learning_rate": 4.994622934998997e-06, |
| "loss": 0.5195, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.17359767891682784, |
| "grad_norm": 0.6871814131736755, |
| "learning_rate": 4.994581023877979e-06, |
| "loss": 0.495, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.17408123791102514, |
| "grad_norm": 0.45684391260147095, |
| "learning_rate": 4.994538950231134e-06, |
| "loss": 0.4917, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.17456479690522245, |
| "grad_norm": 0.4136664569377899, |
| "learning_rate": 4.994496714061205e-06, |
| "loss": 0.5171, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.17504835589941972, |
| "grad_norm": 0.43099161982536316, |
| "learning_rate": 4.994454315370943e-06, |
| "loss": 0.5176, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.17553191489361702, |
| "grad_norm": 0.40930303931236267, |
| "learning_rate": 4.994411754163109e-06, |
| "loss": 0.5285, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.1760154738878143, |
| "grad_norm": 0.4457148611545563, |
| "learning_rate": 4.994369030440477e-06, |
| "loss": 0.4951, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.1764990328820116, |
| "grad_norm": 0.429066926240921, |
| "learning_rate": 4.994326144205831e-06, |
| "loss": 0.5055, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.1769825918762089, |
| "grad_norm": 0.4551694691181183, |
| "learning_rate": 4.994283095461965e-06, |
| "loss": 0.5133, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.17746615087040618, |
| "grad_norm": 0.4065353274345398, |
| "learning_rate": 4.994239884211683e-06, |
| "loss": 0.5115, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.17794970986460348, |
| "grad_norm": 0.4265615940093994, |
| "learning_rate": 4.994196510457801e-06, |
| "loss": 0.5101, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.1784332688588008, |
| "grad_norm": 0.4900777339935303, |
| "learning_rate": 4.994152974203143e-06, |
| "loss": 0.5122, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.17891682785299806, |
| "grad_norm": 0.5280573964118958, |
| "learning_rate": 4.994109275450549e-06, |
| "loss": 0.5116, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.17940038684719536, |
| "grad_norm": 0.6261852979660034, |
| "learning_rate": 4.994065414202863e-06, |
| "loss": 0.5095, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.17988394584139264, |
| "grad_norm": 0.43888241052627563, |
| "learning_rate": 4.994021390462944e-06, |
| "loss": 0.5071, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.18036750483558994, |
| "grad_norm": 0.44395115971565247, |
| "learning_rate": 4.99397720423366e-06, |
| "loss": 0.5157, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.18085106382978725, |
| "grad_norm": 0.40000030398368835, |
| "learning_rate": 4.993932855517889e-06, |
| "loss": 0.5119, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.18133462282398452, |
| "grad_norm": 0.413655549287796, |
| "learning_rate": 4.9938883443185215e-06, |
| "loss": 0.5129, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.18181818181818182, |
| "grad_norm": 0.5221056938171387, |
| "learning_rate": 4.993843670638458e-06, |
| "loss": 0.5144, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.1823017408123791, |
| "grad_norm": 0.4086361825466156, |
| "learning_rate": 4.993798834480607e-06, |
| "loss": 0.4922, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.1827852998065764, |
| "grad_norm": 0.5094679594039917, |
| "learning_rate": 4.99375383584789e-06, |
| "loss": 0.5176, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.1832688588007737, |
| "grad_norm": 0.43078911304473877, |
| "learning_rate": 4.993708674743241e-06, |
| "loss": 0.5031, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.18375241779497098, |
| "grad_norm": 0.4423384666442871, |
| "learning_rate": 4.9936633511696e-06, |
| "loss": 0.5118, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.18423597678916828, |
| "grad_norm": 0.42990249395370483, |
| "learning_rate": 4.99361786512992e-06, |
| "loss": 0.5073, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.18471953578336556, |
| "grad_norm": 0.7277259826660156, |
| "learning_rate": 4.993572216627166e-06, |
| "loss": 0.4965, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.18520309477756286, |
| "grad_norm": 0.41394710540771484, |
| "learning_rate": 4.993526405664311e-06, |
| "loss": 0.5197, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.18568665377176016, |
| "grad_norm": 0.41026055812835693, |
| "learning_rate": 4.99348043224434e-06, |
| "loss": 0.5102, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.18617021276595744, |
| "grad_norm": 0.41871216893196106, |
| "learning_rate": 4.9934342963702485e-06, |
| "loss": 0.5106, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.18665377176015474, |
| "grad_norm": 0.41254499554634094, |
| "learning_rate": 4.993387998045041e-06, |
| "loss": 0.493, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.18713733075435204, |
| "grad_norm": 0.6578147411346436, |
| "learning_rate": 4.993341537271735e-06, |
| "loss": 0.5113, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.18762088974854932, |
| "grad_norm": 0.39468589425086975, |
| "learning_rate": 4.993294914053358e-06, |
| "loss": 0.5202, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.18810444874274662, |
| "grad_norm": 0.39228197932243347, |
| "learning_rate": 4.993248128392947e-06, |
| "loss": 0.5397, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.1885880077369439, |
| "grad_norm": 0.4504663944244385, |
| "learning_rate": 4.99320118029355e-06, |
| "loss": 0.5017, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.1890715667311412, |
| "grad_norm": 0.44478482007980347, |
| "learning_rate": 4.993154069758226e-06, |
| "loss": 0.5128, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.1895551257253385, |
| "grad_norm": 0.506403386592865, |
| "learning_rate": 4.993106796790044e-06, |
| "loss": 0.5166, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.19003868471953578, |
| "grad_norm": 0.4066163897514343, |
| "learning_rate": 4.993059361392083e-06, |
| "loss": 0.515, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.19052224371373308, |
| "grad_norm": 1.3290510177612305, |
| "learning_rate": 4.993011763567436e-06, |
| "loss": 0.4895, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.19100580270793036, |
| "grad_norm": 0.5393165349960327, |
| "learning_rate": 4.992964003319202e-06, |
| "loss": 0.5167, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.19148936170212766, |
| "grad_norm": 0.44667989015579224, |
| "learning_rate": 4.9929160806504925e-06, |
| "loss": 0.5063, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.19197292069632496, |
| "grad_norm": 0.4737187922000885, |
| "learning_rate": 4.992867995564432e-06, |
| "loss": 0.5098, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.19245647969052224, |
| "grad_norm": 0.4211980998516083, |
| "learning_rate": 4.992819748064151e-06, |
| "loss": 0.4908, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.19294003868471954, |
| "grad_norm": 0.41971683502197266, |
| "learning_rate": 4.9927713381527944e-06, |
| "loss": 0.5169, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.19342359767891681, |
| "grad_norm": 0.48630502820014954, |
| "learning_rate": 4.992722765833514e-06, |
| "loss": 0.4927, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.19390715667311412, |
| "grad_norm": 0.4188506305217743, |
| "learning_rate": 4.992674031109477e-06, |
| "loss": 0.4921, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.19439071566731142, |
| "grad_norm": 0.6137151718139648, |
| "learning_rate": 4.9926251339838574e-06, |
| "loss": 0.4918, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.1948742746615087, |
| "grad_norm": 0.41266149282455444, |
| "learning_rate": 4.992576074459841e-06, |
| "loss": 0.5081, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.195357833655706, |
| "grad_norm": 0.42797741293907166, |
| "learning_rate": 4.992526852540624e-06, |
| "loss": 0.5101, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.1958413926499033, |
| "grad_norm": 0.43434789776802063, |
| "learning_rate": 4.992477468229413e-06, |
| "loss": 0.4931, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.19632495164410058, |
| "grad_norm": 0.45915883779525757, |
| "learning_rate": 4.992427921529426e-06, |
| "loss": 0.4757, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.19680851063829788, |
| "grad_norm": 0.5629348754882812, |
| "learning_rate": 4.992378212443891e-06, |
| "loss": 0.5037, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.19729206963249515, |
| "grad_norm": 0.44655969738960266, |
| "learning_rate": 4.992328340976046e-06, |
| "loss": 0.489, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.19777562862669246, |
| "grad_norm": 0.4506199061870575, |
| "learning_rate": 4.992278307129141e-06, |
| "loss": 0.5208, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.19825918762088976, |
| "grad_norm": 0.43655723333358765, |
| "learning_rate": 4.992228110906436e-06, |
| "loss": 0.5089, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.19874274661508703, |
| "grad_norm": 1.3825441598892212, |
| "learning_rate": 4.9921777523112e-06, |
| "loss": 0.4879, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.19922630560928434, |
| "grad_norm": 0.416355699300766, |
| "learning_rate": 4.992127231346715e-06, |
| "loss": 0.5049, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.1997098646034816, |
| "grad_norm": 1.0814541578292847, |
| "learning_rate": 4.992076548016272e-06, |
| "loss": 0.4802, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.20019342359767892, |
| "grad_norm": 0.4396391212940216, |
| "learning_rate": 4.992025702323174e-06, |
| "loss": 0.5203, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.20067698259187622, |
| "grad_norm": 0.525071918964386, |
| "learning_rate": 4.991974694270733e-06, |
| "loss": 0.494, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.2011605415860735, |
| "grad_norm": 0.4244464933872223, |
| "learning_rate": 4.991923523862271e-06, |
| "loss": 0.4994, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.2016441005802708, |
| "grad_norm": 0.4466478228569031, |
| "learning_rate": 4.991872191101124e-06, |
| "loss": 0.4946, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.20212765957446807, |
| "grad_norm": 0.4531770348548889, |
| "learning_rate": 4.991820695990636e-06, |
| "loss": 0.5111, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.20261121856866537, |
| "grad_norm": 0.4186277687549591, |
| "learning_rate": 4.991769038534161e-06, |
| "loss": 0.4844, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.20309477756286268, |
| "grad_norm": 0.42326635122299194, |
| "learning_rate": 4.991717218735065e-06, |
| "loss": 0.5063, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.20357833655705995, |
| "grad_norm": 0.42900604009628296, |
| "learning_rate": 4.991665236596724e-06, |
| "loss": 0.5078, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.20406189555125726, |
| "grad_norm": 0.42313331365585327, |
| "learning_rate": 4.991613092122526e-06, |
| "loss": 0.5122, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.20454545454545456, |
| "grad_norm": 0.49496522545814514, |
| "learning_rate": 4.991560785315866e-06, |
| "loss": 0.4717, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.20502901353965183, |
| "grad_norm": 0.43023598194122314, |
| "learning_rate": 4.991508316180154e-06, |
| "loss": 0.5095, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.20551257253384914, |
| "grad_norm": 0.41138070821762085, |
| "learning_rate": 4.9914556847188076e-06, |
| "loss": 0.4941, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.2059961315280464, |
| "grad_norm": 0.40780410170555115, |
| "learning_rate": 4.991402890935255e-06, |
| "loss": 0.4808, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.20647969052224371, |
| "grad_norm": 0.5113906264305115, |
| "learning_rate": 4.9913499348329375e-06, |
| "loss": 0.4957, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.20696324951644102, |
| "grad_norm": 0.4982716739177704, |
| "learning_rate": 4.991296816415304e-06, |
| "loss": 0.4996, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.2074468085106383, |
| "grad_norm": 0.47301238775253296, |
| "learning_rate": 4.991243535685815e-06, |
| "loss": 0.4982, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.2079303675048356, |
| "grad_norm": 0.48449206352233887, |
| "learning_rate": 4.991190092647943e-06, |
| "loss": 0.4958, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.20841392649903287, |
| "grad_norm": 0.47245344519615173, |
| "learning_rate": 4.991136487305169e-06, |
| "loss": 0.506, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.20889748549323017, |
| "grad_norm": 0.4477512836456299, |
| "learning_rate": 4.9910827196609864e-06, |
| "loss": 0.496, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.20938104448742748, |
| "grad_norm": 0.4135742485523224, |
| "learning_rate": 4.991028789718897e-06, |
| "loss": 0.5174, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.20986460348162475, |
| "grad_norm": 0.4840208888053894, |
| "learning_rate": 4.990974697482415e-06, |
| "loss": 0.5087, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.21034816247582205, |
| "grad_norm": 0.5320113301277161, |
| "learning_rate": 4.990920442955065e-06, |
| "loss": 0.5231, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.21083172147001933, |
| "grad_norm": 0.41861191391944885, |
| "learning_rate": 4.9908660261403815e-06, |
| "loss": 0.4935, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.21131528046421663, |
| "grad_norm": 0.4138176441192627, |
| "learning_rate": 4.99081144704191e-06, |
| "loss": 0.4774, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.21179883945841393, |
| "grad_norm": 0.4283509850502014, |
| "learning_rate": 4.990756705663205e-06, |
| "loss": 0.5045, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.2122823984526112, |
| "grad_norm": 0.4712899923324585, |
| "learning_rate": 4.990701802007835e-06, |
| "loss": 0.4902, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.2127659574468085, |
| "grad_norm": 0.44904825091362, |
| "learning_rate": 4.990646736079376e-06, |
| "loss": 0.52, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.21324951644100582, |
| "grad_norm": 0.42403462529182434, |
| "learning_rate": 4.990591507881416e-06, |
| "loss": 0.4943, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.2137330754352031, |
| "grad_norm": 0.4171410799026489, |
| "learning_rate": 4.990536117417553e-06, |
| "loss": 0.4821, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.2142166344294004, |
| "grad_norm": 0.44567805528640747, |
| "learning_rate": 4.990480564691396e-06, |
| "loss": 0.5198, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.21470019342359767, |
| "grad_norm": 0.40193620324134827, |
| "learning_rate": 4.990424849706563e-06, |
| "loss": 0.5104, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.21518375241779497, |
| "grad_norm": 0.43421146273612976, |
| "learning_rate": 4.990368972466686e-06, |
| "loss": 0.4961, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.21566731141199227, |
| "grad_norm": 0.41703906655311584, |
| "learning_rate": 4.990312932975404e-06, |
| "loss": 0.5104, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.21615087040618955, |
| "grad_norm": 0.42970454692840576, |
| "learning_rate": 4.99025673123637e-06, |
| "loss": 0.4923, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.21663442940038685, |
| "grad_norm": 0.551898717880249, |
| "learning_rate": 4.990200367253243e-06, |
| "loss": 0.4843, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.21711798839458413, |
| "grad_norm": 0.46391811966896057, |
| "learning_rate": 4.990143841029697e-06, |
| "loss": 0.5189, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.21760154738878143, |
| "grad_norm": 0.4471249282360077, |
| "learning_rate": 4.9900871525694135e-06, |
| "loss": 0.4818, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.21808510638297873, |
| "grad_norm": 0.43526965379714966, |
| "learning_rate": 4.990030301876087e-06, |
| "loss": 0.5076, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.218568665377176, |
| "grad_norm": 0.4630556106567383, |
| "learning_rate": 4.989973288953421e-06, |
| "loss": 0.5124, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.2190522243713733, |
| "grad_norm": 0.4085645377635956, |
| "learning_rate": 4.989916113805131e-06, |
| "loss": 0.5021, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.21953578336557059, |
| "grad_norm": 0.4135761260986328, |
| "learning_rate": 4.98985877643494e-06, |
| "loss": 0.4826, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.2200193423597679, |
| "grad_norm": 0.4335331618785858, |
| "learning_rate": 4.989801276846584e-06, |
| "loss": 0.4997, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.2205029013539652, |
| "grad_norm": 0.43082305788993835, |
| "learning_rate": 4.989743615043811e-06, |
| "loss": 0.4937, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.22098646034816247, |
| "grad_norm": 0.4116332232952118, |
| "learning_rate": 4.989685791030377e-06, |
| "loss": 0.5036, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.22147001934235977, |
| "grad_norm": 0.5059003829956055, |
| "learning_rate": 4.989627804810047e-06, |
| "loss": 0.5024, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.22195357833655707, |
| "grad_norm": 0.7639954090118408, |
| "learning_rate": 4.989569656386602e-06, |
| "loss": 0.5046, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.22243713733075435, |
| "grad_norm": 0.43759623169898987, |
| "learning_rate": 4.989511345763829e-06, |
| "loss": 0.5198, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.22292069632495165, |
| "grad_norm": 0.40205639600753784, |
| "learning_rate": 4.989452872945527e-06, |
| "loss": 0.5016, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.22340425531914893, |
| "grad_norm": 0.4914066791534424, |
| "learning_rate": 4.989394237935507e-06, |
| "loss": 0.5008, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.22388781431334623, |
| "grad_norm": 0.479674369096756, |
| "learning_rate": 4.989335440737587e-06, |
| "loss": 0.4899, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.22437137330754353, |
| "grad_norm": 0.4184580147266388, |
| "learning_rate": 4.989276481355598e-06, |
| "loss": 0.502, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.2248549323017408, |
| "grad_norm": 0.4303237199783325, |
| "learning_rate": 4.989217359793383e-06, |
| "loss": 0.4905, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.2253384912959381, |
| "grad_norm": 0.4432399570941925, |
| "learning_rate": 4.989158076054793e-06, |
| "loss": 0.5035, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.22582205029013538, |
| "grad_norm": 0.4811629056930542, |
| "learning_rate": 4.98909863014369e-06, |
| "loss": 0.508, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.2263056092843327, |
| "grad_norm": 0.5259628295898438, |
| "learning_rate": 4.989039022063949e-06, |
| "loss": 0.4932, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.22678916827853, |
| "grad_norm": 0.6295924186706543, |
| "learning_rate": 4.98897925181945e-06, |
| "loss": 0.505, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.22727272727272727, |
| "grad_norm": 0.5184575319290161, |
| "learning_rate": 4.988919319414089e-06, |
| "loss": 0.4964, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.22775628626692457, |
| "grad_norm": 0.45514237880706787, |
| "learning_rate": 4.988859224851772e-06, |
| "loss": 0.4944, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.22823984526112184, |
| "grad_norm": 0.4045606255531311, |
| "learning_rate": 4.988798968136412e-06, |
| "loss": 0.4897, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.22872340425531915, |
| "grad_norm": 0.8098049759864807, |
| "learning_rate": 4.988738549271937e-06, |
| "loss": 0.4773, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.22920696324951645, |
| "grad_norm": 0.41523805260658264, |
| "learning_rate": 4.98867796826228e-06, |
| "loss": 0.5237, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.22969052224371372, |
| "grad_norm": 0.4325871169567108, |
| "learning_rate": 4.988617225111392e-06, |
| "loss": 0.4991, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.23017408123791103, |
| "grad_norm": 0.43960368633270264, |
| "learning_rate": 4.9885563198232275e-06, |
| "loss": 0.5099, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.23065764023210833, |
| "grad_norm": 0.4785735607147217, |
| "learning_rate": 4.988495252401756e-06, |
| "loss": 0.5096, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.2311411992263056, |
| "grad_norm": 0.4099835455417633, |
| "learning_rate": 4.988434022850956e-06, |
| "loss": 0.4957, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.2316247582205029, |
| "grad_norm": 0.43405771255493164, |
| "learning_rate": 4.9883726311748165e-06, |
| "loss": 0.4832, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.23210831721470018, |
| "grad_norm": 0.4160226881504059, |
| "learning_rate": 4.988311077377337e-06, |
| "loss": 0.5046, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.23259187620889749, |
| "grad_norm": 0.44942131638526917, |
| "learning_rate": 4.988249361462528e-06, |
| "loss": 0.4798, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.2330754352030948, |
| "grad_norm": 0.4407511353492737, |
| "learning_rate": 4.988187483434411e-06, |
| "loss": 0.4664, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.23355899419729206, |
| "grad_norm": 0.4470459222793579, |
| "learning_rate": 4.988125443297017e-06, |
| "loss": 0.4917, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.23404255319148937, |
| "grad_norm": 0.44523417949676514, |
| "learning_rate": 4.9880632410543885e-06, |
| "loss": 0.5103, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.23452611218568664, |
| "grad_norm": 0.7564685344696045, |
| "learning_rate": 4.988000876710577e-06, |
| "loss": 0.4999, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.23500967117988394, |
| "grad_norm": 0.5257455706596375, |
| "learning_rate": 4.987938350269646e-06, |
| "loss": 0.508, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.23549323017408125, |
| "grad_norm": 0.41185519099235535, |
| "learning_rate": 4.987875661735669e-06, |
| "loss": 0.5103, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.23597678916827852, |
| "grad_norm": 0.45921629667282104, |
| "learning_rate": 4.987812811112731e-06, |
| "loss": 0.5041, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.23646034816247583, |
| "grad_norm": 0.44184067845344543, |
| "learning_rate": 4.987749798404927e-06, |
| "loss": 0.4948, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2369439071566731, |
| "grad_norm": 0.5813512802124023, |
| "learning_rate": 4.987686623616361e-06, |
| "loss": 0.5122, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.2374274661508704, |
| "grad_norm": 0.42440474033355713, |
| "learning_rate": 4.98762328675115e-06, |
| "loss": 0.5046, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.2379110251450677, |
| "grad_norm": 0.4219503104686737, |
| "learning_rate": 4.98755978781342e-06, |
| "loss": 0.4887, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.23839458413926498, |
| "grad_norm": 0.4938005805015564, |
| "learning_rate": 4.9874961268073095e-06, |
| "loss": 0.4896, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.23887814313346228, |
| "grad_norm": 0.47373083233833313, |
| "learning_rate": 4.987432303736963e-06, |
| "loss": 0.4957, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.2393617021276596, |
| "grad_norm": 0.41886255145072937, |
| "learning_rate": 4.987368318606543e-06, |
| "loss": 0.4895, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.23984526112185686, |
| "grad_norm": 0.41814935207366943, |
| "learning_rate": 4.987304171420214e-06, |
| "loss": 0.4957, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.24032882011605416, |
| "grad_norm": 0.49376171827316284, |
| "learning_rate": 4.987239862182157e-06, |
| "loss": 0.4962, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.24081237911025144, |
| "grad_norm": 0.4292319715023041, |
| "learning_rate": 4.987175390896563e-06, |
| "loss": 0.4968, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.24129593810444874, |
| "grad_norm": 0.42256441712379456, |
| "learning_rate": 4.987110757567631e-06, |
| "loss": 0.4762, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.24177949709864605, |
| "grad_norm": 0.42222288250923157, |
| "learning_rate": 4.987045962199572e-06, |
| "loss": 0.4998, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.24226305609284332, |
| "grad_norm": 0.6006141304969788, |
| "learning_rate": 4.986981004796608e-06, |
| "loss": 0.4924, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.24274661508704062, |
| "grad_norm": 0.4347086548805237, |
| "learning_rate": 4.986915885362971e-06, |
| "loss": 0.4808, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.2432301740812379, |
| "grad_norm": 0.4513223171234131, |
| "learning_rate": 4.986850603902904e-06, |
| "loss": 0.4821, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.2437137330754352, |
| "grad_norm": 0.45349133014678955, |
| "learning_rate": 4.986785160420659e-06, |
| "loss": 0.4844, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.2441972920696325, |
| "grad_norm": 0.43619874119758606, |
| "learning_rate": 4.986719554920501e-06, |
| "loss": 0.4996, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.24468085106382978, |
| "grad_norm": 0.43510088324546814, |
| "learning_rate": 4.986653787406703e-06, |
| "loss": 0.4749, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.24516441005802708, |
| "grad_norm": 0.420758992433548, |
| "learning_rate": 4.986587857883551e-06, |
| "loss": 0.4503, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.24564796905222436, |
| "grad_norm": 0.5028153657913208, |
| "learning_rate": 4.9865217663553405e-06, |
| "loss": 0.5111, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.24613152804642166, |
| "grad_norm": 0.5986948609352112, |
| "learning_rate": 4.986455512826377e-06, |
| "loss": 0.5032, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.24661508704061896, |
| "grad_norm": 0.6458590030670166, |
| "learning_rate": 4.986389097300976e-06, |
| "loss": 0.5118, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.24709864603481624, |
| "grad_norm": 0.4113481640815735, |
| "learning_rate": 4.9863225197834674e-06, |
| "loss": 0.4939, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.24758220502901354, |
| "grad_norm": 0.4217630624771118, |
| "learning_rate": 4.986255780278186e-06, |
| "loss": 0.4785, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.24806576402321084, |
| "grad_norm": 0.44430986046791077, |
| "learning_rate": 4.986188878789481e-06, |
| "loss": 0.4975, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.24854932301740812, |
| "grad_norm": 0.5256595611572266, |
| "learning_rate": 4.98612181532171e-06, |
| "loss": 0.4699, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.24903288201160542, |
| "grad_norm": 0.472182035446167, |
| "learning_rate": 4.9860545898792455e-06, |
| "loss": 0.5036, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.2495164410058027, |
| "grad_norm": 0.41556984186172485, |
| "learning_rate": 4.985987202466465e-06, |
| "loss": 0.4868, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.4145744740962982, |
| "learning_rate": 4.9859196530877586e-06, |
| "loss": 0.4791, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.2504835589941973, |
| "grad_norm": 0.4527714252471924, |
| "learning_rate": 4.985851941747527e-06, |
| "loss": 0.5082, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.2509671179883946, |
| "grad_norm": 0.4882536828517914, |
| "learning_rate": 4.985784068450184e-06, |
| "loss": 0.4883, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.2514506769825919, |
| "grad_norm": 0.4307822585105896, |
| "learning_rate": 4.985716033200149e-06, |
| "loss": 0.4953, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.25193423597678916, |
| "grad_norm": 0.4246330261230469, |
| "learning_rate": 4.985647836001857e-06, |
| "loss": 0.4944, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.2524177949709865, |
| "grad_norm": 0.4518243968486786, |
| "learning_rate": 4.9855794768597484e-06, |
| "loss": 0.4917, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.25290135396518376, |
| "grad_norm": 0.4250892698764801, |
| "learning_rate": 4.98551095577828e-06, |
| "loss": 0.4915, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.25338491295938104, |
| "grad_norm": 0.4896804690361023, |
| "learning_rate": 4.9854422727619135e-06, |
| "loss": 0.4968, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.2538684719535783, |
| "grad_norm": 0.4480974078178406, |
| "learning_rate": 4.985373427815125e-06, |
| "loss": 0.4923, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.25435203094777564, |
| "grad_norm": 0.4182819426059723, |
| "learning_rate": 4.985304420942399e-06, |
| "loss": 0.5064, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.2548355899419729, |
| "grad_norm": 0.42555907368659973, |
| "learning_rate": 4.985235252148233e-06, |
| "loss": 0.4666, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.2553191489361702, |
| "grad_norm": 0.41366714239120483, |
| "learning_rate": 4.985165921437131e-06, |
| "loss": 0.4951, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.2558027079303675, |
| "grad_norm": 0.4588848054409027, |
| "learning_rate": 4.985096428813613e-06, |
| "loss": 0.5035, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.2562862669245648, |
| "grad_norm": 0.7335658669471741, |
| "learning_rate": 4.985026774282205e-06, |
| "loss": 0.452, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.2567698259187621, |
| "grad_norm": 0.4520464539527893, |
| "learning_rate": 4.984956957847445e-06, |
| "loss": 0.506, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.2572533849129594, |
| "grad_norm": 0.45352864265441895, |
| "learning_rate": 4.98488697951388e-06, |
| "loss": 0.4864, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.2577369439071567, |
| "grad_norm": 0.4914877414703369, |
| "learning_rate": 4.984816839286072e-06, |
| "loss": 0.4866, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.25822050290135395, |
| "grad_norm": 0.47104302048683167, |
| "learning_rate": 4.98474653716859e-06, |
| "loss": 0.482, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.2587040618955513, |
| "grad_norm": 0.5308674573898315, |
| "learning_rate": 4.984676073166014e-06, |
| "loss": 0.5137, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.25918762088974856, |
| "grad_norm": 0.44214561581611633, |
| "learning_rate": 4.984605447282934e-06, |
| "loss": 0.4973, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.25967117988394584, |
| "grad_norm": 1.4595553874969482, |
| "learning_rate": 4.9845346595239525e-06, |
| "loss": 0.4951, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.2601547388781431, |
| "grad_norm": 0.44012385606765747, |
| "learning_rate": 4.984463709893681e-06, |
| "loss": 0.4766, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.26063829787234044, |
| "grad_norm": 0.5264063477516174, |
| "learning_rate": 4.984392598396742e-06, |
| "loss": 0.4601, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.2611218568665377, |
| "grad_norm": 0.4360770881175995, |
| "learning_rate": 4.984321325037769e-06, |
| "loss": 0.5059, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.261605415860735, |
| "grad_norm": 0.4725351631641388, |
| "learning_rate": 4.984249889821406e-06, |
| "loss": 0.5041, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.2620889748549323, |
| "grad_norm": 0.40023666620254517, |
| "learning_rate": 4.984178292752305e-06, |
| "loss": 0.492, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.2625725338491296, |
| "grad_norm": 0.4368029534816742, |
| "learning_rate": 4.984106533835132e-06, |
| "loss": 0.493, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.26305609284332687, |
| "grad_norm": 0.457823246717453, |
| "learning_rate": 4.984034613074563e-06, |
| "loss": 0.4915, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.2635396518375242, |
| "grad_norm": 0.44186931848526, |
| "learning_rate": 4.983962530475282e-06, |
| "loss": 0.5059, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.2640232108317215, |
| "grad_norm": 0.4273228347301483, |
| "learning_rate": 4.983890286041987e-06, |
| "loss": 0.5036, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.26450676982591875, |
| "grad_norm": 0.4629921317100525, |
| "learning_rate": 4.983817879779384e-06, |
| "loss": 0.4859, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.26499032882011603, |
| "grad_norm": 0.44890058040618896, |
| "learning_rate": 4.983745311692189e-06, |
| "loss": 0.4962, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.26547388781431336, |
| "grad_norm": 0.6259036064147949, |
| "learning_rate": 4.983672581785132e-06, |
| "loss": 0.4942, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.26595744680851063, |
| "grad_norm": 0.45010271668434143, |
| "learning_rate": 4.983599690062953e-06, |
| "loss": 0.5171, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.2664410058027079, |
| "grad_norm": 0.422604501247406, |
| "learning_rate": 4.983526636530396e-06, |
| "loss": 0.5138, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.26692456479690524, |
| "grad_norm": 0.4481145739555359, |
| "learning_rate": 4.983453421192225e-06, |
| "loss": 0.5012, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.2674081237911025, |
| "grad_norm": 0.4294244349002838, |
| "learning_rate": 4.983380044053208e-06, |
| "loss": 0.4903, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.2678916827852998, |
| "grad_norm": 0.43048569560050964, |
| "learning_rate": 4.983306505118125e-06, |
| "loss": 0.4893, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.2683752417794971, |
| "grad_norm": 0.43324270844459534, |
| "learning_rate": 4.98323280439177e-06, |
| "loss": 0.4919, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.2688588007736944, |
| "grad_norm": 0.4504631459712982, |
| "learning_rate": 4.9831589418789415e-06, |
| "loss": 0.4825, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.26934235976789167, |
| "grad_norm": 0.46867018938064575, |
| "learning_rate": 4.9830849175844544e-06, |
| "loss": 0.5159, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.269825918762089, |
| "grad_norm": 0.48262712359428406, |
| "learning_rate": 4.98301073151313e-06, |
| "loss": 0.5027, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.2703094777562863, |
| "grad_norm": 0.4334803819656372, |
| "learning_rate": 4.982936383669802e-06, |
| "loss": 0.5126, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.27079303675048355, |
| "grad_norm": 0.5654119253158569, |
| "learning_rate": 4.982861874059314e-06, |
| "loss": 0.4848, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.2712765957446808, |
| "grad_norm": 0.6120291352272034, |
| "learning_rate": 4.982787202686521e-06, |
| "loss": 0.5171, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.27176015473887816, |
| "grad_norm": 0.40804538130760193, |
| "learning_rate": 4.982712369556287e-06, |
| "loss": 0.5073, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.27224371373307543, |
| "grad_norm": 0.4400192201137543, |
| "learning_rate": 4.982637374673489e-06, |
| "loss": 0.486, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.2727272727272727, |
| "grad_norm": 0.41215842962265015, |
| "learning_rate": 4.982562218043012e-06, |
| "loss": 0.4954, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.27321083172147004, |
| "grad_norm": 0.44765132665634155, |
| "learning_rate": 4.9824868996697525e-06, |
| "loss": 0.4842, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.2736943907156673, |
| "grad_norm": 0.47125014662742615, |
| "learning_rate": 4.982411419558618e-06, |
| "loss": 0.5019, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.2741779497098646, |
| "grad_norm": 0.43019676208496094, |
| "learning_rate": 4.982335777714525e-06, |
| "loss": 0.4925, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.2746615087040619, |
| "grad_norm": 0.4377966523170471, |
| "learning_rate": 4.9822599741424044e-06, |
| "loss": 0.5125, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.2751450676982592, |
| "grad_norm": 0.5210200548171997, |
| "learning_rate": 4.982184008847192e-06, |
| "loss": 0.4736, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.27562862669245647, |
| "grad_norm": 0.43145325779914856, |
| "learning_rate": 4.982107881833839e-06, |
| "loss": 0.4958, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.2761121856866538, |
| "grad_norm": 0.5334268808364868, |
| "learning_rate": 4.9820315931073035e-06, |
| "loss": 0.5099, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.2765957446808511, |
| "grad_norm": 0.46400728821754456, |
| "learning_rate": 4.981955142672558e-06, |
| "loss": 0.4918, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.27707930367504835, |
| "grad_norm": 0.47233352065086365, |
| "learning_rate": 4.981878530534581e-06, |
| "loss": 0.4936, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.2775628626692456, |
| "grad_norm": 0.46564990282058716, |
| "learning_rate": 4.9818017566983654e-06, |
| "loss": 0.4713, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.27804642166344296, |
| "grad_norm": 0.42474499344825745, |
| "learning_rate": 4.981724821168913e-06, |
| "loss": 0.4944, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.27852998065764023, |
| "grad_norm": 0.44720014929771423, |
| "learning_rate": 4.981647723951236e-06, |
| "loss": 0.513, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.2790135396518375, |
| "grad_norm": 0.4741462767124176, |
| "learning_rate": 4.981570465050357e-06, |
| "loss": 0.4652, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.27949709864603484, |
| "grad_norm": 0.4069628417491913, |
| "learning_rate": 4.9814930444713106e-06, |
| "loss": 0.4664, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.2799806576402321, |
| "grad_norm": 0.47512662410736084, |
| "learning_rate": 4.98141546221914e-06, |
| "loss": 0.4695, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.2804642166344294, |
| "grad_norm": 0.4442213773727417, |
| "learning_rate": 4.9813377182989e-06, |
| "loss": 0.4977, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.2809477756286267, |
| "grad_norm": 0.4360348880290985, |
| "learning_rate": 4.981259812715656e-06, |
| "loss": 0.4918, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.281431334622824, |
| "grad_norm": 0.4233061373233795, |
| "learning_rate": 4.981181745474483e-06, |
| "loss": 0.4818, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.28191489361702127, |
| "grad_norm": 0.4427405893802643, |
| "learning_rate": 4.9811035165804675e-06, |
| "loss": 0.4954, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.28239845261121854, |
| "grad_norm": 0.486393541097641, |
| "learning_rate": 4.981025126038708e-06, |
| "loss": 0.4938, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.2828820116054159, |
| "grad_norm": 0.4374271035194397, |
| "learning_rate": 4.9809465738543084e-06, |
| "loss": 0.492, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.28336557059961315, |
| "grad_norm": 0.458296537399292, |
| "learning_rate": 4.980867860032389e-06, |
| "loss": 0.4816, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.2838491295938104, |
| "grad_norm": 0.44356656074523926, |
| "learning_rate": 4.980788984578077e-06, |
| "loss": 0.4664, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.28433268858800775, |
| "grad_norm": 0.4328411817550659, |
| "learning_rate": 4.980709947496512e-06, |
| "loss": 0.4996, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.28481624758220503, |
| "grad_norm": 0.4631772041320801, |
| "learning_rate": 4.980630748792843e-06, |
| "loss": 0.4821, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.2852998065764023, |
| "grad_norm": 0.47806110978126526, |
| "learning_rate": 4.98055138847223e-06, |
| "loss": 0.4814, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.28578336557059963, |
| "grad_norm": 0.4524025321006775, |
| "learning_rate": 4.980471866539843e-06, |
| "loss": 0.4669, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.2862669245647969, |
| "grad_norm": 0.549818217754364, |
| "learning_rate": 4.980392183000864e-06, |
| "loss": 0.4769, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.2867504835589942, |
| "grad_norm": 0.41930127143859863, |
| "learning_rate": 4.9803123378604836e-06, |
| "loss": 0.492, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.2872340425531915, |
| "grad_norm": 0.4337260127067566, |
| "learning_rate": 4.980232331123904e-06, |
| "loss": 0.4972, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.2877176015473888, |
| "grad_norm": 0.4237891137599945, |
| "learning_rate": 4.980152162796338e-06, |
| "loss": 0.4984, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.28820116054158607, |
| "grad_norm": 0.4510140120983124, |
| "learning_rate": 4.980071832883008e-06, |
| "loss": 0.49, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.28868471953578334, |
| "grad_norm": 0.420654296875, |
| "learning_rate": 4.9799913413891485e-06, |
| "loss": 0.4902, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.28916827852998067, |
| "grad_norm": 0.49798089265823364, |
| "learning_rate": 4.979910688320004e-06, |
| "loss": 0.4991, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.28965183752417795, |
| "grad_norm": 0.4498788118362427, |
| "learning_rate": 4.9798298736808286e-06, |
| "loss": 0.4903, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.2901353965183752, |
| "grad_norm": 0.5480190515518188, |
| "learning_rate": 4.979748897476886e-06, |
| "loss": 0.492, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.29061895551257255, |
| "grad_norm": 0.439396470785141, |
| "learning_rate": 4.9796677597134546e-06, |
| "loss": 0.4637, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.2911025145067698, |
| "grad_norm": 0.444767028093338, |
| "learning_rate": 4.979586460395819e-06, |
| "loss": 0.4967, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.2915860735009671, |
| "grad_norm": 0.42325684428215027, |
| "learning_rate": 4.9795049995292765e-06, |
| "loss": 0.4804, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.29206963249516443, |
| "grad_norm": 0.42194440960884094, |
| "learning_rate": 4.979423377119134e-06, |
| "loss": 0.5036, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.2925531914893617, |
| "grad_norm": 0.42215487360954285, |
| "learning_rate": 4.97934159317071e-06, |
| "loss": 0.4653, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.293036750483559, |
| "grad_norm": 0.4361760914325714, |
| "learning_rate": 4.979259647689332e-06, |
| "loss": 0.4792, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.2935203094777563, |
| "grad_norm": 0.4426310658454895, |
| "learning_rate": 4.979177540680339e-06, |
| "loss": 0.478, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.2940038684719536, |
| "grad_norm": 0.4340149462223053, |
| "learning_rate": 4.979095272149081e-06, |
| "loss": 0.4987, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.29448742746615086, |
| "grad_norm": 0.4175974726676941, |
| "learning_rate": 4.979012842100919e-06, |
| "loss": 0.505, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.29497098646034814, |
| "grad_norm": 0.46888601779937744, |
| "learning_rate": 4.97893025054122e-06, |
| "loss": 0.4989, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.29545454545454547, |
| "grad_norm": 0.4319513738155365, |
| "learning_rate": 4.978847497475369e-06, |
| "loss": 0.4702, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.29593810444874274, |
| "grad_norm": 0.428959459066391, |
| "learning_rate": 4.978764582908754e-06, |
| "loss": 0.4823, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.29642166344294, |
| "grad_norm": 0.4449647068977356, |
| "learning_rate": 4.97868150684678e-06, |
| "loss": 0.4938, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.29690522243713735, |
| "grad_norm": 0.45159056782722473, |
| "learning_rate": 4.978598269294857e-06, |
| "loss": 0.5026, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.2973887814313346, |
| "grad_norm": 0.44324159622192383, |
| "learning_rate": 4.978514870258408e-06, |
| "loss": 0.4788, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.2978723404255319, |
| "grad_norm": 0.425523579120636, |
| "learning_rate": 4.9784313097428695e-06, |
| "loss": 0.5136, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.29835589941972923, |
| "grad_norm": 0.4262832701206207, |
| "learning_rate": 4.978347587753683e-06, |
| "loss": 0.4714, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.2988394584139265, |
| "grad_norm": 1.2277506589889526, |
| "learning_rate": 4.978263704296305e-06, |
| "loss": 0.5021, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.2993230174081238, |
| "grad_norm": 0.883792519569397, |
| "learning_rate": 4.978179659376199e-06, |
| "loss": 0.4832, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.29980657640232106, |
| "grad_norm": 0.4646826982498169, |
| "learning_rate": 4.978095452998841e-06, |
| "loss": 0.4987, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.3002901353965184, |
| "grad_norm": 0.42909321188926697, |
| "learning_rate": 4.978011085169717e-06, |
| "loss": 0.4967, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.30077369439071566, |
| "grad_norm": 0.5661286115646362, |
| "learning_rate": 4.9779265558943254e-06, |
| "loss": 0.4777, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.30125725338491294, |
| "grad_norm": 0.4307956397533417, |
| "learning_rate": 4.977841865178171e-06, |
| "loss": 0.4998, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.30174081237911027, |
| "grad_norm": 0.44329774379730225, |
| "learning_rate": 4.977757013026773e-06, |
| "loss": 0.4828, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.30222437137330754, |
| "grad_norm": 0.40910235047340393, |
| "learning_rate": 4.977671999445659e-06, |
| "loss": 0.5033, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.3027079303675048, |
| "grad_norm": 0.41079941391944885, |
| "learning_rate": 4.977586824440369e-06, |
| "loss": 0.5052, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.30319148936170215, |
| "grad_norm": 0.46034660935401917, |
| "learning_rate": 4.977501488016451e-06, |
| "loss": 0.4866, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.3036750483558994, |
| "grad_norm": 0.5553982853889465, |
| "learning_rate": 4.977415990179464e-06, |
| "loss": 0.5019, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.3041586073500967, |
| "grad_norm": 0.46083885431289673, |
| "learning_rate": 4.977330330934981e-06, |
| "loss": 0.4858, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.30464216634429403, |
| "grad_norm": 0.4224453270435333, |
| "learning_rate": 4.97724451028858e-06, |
| "loss": 0.4655, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.3051257253384913, |
| "grad_norm": 0.4268517792224884, |
| "learning_rate": 4.977158528245855e-06, |
| "loss": 0.5089, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.3056092843326886, |
| "grad_norm": 0.585080623626709, |
| "learning_rate": 4.977072384812406e-06, |
| "loss": 0.4792, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.30609284332688586, |
| "grad_norm": 0.4740874171257019, |
| "learning_rate": 4.976986079993845e-06, |
| "loss": 0.4511, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.3065764023210832, |
| "grad_norm": 0.7237485647201538, |
| "learning_rate": 4.976899613795797e-06, |
| "loss": 0.4709, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.30705996131528046, |
| "grad_norm": 0.7460974454879761, |
| "learning_rate": 4.9768129862238935e-06, |
| "loss": 0.4724, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.30754352030947774, |
| "grad_norm": 0.5382199287414551, |
| "learning_rate": 4.976726197283779e-06, |
| "loss": 0.4808, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.30802707930367507, |
| "grad_norm": 0.42365172505378723, |
| "learning_rate": 4.976639246981108e-06, |
| "loss": 0.5015, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.30851063829787234, |
| "grad_norm": 0.4385802447795868, |
| "learning_rate": 4.976552135321546e-06, |
| "loss": 0.4921, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.3089941972920696, |
| "grad_norm": 0.4448932111263275, |
| "learning_rate": 4.976464862310768e-06, |
| "loss": 0.4604, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.30947775628626695, |
| "grad_norm": 0.4279050827026367, |
| "learning_rate": 4.9763774279544595e-06, |
| "loss": 0.4919, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.3099613152804642, |
| "grad_norm": 0.43031272292137146, |
| "learning_rate": 4.9762898322583184e-06, |
| "loss": 0.4872, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.3104448742746615, |
| "grad_norm": 0.5989789366722107, |
| "learning_rate": 4.976202075228049e-06, |
| "loss": 0.4636, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.31092843326885883, |
| "grad_norm": 0.41690775752067566, |
| "learning_rate": 4.9761141568693715e-06, |
| "loss": 0.4862, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.3114119922630561, |
| "grad_norm": 0.5114609599113464, |
| "learning_rate": 4.976026077188013e-06, |
| "loss": 0.4757, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.3118955512572534, |
| "grad_norm": 0.4282832741737366, |
| "learning_rate": 4.975937836189712e-06, |
| "loss": 0.4971, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.31237911025145065, |
| "grad_norm": 0.42823049426078796, |
| "learning_rate": 4.975849433880218e-06, |
| "loss": 0.4859, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.312862669245648, |
| "grad_norm": 0.4191463589668274, |
| "learning_rate": 4.975760870265289e-06, |
| "loss": 0.4951, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.31334622823984526, |
| "grad_norm": 0.5171394944190979, |
| "learning_rate": 4.975672145350696e-06, |
| "loss": 0.4909, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.31382978723404253, |
| "grad_norm": 0.4636901617050171, |
| "learning_rate": 4.97558325914222e-06, |
| "loss": 0.4849, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.31431334622823986, |
| "grad_norm": 0.4889143407344818, |
| "learning_rate": 4.975494211645652e-06, |
| "loss": 0.4787, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.31479690522243714, |
| "grad_norm": 0.4157842695713043, |
| "learning_rate": 4.975405002866793e-06, |
| "loss": 0.4851, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.3152804642166344, |
| "grad_norm": 0.42362505197525024, |
| "learning_rate": 4.975315632811456e-06, |
| "loss": 0.4707, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.31576402321083175, |
| "grad_norm": 0.4421435296535492, |
| "learning_rate": 4.9752261014854625e-06, |
| "loss": 0.4927, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.316247582205029, |
| "grad_norm": 0.4442092776298523, |
| "learning_rate": 4.975136408894646e-06, |
| "loss": 0.5045, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.3167311411992263, |
| "grad_norm": 0.4613809883594513, |
| "learning_rate": 4.975046555044851e-06, |
| "loss": 0.4934, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.31721470019342357, |
| "grad_norm": 0.5220198631286621, |
| "learning_rate": 4.97495653994193e-06, |
| "loss": 0.4765, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.3176982591876209, |
| "grad_norm": 0.4627764821052551, |
| "learning_rate": 4.974866363591749e-06, |
| "loss": 0.4757, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.3181818181818182, |
| "grad_norm": 0.4076231122016907, |
| "learning_rate": 4.974776026000182e-06, |
| "loss": 0.4884, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.31866537717601545, |
| "grad_norm": 0.44560858607292175, |
| "learning_rate": 4.974685527173116e-06, |
| "loss": 0.4909, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.3191489361702128, |
| "grad_norm": 0.4903375506401062, |
| "learning_rate": 4.974594867116446e-06, |
| "loss": 0.4843, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.31963249516441006, |
| "grad_norm": 0.4852094054222107, |
| "learning_rate": 4.974504045836079e-06, |
| "loss": 0.5082, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.32011605415860733, |
| "grad_norm": 0.5490374565124512, |
| "learning_rate": 4.974413063337932e-06, |
| "loss": 0.4813, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.32059961315280466, |
| "grad_norm": 0.451930969953537, |
| "learning_rate": 4.974321919627932e-06, |
| "loss": 0.4976, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.32108317214700194, |
| "grad_norm": 0.44542908668518066, |
| "learning_rate": 4.97423061471202e-06, |
| "loss": 0.5068, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.3215667311411992, |
| "grad_norm": 0.4401024281978607, |
| "learning_rate": 4.974139148596141e-06, |
| "loss": 0.4887, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.32205029013539654, |
| "grad_norm": 0.4341893196105957, |
| "learning_rate": 4.9740475212862565e-06, |
| "loss": 0.4587, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.3225338491295938, |
| "grad_norm": 0.44072195887565613, |
| "learning_rate": 4.973955732788335e-06, |
| "loss": 0.4919, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.3230174081237911, |
| "grad_norm": 0.4512384831905365, |
| "learning_rate": 4.973863783108358e-06, |
| "loss": 0.5007, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.32350096711798837, |
| "grad_norm": 0.42298397421836853, |
| "learning_rate": 4.9737716722523145e-06, |
| "loss": 0.4855, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.3239845261121857, |
| "grad_norm": 0.43051955103874207, |
| "learning_rate": 4.973679400226207e-06, |
| "loss": 0.4746, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.324468085106383, |
| "grad_norm": 0.43127870559692383, |
| "learning_rate": 4.973586967036046e-06, |
| "loss": 0.4682, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.32495164410058025, |
| "grad_norm": 0.4191921651363373, |
| "learning_rate": 4.9734943726878545e-06, |
| "loss": 0.4799, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.3254352030947776, |
| "grad_norm": 0.48285409808158875, |
| "learning_rate": 4.973401617187664e-06, |
| "loss": 0.4793, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.32591876208897486, |
| "grad_norm": 0.4482521712779999, |
| "learning_rate": 4.97330870054152e-06, |
| "loss": 0.4866, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.32640232108317213, |
| "grad_norm": 0.4234021008014679, |
| "learning_rate": 4.973215622755474e-06, |
| "loss": 0.4829, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.32688588007736946, |
| "grad_norm": 0.8298510313034058, |
| "learning_rate": 4.9731223838355915e-06, |
| "loss": 0.4926, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.32736943907156674, |
| "grad_norm": 0.4476756453514099, |
| "learning_rate": 4.973028983787947e-06, |
| "loss": 0.4739, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.327852998065764, |
| "grad_norm": 0.39939942955970764, |
| "learning_rate": 4.972935422618624e-06, |
| "loss": 0.4431, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.32833655705996134, |
| "grad_norm": 0.47416552901268005, |
| "learning_rate": 4.97284170033372e-06, |
| "loss": 0.4987, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.3288201160541586, |
| "grad_norm": 0.4256126284599304, |
| "learning_rate": 4.9727478169393406e-06, |
| "loss": 0.4667, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.3293036750483559, |
| "grad_norm": 0.4688650965690613, |
| "learning_rate": 4.972653772441602e-06, |
| "loss": 0.4957, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.32978723404255317, |
| "grad_norm": 0.4217233955860138, |
| "learning_rate": 4.972559566846632e-06, |
| "loss": 0.4688, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.3302707930367505, |
| "grad_norm": 0.4340645968914032, |
| "learning_rate": 4.972465200160568e-06, |
| "loss": 0.4859, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.3307543520309478, |
| "grad_norm": 0.41588684916496277, |
| "learning_rate": 4.9723706723895584e-06, |
| "loss": 0.5181, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.33123791102514505, |
| "grad_norm": 0.4644707143306732, |
| "learning_rate": 4.972275983539761e-06, |
| "loss": 0.4681, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.3317214700193424, |
| "grad_norm": 0.5474352836608887, |
| "learning_rate": 4.972181133617345e-06, |
| "loss": 0.4864, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.33220502901353965, |
| "grad_norm": 0.4336998760700226, |
| "learning_rate": 4.972086122628492e-06, |
| "loss": 0.4926, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.33268858800773693, |
| "grad_norm": 0.42557400465011597, |
| "learning_rate": 4.97199095057939e-06, |
| "loss": 0.4817, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.33317214700193426, |
| "grad_norm": 0.4498555362224579, |
| "learning_rate": 4.97189561747624e-06, |
| "loss": 0.4735, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.33365570599613154, |
| "grad_norm": 0.4151577651500702, |
| "learning_rate": 4.971800123325253e-06, |
| "loss": 0.4756, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.3341392649903288, |
| "grad_norm": 0.4264872968196869, |
| "learning_rate": 4.971704468132651e-06, |
| "loss": 0.4897, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.3346228239845261, |
| "grad_norm": 0.40942180156707764, |
| "learning_rate": 4.971608651904667e-06, |
| "loss": 0.4798, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.3351063829787234, |
| "grad_norm": 0.42875564098358154, |
| "learning_rate": 4.971512674647542e-06, |
| "loss": 0.487, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.3355899419729207, |
| "grad_norm": 0.505617082118988, |
| "learning_rate": 4.9714165363675295e-06, |
| "loss": 0.474, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.33607350096711797, |
| "grad_norm": 0.41302382946014404, |
| "learning_rate": 4.971320237070893e-06, |
| "loss": 0.4811, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.3365570599613153, |
| "grad_norm": 0.4467024505138397, |
| "learning_rate": 4.9712237767639075e-06, |
| "loss": 0.4835, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.33704061895551257, |
| "grad_norm": 0.42488452792167664, |
| "learning_rate": 4.971127155452856e-06, |
| "loss": 0.4899, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.33752417794970985, |
| "grad_norm": 0.936638355255127, |
| "learning_rate": 4.971030373144035e-06, |
| "loss": 0.4954, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.3380077369439072, |
| "grad_norm": 0.43264010548591614, |
| "learning_rate": 4.970933429843748e-06, |
| "loss": 0.5001, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.33849129593810445, |
| "grad_norm": 0.4461173415184021, |
| "learning_rate": 4.970836325558314e-06, |
| "loss": 0.4979, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.33897485493230173, |
| "grad_norm": 0.4384688138961792, |
| "learning_rate": 4.970739060294056e-06, |
| "loss": 0.5053, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.33945841392649906, |
| "grad_norm": 0.44660210609436035, |
| "learning_rate": 4.970641634057314e-06, |
| "loss": 0.4922, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.33994197292069633, |
| "grad_norm": 0.6774649620056152, |
| "learning_rate": 4.970544046854434e-06, |
| "loss": 0.4641, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.3404255319148936, |
| "grad_norm": 0.4222927987575531, |
| "learning_rate": 4.970446298691775e-06, |
| "loss": 0.482, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.3409090909090909, |
| "grad_norm": 0.4312184751033783, |
| "learning_rate": 4.970348389575704e-06, |
| "loss": 0.4799, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.3413926499032882, |
| "grad_norm": 0.44325292110443115, |
| "learning_rate": 4.970250319512601e-06, |
| "loss": 0.474, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.3418762088974855, |
| "grad_norm": 0.43495866656303406, |
| "learning_rate": 4.970152088508854e-06, |
| "loss": 0.4713, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.34235976789168276, |
| "grad_norm": 0.41411668062210083, |
| "learning_rate": 4.970053696570865e-06, |
| "loss": 0.4958, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.3428433268858801, |
| "grad_norm": 0.4402662515640259, |
| "learning_rate": 4.969955143705043e-06, |
| "loss": 0.4682, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.34332688588007737, |
| "grad_norm": 0.5128571391105652, |
| "learning_rate": 4.96985642991781e-06, |
| "loss": 0.4667, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.34381044487427465, |
| "grad_norm": 0.45290181040763855, |
| "learning_rate": 4.969757555215595e-06, |
| "loss": 0.4809, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.344294003868472, |
| "grad_norm": 0.4683387875556946, |
| "learning_rate": 4.9696585196048425e-06, |
| "loss": 0.4798, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.34477756286266925, |
| "grad_norm": 0.4333076775074005, |
| "learning_rate": 4.969559323092004e-06, |
| "loss": 0.4693, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.3452611218568665, |
| "grad_norm": 0.4301561117172241, |
| "learning_rate": 4.96945996568354e-06, |
| "loss": 0.4771, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.34574468085106386, |
| "grad_norm": 0.41485023498535156, |
| "learning_rate": 4.969360447385928e-06, |
| "loss": 0.4696, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.34622823984526113, |
| "grad_norm": 0.4785935580730438, |
| "learning_rate": 4.969260768205649e-06, |
| "loss": 0.4836, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.3467117988394584, |
| "grad_norm": 0.4168457090854645, |
| "learning_rate": 4.969160928149197e-06, |
| "loss": 0.4871, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.3471953578336557, |
| "grad_norm": 0.5240389704704285, |
| "learning_rate": 4.969060927223079e-06, |
| "loss": 0.4856, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.347678916827853, |
| "grad_norm": 0.43695124983787537, |
| "learning_rate": 4.968960765433808e-06, |
| "loss": 0.4732, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.3481624758220503, |
| "grad_norm": 0.4459145963191986, |
| "learning_rate": 4.96886044278791e-06, |
| "loss": 0.474, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.34864603481624756, |
| "grad_norm": 0.6060423851013184, |
| "learning_rate": 4.968759959291922e-06, |
| "loss": 0.4582, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.3491295938104449, |
| "grad_norm": 0.45408162474632263, |
| "learning_rate": 4.968659314952391e-06, |
| "loss": 0.4875, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.34961315280464217, |
| "grad_norm": 0.4357450306415558, |
| "learning_rate": 4.968558509775872e-06, |
| "loss": 0.4809, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.35009671179883944, |
| "grad_norm": 0.4248330891132355, |
| "learning_rate": 4.9684575437689354e-06, |
| "loss": 0.4638, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.3505802707930368, |
| "grad_norm": 0.41638749837875366, |
| "learning_rate": 4.968356416938158e-06, |
| "loss": 0.4607, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.35106382978723405, |
| "grad_norm": 0.42239511013031006, |
| "learning_rate": 4.968255129290127e-06, |
| "loss": 0.4682, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.3515473887814313, |
| "grad_norm": 0.4688323438167572, |
| "learning_rate": 4.968153680831444e-06, |
| "loss": 0.4909, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.3520309477756286, |
| "grad_norm": 0.4212821125984192, |
| "learning_rate": 4.968052071568717e-06, |
| "loss": 0.4799, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.35251450676982593, |
| "grad_norm": 0.4717614948749542, |
| "learning_rate": 4.967950301508566e-06, |
| "loss": 0.488, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.3529980657640232, |
| "grad_norm": 0.4233686625957489, |
| "learning_rate": 4.967848370657622e-06, |
| "loss": 0.5076, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.3534816247582205, |
| "grad_norm": 0.437971830368042, |
| "learning_rate": 4.967746279022526e-06, |
| "loss": 0.4656, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.3539651837524178, |
| "grad_norm": 0.557462215423584, |
| "learning_rate": 4.967644026609929e-06, |
| "loss": 0.4708, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.3544487427466151, |
| "grad_norm": 0.4785339832305908, |
| "learning_rate": 4.967541613426493e-06, |
| "loss": 0.4702, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.35493230174081236, |
| "grad_norm": 0.446857213973999, |
| "learning_rate": 4.96743903947889e-06, |
| "loss": 0.442, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.3554158607350097, |
| "grad_norm": 0.4136951267719269, |
| "learning_rate": 4.967336304773805e-06, |
| "loss": 0.4767, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.35589941972920697, |
| "grad_norm": 0.4243564009666443, |
| "learning_rate": 4.967233409317928e-06, |
| "loss": 0.4739, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.35638297872340424, |
| "grad_norm": 0.43003565073013306, |
| "learning_rate": 4.9671303531179635e-06, |
| "loss": 0.4597, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.3568665377176016, |
| "grad_norm": 0.48309555649757385, |
| "learning_rate": 4.967027136180629e-06, |
| "loss": 0.478, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.35735009671179885, |
| "grad_norm": 0.510793149471283, |
| "learning_rate": 4.966923758512645e-06, |
| "loss": 0.5017, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.3578336557059961, |
| "grad_norm": 0.5192124843597412, |
| "learning_rate": 4.96682022012075e-06, |
| "loss": 0.5028, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.3583172147001934, |
| "grad_norm": 0.6904450058937073, |
| "learning_rate": 4.966716521011688e-06, |
| "loss": 0.4749, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.35880077369439073, |
| "grad_norm": 0.4111625552177429, |
| "learning_rate": 4.966612661192215e-06, |
| "loss": 0.4671, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.359284332688588, |
| "grad_norm": 0.4130711555480957, |
| "learning_rate": 4.966508640669099e-06, |
| "loss": 0.5011, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.3597678916827853, |
| "grad_norm": 0.469009667634964, |
| "learning_rate": 4.966404459449115e-06, |
| "loss": 0.4961, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.3602514506769826, |
| "grad_norm": 0.4455870985984802, |
| "learning_rate": 4.966300117539052e-06, |
| "loss": 0.485, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.3607350096711799, |
| "grad_norm": 0.40770214796066284, |
| "learning_rate": 4.966195614945709e-06, |
| "loss": 0.4699, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.36121856866537716, |
| "grad_norm": 0.42602604627609253, |
| "learning_rate": 4.966090951675893e-06, |
| "loss": 0.4738, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.3617021276595745, |
| "grad_norm": 0.4325065612792969, |
| "learning_rate": 4.965986127736423e-06, |
| "loss": 0.475, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.36218568665377177, |
| "grad_norm": 0.43184852600097656, |
| "learning_rate": 4.965881143134128e-06, |
| "loss": 0.4738, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.36266924564796904, |
| "grad_norm": 0.4798991084098816, |
| "learning_rate": 4.96577599787585e-06, |
| "loss": 0.4841, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.36315280464216637, |
| "grad_norm": 0.4550189971923828, |
| "learning_rate": 4.965670691968438e-06, |
| "loss": 0.4663, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.36363636363636365, |
| "grad_norm": 0.42557281255722046, |
| "learning_rate": 4.965565225418752e-06, |
| "loss": 0.4691, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.3641199226305609, |
| "grad_norm": 0.43087345361709595, |
| "learning_rate": 4.965459598233664e-06, |
| "loss": 0.5025, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.3646034816247582, |
| "grad_norm": 0.4292849600315094, |
| "learning_rate": 4.965353810420056e-06, |
| "loss": 0.4882, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.3650870406189555, |
| "grad_norm": 1.3346878290176392, |
| "learning_rate": 4.965247861984821e-06, |
| "loss": 0.4776, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.3655705996131528, |
| "grad_norm": 0.4174513518810272, |
| "learning_rate": 4.96514175293486e-06, |
| "loss": 0.4475, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.3660541586073501, |
| "grad_norm": 0.4157787263393402, |
| "learning_rate": 4.965035483277088e-06, |
| "loss": 0.4717, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.3665377176015474, |
| "grad_norm": 0.4442564845085144, |
| "learning_rate": 4.964929053018427e-06, |
| "loss": 0.4771, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.3670212765957447, |
| "grad_norm": 0.4338626265525818, |
| "learning_rate": 4.9648224621658125e-06, |
| "loss": 0.4714, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.36750483558994196, |
| "grad_norm": 0.44122618436813354, |
| "learning_rate": 4.964715710726188e-06, |
| "loss": 0.4595, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.3679883945841393, |
| "grad_norm": 0.42671939730644226, |
| "learning_rate": 4.964608798706508e-06, |
| "loss": 0.4661, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.36847195357833656, |
| "grad_norm": 0.41113489866256714, |
| "learning_rate": 4.964501726113741e-06, |
| "loss": 0.4914, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.36895551257253384, |
| "grad_norm": 0.5205186605453491, |
| "learning_rate": 4.96439449295486e-06, |
| "loss": 0.4791, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.3694390715667311, |
| "grad_norm": 0.435266375541687, |
| "learning_rate": 4.964287099236851e-06, |
| "loss": 0.4691, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.36992263056092844, |
| "grad_norm": 0.41085487604141235, |
| "learning_rate": 4.964179544966713e-06, |
| "loss": 0.4728, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.3704061895551257, |
| "grad_norm": 0.4392455518245697, |
| "learning_rate": 4.964071830151452e-06, |
| "loss": 0.4933, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.370889748549323, |
| "grad_norm": 0.4227438271045685, |
| "learning_rate": 4.963963954798087e-06, |
| "loss": 0.5065, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.3713733075435203, |
| "grad_norm": 0.4845467805862427, |
| "learning_rate": 4.963855918913645e-06, |
| "loss": 0.4714, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.3718568665377176, |
| "grad_norm": 0.45647627115249634, |
| "learning_rate": 4.963747722505164e-06, |
| "loss": 0.498, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.3723404255319149, |
| "grad_norm": 0.46499380469322205, |
| "learning_rate": 4.963639365579696e-06, |
| "loss": 0.4687, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.3728239845261122, |
| "grad_norm": 0.4408628046512604, |
| "learning_rate": 4.963530848144298e-06, |
| "loss": 0.4769, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.3733075435203095, |
| "grad_norm": 0.4910852015018463, |
| "learning_rate": 4.963422170206042e-06, |
| "loss": 0.4826, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.37379110251450676, |
| "grad_norm": 0.4329937696456909, |
| "learning_rate": 4.963313331772008e-06, |
| "loss": 0.4635, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.3742746615087041, |
| "grad_norm": 0.49897655844688416, |
| "learning_rate": 4.963204332849285e-06, |
| "loss": 0.4716, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.37475822050290136, |
| "grad_norm": 0.4166103005409241, |
| "learning_rate": 4.963095173444976e-06, |
| "loss": 0.4552, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.37524177949709864, |
| "grad_norm": 0.4559236168861389, |
| "learning_rate": 4.962985853566193e-06, |
| "loss": 0.4768, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.3757253384912959, |
| "grad_norm": 0.49545004963874817, |
| "learning_rate": 4.962876373220059e-06, |
| "loss": 0.4615, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.37620889748549324, |
| "grad_norm": 0.4941776394844055, |
| "learning_rate": 4.962766732413706e-06, |
| "loss": 0.4799, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.3766924564796905, |
| "grad_norm": 0.6698254942893982, |
| "learning_rate": 4.962656931154277e-06, |
| "loss": 0.4769, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.3771760154738878, |
| "grad_norm": 0.48517316579818726, |
| "learning_rate": 4.9625469694489266e-06, |
| "loss": 0.4738, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.3776595744680851, |
| "grad_norm": 0.44697442650794983, |
| "learning_rate": 4.962436847304818e-06, |
| "loss": 0.4798, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.3781431334622824, |
| "grad_norm": 0.5347298979759216, |
| "learning_rate": 4.962326564729126e-06, |
| "loss": 0.4625, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.3786266924564797, |
| "grad_norm": 0.42385363578796387, |
| "learning_rate": 4.962216121729036e-06, |
| "loss": 0.4896, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.379110251450677, |
| "grad_norm": 0.4330436885356903, |
| "learning_rate": 4.962105518311745e-06, |
| "loss": 0.4584, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.3795938104448743, |
| "grad_norm": 0.42593157291412354, |
| "learning_rate": 4.961994754484456e-06, |
| "loss": 0.4726, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.38007736943907156, |
| "grad_norm": 0.49842873215675354, |
| "learning_rate": 4.961883830254387e-06, |
| "loss": 0.4738, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.3805609284332689, |
| "grad_norm": 0.46715858578681946, |
| "learning_rate": 4.961772745628765e-06, |
| "loss": 0.4809, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.38104448742746616, |
| "grad_norm": 0.4344216287136078, |
| "learning_rate": 4.961661500614827e-06, |
| "loss": 0.4725, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.38152804642166344, |
| "grad_norm": 0.4901852607727051, |
| "learning_rate": 4.961550095219821e-06, |
| "loss": 0.4622, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.3820116054158607, |
| "grad_norm": 0.42893216013908386, |
| "learning_rate": 4.961438529451005e-06, |
| "loss": 0.4947, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.38249516441005804, |
| "grad_norm": 0.42825618386268616, |
| "learning_rate": 4.961326803315648e-06, |
| "loss": 0.4694, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.3829787234042553, |
| "grad_norm": 0.42393574118614197, |
| "learning_rate": 4.961214916821029e-06, |
| "loss": 0.4777, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.3834622823984526, |
| "grad_norm": 0.4666205942630768, |
| "learning_rate": 4.961102869974438e-06, |
| "loss": 0.4588, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.3839458413926499, |
| "grad_norm": 0.43628597259521484, |
| "learning_rate": 4.960990662783174e-06, |
| "loss": 0.4456, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.3844294003868472, |
| "grad_norm": 0.43910062313079834, |
| "learning_rate": 4.960878295254549e-06, |
| "loss": 0.4581, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.3849129593810445, |
| "grad_norm": 0.4191967248916626, |
| "learning_rate": 4.960765767395881e-06, |
| "loss": 0.4753, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.3853965183752418, |
| "grad_norm": 0.4429578483104706, |
| "learning_rate": 4.960653079214505e-06, |
| "loss": 0.4614, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.3858800773694391, |
| "grad_norm": 0.5809339880943298, |
| "learning_rate": 4.960540230717761e-06, |
| "loss": 0.4701, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.38636363636363635, |
| "grad_norm": 0.48615169525146484, |
| "learning_rate": 4.960427221913e-06, |
| "loss": 0.4688, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.38684719535783363, |
| "grad_norm": 0.4294247329235077, |
| "learning_rate": 4.960314052807588e-06, |
| "loss": 0.4804, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.38733075435203096, |
| "grad_norm": 0.46103084087371826, |
| "learning_rate": 4.960200723408895e-06, |
| "loss": 0.482, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.38781431334622823, |
| "grad_norm": 0.45017433166503906, |
| "learning_rate": 4.960087233724306e-06, |
| "loss": 0.4706, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.3882978723404255, |
| "grad_norm": 0.42244574427604675, |
| "learning_rate": 4.959973583761215e-06, |
| "loss": 0.478, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.38878143133462284, |
| "grad_norm": 0.44186943769454956, |
| "learning_rate": 4.959859773527027e-06, |
| "loss": 0.4984, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.3892649903288201, |
| "grad_norm": 0.4306003153324127, |
| "learning_rate": 4.959745803029155e-06, |
| "loss": 0.4918, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.3897485493230174, |
| "grad_norm": 0.45696625113487244, |
| "learning_rate": 4.959631672275026e-06, |
| "loss": 0.4896, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.3902321083172147, |
| "grad_norm": 0.4278884828090668, |
| "learning_rate": 4.959517381272075e-06, |
| "loss": 0.4728, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.390715667311412, |
| "grad_norm": 0.44117751717567444, |
| "learning_rate": 4.95940293002775e-06, |
| "loss": 0.4902, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.39119922630560927, |
| "grad_norm": 0.4210093319416046, |
| "learning_rate": 4.959288318549505e-06, |
| "loss": 0.4642, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.3916827852998066, |
| "grad_norm": 0.4378984868526459, |
| "learning_rate": 4.959173546844809e-06, |
| "loss": 0.4886, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.3921663442940039, |
| "grad_norm": 0.42724746465682983, |
| "learning_rate": 4.959058614921139e-06, |
| "loss": 0.4938, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.39264990328820115, |
| "grad_norm": 0.4462886452674866, |
| "learning_rate": 4.958943522785982e-06, |
| "loss": 0.4676, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.3931334622823984, |
| "grad_norm": 0.4206470251083374, |
| "learning_rate": 4.95882827044684e-06, |
| "loss": 0.4939, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.39361702127659576, |
| "grad_norm": 0.5013949871063232, |
| "learning_rate": 4.958712857911217e-06, |
| "loss": 0.4937, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.39410058027079303, |
| "grad_norm": 0.5348606109619141, |
| "learning_rate": 4.958597285186635e-06, |
| "loss": 0.4844, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.3945841392649903, |
| "grad_norm": 0.41226282715797424, |
| "learning_rate": 4.958481552280623e-06, |
| "loss": 0.4726, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.39506769825918764, |
| "grad_norm": 0.4058435261249542, |
| "learning_rate": 4.958365659200722e-06, |
| "loss": 0.4628, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.3955512572533849, |
| "grad_norm": 0.4504850506782532, |
| "learning_rate": 4.958249605954481e-06, |
| "loss": 0.4756, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.3960348162475822, |
| "grad_norm": 0.42663368582725525, |
| "learning_rate": 4.9581333925494635e-06, |
| "loss": 0.484, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.3965183752417795, |
| "grad_norm": 0.43051379919052124, |
| "learning_rate": 4.95801701899324e-06, |
| "loss": 0.4838, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.3970019342359768, |
| "grad_norm": 0.44090908765792847, |
| "learning_rate": 4.9579004852933906e-06, |
| "loss": 0.4673, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.39748549323017407, |
| "grad_norm": 0.4541039764881134, |
| "learning_rate": 4.95778379145751e-06, |
| "loss": 0.4829, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.3979690522243714, |
| "grad_norm": 0.46486979722976685, |
| "learning_rate": 4.9576669374932e-06, |
| "loss": 0.4913, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.3984526112185687, |
| "grad_norm": 0.4304236173629761, |
| "learning_rate": 4.957549923408074e-06, |
| "loss": 0.4571, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.39893617021276595, |
| "grad_norm": 0.44261449575424194, |
| "learning_rate": 4.957432749209755e-06, |
| "loss": 0.4771, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.3994197292069632, |
| "grad_norm": 0.5330602526664734, |
| "learning_rate": 4.957315414905877e-06, |
| "loss": 0.4721, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.39990328820116056, |
| "grad_norm": 0.4316442012786865, |
| "learning_rate": 4.957197920504087e-06, |
| "loss": 0.4949, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.40038684719535783, |
| "grad_norm": 0.42876294255256653, |
| "learning_rate": 4.957080266012037e-06, |
| "loss": 0.4501, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.4008704061895551, |
| "grad_norm": 0.41960573196411133, |
| "learning_rate": 4.956962451437394e-06, |
| "loss": 0.4719, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.40135396518375244, |
| "grad_norm": 0.44260984659194946, |
| "learning_rate": 4.9568444767878335e-06, |
| "loss": 0.4618, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.4018375241779497, |
| "grad_norm": 0.44216662645339966, |
| "learning_rate": 4.95672634207104e-06, |
| "loss": 0.485, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.402321083172147, |
| "grad_norm": 0.46946024894714355, |
| "learning_rate": 4.9566080472947134e-06, |
| "loss": 0.4676, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.4028046421663443, |
| "grad_norm": 0.45229020714759827, |
| "learning_rate": 4.956489592466558e-06, |
| "loss": 0.4743, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.4032882011605416, |
| "grad_norm": 0.45127013325691223, |
| "learning_rate": 4.9563709775942925e-06, |
| "loss": 0.4749, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.40377176015473887, |
| "grad_norm": 0.44455087184906006, |
| "learning_rate": 4.956252202685645e-06, |
| "loss": 0.4852, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.40425531914893614, |
| "grad_norm": 0.5142653584480286, |
| "learning_rate": 4.956133267748353e-06, |
| "loss": 0.4766, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.4047388781431335, |
| "grad_norm": 0.43196821212768555, |
| "learning_rate": 4.956014172790166e-06, |
| "loss": 0.4726, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.40522243713733075, |
| "grad_norm": 0.42082950472831726, |
| "learning_rate": 4.955894917818844e-06, |
| "loss": 0.4927, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.405705996131528, |
| "grad_norm": 0.4351148009300232, |
| "learning_rate": 4.955775502842155e-06, |
| "loss": 0.47, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.40618955512572535, |
| "grad_norm": 0.45302635431289673, |
| "learning_rate": 4.95565592786788e-06, |
| "loss": 0.4632, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.40667311411992263, |
| "grad_norm": 0.45961737632751465, |
| "learning_rate": 4.955536192903809e-06, |
| "loss": 0.4802, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.4071566731141199, |
| "grad_norm": 0.4482019543647766, |
| "learning_rate": 4.955416297957744e-06, |
| "loss": 0.4748, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.40764023210831724, |
| "grad_norm": 0.44860005378723145, |
| "learning_rate": 4.955296243037494e-06, |
| "loss": 0.4775, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.4081237911025145, |
| "grad_norm": 0.44915902614593506, |
| "learning_rate": 4.955176028150884e-06, |
| "loss": 0.481, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.4086073500967118, |
| "grad_norm": 0.4330085813999176, |
| "learning_rate": 4.9550556533057435e-06, |
| "loss": 0.4956, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.4090909090909091, |
| "grad_norm": 0.48874109983444214, |
| "learning_rate": 4.954935118509917e-06, |
| "loss": 0.4811, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.4095744680851064, |
| "grad_norm": 0.4293757379055023, |
| "learning_rate": 4.9548144237712556e-06, |
| "loss": 0.4752, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.41005802707930367, |
| "grad_norm": 0.4687563180923462, |
| "learning_rate": 4.954693569097625e-06, |
| "loss": 0.4645, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.41054158607350094, |
| "grad_norm": 0.45286861062049866, |
| "learning_rate": 4.954572554496897e-06, |
| "loss": 0.4757, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.41102514506769827, |
| "grad_norm": 0.4203643202781677, |
| "learning_rate": 4.9544513799769564e-06, |
| "loss": 0.4696, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.41150870406189555, |
| "grad_norm": 0.4808759093284607, |
| "learning_rate": 4.954330045545699e-06, |
| "loss": 0.4771, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.4119922630560928, |
| "grad_norm": 0.42002764344215393, |
| "learning_rate": 4.954208551211029e-06, |
| "loss": 0.4862, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.41247582205029015, |
| "grad_norm": 0.4864538013935089, |
| "learning_rate": 4.954086896980863e-06, |
| "loss": 0.4572, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.41295938104448743, |
| "grad_norm": 0.42396342754364014, |
| "learning_rate": 4.9539650828631246e-06, |
| "loss": 0.4565, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.4134429400386847, |
| "grad_norm": 0.41734591126441956, |
| "learning_rate": 4.953843108865752e-06, |
| "loss": 0.5006, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.41392649903288203, |
| "grad_norm": 0.42748942971229553, |
| "learning_rate": 4.953720974996692e-06, |
| "loss": 0.4865, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.4144100580270793, |
| "grad_norm": 0.4658924639225006, |
| "learning_rate": 4.953598681263902e-06, |
| "loss": 0.4724, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.4148936170212766, |
| "grad_norm": 0.45556163787841797, |
| "learning_rate": 4.953476227675349e-06, |
| "loss": 0.4956, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.4153771760154739, |
| "grad_norm": 0.45793822407722473, |
| "learning_rate": 4.95335361423901e-06, |
| "loss": 0.4886, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.4158607350096712, |
| "grad_norm": 0.43772590160369873, |
| "learning_rate": 4.953230840962876e-06, |
| "loss": 0.4696, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.41634429400386846, |
| "grad_norm": 0.4840233623981476, |
| "learning_rate": 4.9531079078549434e-06, |
| "loss": 0.4663, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.41682785299806574, |
| "grad_norm": 0.4341041147708893, |
| "learning_rate": 4.9529848149232244e-06, |
| "loss": 0.4748, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.41731141199226307, |
| "grad_norm": 0.4478056728839874, |
| "learning_rate": 4.9528615621757345e-06, |
| "loss": 0.4872, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.41779497098646035, |
| "grad_norm": 0.44516491889953613, |
| "learning_rate": 4.952738149620508e-06, |
| "loss": 0.4908, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.4182785299806576, |
| "grad_norm": 0.472673237323761, |
| "learning_rate": 4.952614577265582e-06, |
| "loss": 0.4679, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.41876208897485495, |
| "grad_norm": 0.4395389258861542, |
| "learning_rate": 4.95249084511901e-06, |
| "loss": 0.4921, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.4192456479690522, |
| "grad_norm": 0.5204181671142578, |
| "learning_rate": 4.952366953188852e-06, |
| "loss": 0.4487, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.4197292069632495, |
| "grad_norm": 0.4538368880748749, |
| "learning_rate": 4.952242901483181e-06, |
| "loss": 0.4677, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.42021276595744683, |
| "grad_norm": 0.4413670301437378, |
| "learning_rate": 4.952118690010077e-06, |
| "loss": 0.468, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.4206963249516441, |
| "grad_norm": 0.4103778898715973, |
| "learning_rate": 4.951994318777634e-06, |
| "loss": 0.4761, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.4211798839458414, |
| "grad_norm": 0.4950055778026581, |
| "learning_rate": 4.951869787793956e-06, |
| "loss": 0.4851, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.42166344294003866, |
| "grad_norm": 0.42650583386421204, |
| "learning_rate": 4.9517450970671544e-06, |
| "loss": 0.4815, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.422147001934236, |
| "grad_norm": 0.5697447061538696, |
| "learning_rate": 4.951620246605353e-06, |
| "loss": 0.4688, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.42263056092843326, |
| "grad_norm": 0.4130346179008484, |
| "learning_rate": 4.9514952364166886e-06, |
| "loss": 0.4542, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.42311411992263054, |
| "grad_norm": 0.4870031177997589, |
| "learning_rate": 4.9513700665093025e-06, |
| "loss": 0.4784, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.42359767891682787, |
| "grad_norm": 0.4782927334308624, |
| "learning_rate": 4.951244736891352e-06, |
| "loss": 0.4574, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.42408123791102514, |
| "grad_norm": 0.4310762584209442, |
| "learning_rate": 4.951119247571001e-06, |
| "loss": 0.4901, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.4245647969052224, |
| "grad_norm": 0.4448501765727997, |
| "learning_rate": 4.950993598556427e-06, |
| "loss": 0.4661, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.42504835589941975, |
| "grad_norm": 0.43321534991264343, |
| "learning_rate": 4.950867789855815e-06, |
| "loss": 0.4873, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.425531914893617, |
| "grad_norm": 0.4259124994277954, |
| "learning_rate": 4.950741821477361e-06, |
| "loss": 0.4714, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.4260154738878143, |
| "grad_norm": 0.4658030867576599, |
| "learning_rate": 4.950615693429275e-06, |
| "loss": 0.4611, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.42649903288201163, |
| "grad_norm": 0.44376733899116516, |
| "learning_rate": 4.950489405719771e-06, |
| "loss": 0.4789, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.4269825918762089, |
| "grad_norm": 0.42676928639411926, |
| "learning_rate": 4.950362958357078e-06, |
| "loss": 0.4604, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.4274661508704062, |
| "grad_norm": 0.4533901512622833, |
| "learning_rate": 4.950236351349436e-06, |
| "loss": 0.4909, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.42794970986460346, |
| "grad_norm": 0.4707791209220886, |
| "learning_rate": 4.950109584705091e-06, |
| "loss": 0.4622, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.4284332688588008, |
| "grad_norm": 0.46238699555397034, |
| "learning_rate": 4.949982658432303e-06, |
| "loss": 0.4925, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.42891682785299806, |
| "grad_norm": 0.8114577531814575, |
| "learning_rate": 4.9498555725393415e-06, |
| "loss": 0.4728, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.42940038684719534, |
| "grad_norm": 0.5063269138336182, |
| "learning_rate": 4.949728327034487e-06, |
| "loss": 0.4976, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.42988394584139267, |
| "grad_norm": 1.0141496658325195, |
| "learning_rate": 4.949600921926029e-06, |
| "loss": 0.4818, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.43036750483558994, |
| "grad_norm": 0.42779985070228577, |
| "learning_rate": 4.949473357222269e-06, |
| "loss": 0.4734, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.4308510638297872, |
| "grad_norm": 0.42237281799316406, |
| "learning_rate": 4.949345632931516e-06, |
| "loss": 0.4607, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.43133462282398455, |
| "grad_norm": 0.44154638051986694, |
| "learning_rate": 4.949217749062093e-06, |
| "loss": 0.4971, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.4318181818181818, |
| "grad_norm": 0.5624458193778992, |
| "learning_rate": 4.949089705622333e-06, |
| "loss": 0.4617, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.4323017408123791, |
| "grad_norm": 0.4163053035736084, |
| "learning_rate": 4.948961502620576e-06, |
| "loss": 0.4651, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.43278529980657643, |
| "grad_norm": 0.4546625316143036, |
| "learning_rate": 4.948833140065175e-06, |
| "loss": 0.4694, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.4332688588007737, |
| "grad_norm": 0.43393319845199585, |
| "learning_rate": 4.948704617964495e-06, |
| "loss": 0.4898, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.433752417794971, |
| "grad_norm": 0.6030253171920776, |
| "learning_rate": 4.948575936326907e-06, |
| "loss": 0.4709, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.43423597678916825, |
| "grad_norm": 0.46790799498558044, |
| "learning_rate": 4.948447095160796e-06, |
| "loss": 0.482, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.4347195357833656, |
| "grad_norm": 0.42384713888168335, |
| "learning_rate": 4.948318094474555e-06, |
| "loss": 0.4757, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.43520309477756286, |
| "grad_norm": 0.4444863796234131, |
| "learning_rate": 4.94818893427659e-06, |
| "loss": 0.4734, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.43568665377176014, |
| "grad_norm": 1.2005969285964966, |
| "learning_rate": 4.948059614575316e-06, |
| "loss": 0.4649, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.43617021276595747, |
| "grad_norm": 0.4669468402862549, |
| "learning_rate": 4.947930135379158e-06, |
| "loss": 0.4847, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.43665377176015474, |
| "grad_norm": 0.4478405714035034, |
| "learning_rate": 4.947800496696551e-06, |
| "loss": 0.4699, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.437137330754352, |
| "grad_norm": 0.419539213180542, |
| "learning_rate": 4.947670698535943e-06, |
| "loss": 0.4632, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.43762088974854935, |
| "grad_norm": 0.4189528822898865, |
| "learning_rate": 4.947540740905789e-06, |
| "loss": 0.4982, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.4381044487427466, |
| "grad_norm": 0.4514327943325043, |
| "learning_rate": 4.9474106238145555e-06, |
| "loss": 0.452, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.4385880077369439, |
| "grad_norm": 0.4805455207824707, |
| "learning_rate": 4.947280347270721e-06, |
| "loss": 0.4752, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.43907156673114117, |
| "grad_norm": 0.4451103210449219, |
| "learning_rate": 4.9471499112827726e-06, |
| "loss": 0.4759, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.4395551257253385, |
| "grad_norm": 0.45392805337905884, |
| "learning_rate": 4.947019315859209e-06, |
| "loss": 0.468, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.4400386847195358, |
| "grad_norm": 0.41176101565361023, |
| "learning_rate": 4.946888561008539e-06, |
| "loss": 0.4553, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.44052224371373305, |
| "grad_norm": 0.465889573097229, |
| "learning_rate": 4.94675764673928e-06, |
| "loss": 0.4712, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.4410058027079304, |
| "grad_norm": 0.4795728623867035, |
| "learning_rate": 4.946626573059963e-06, |
| "loss": 0.4866, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.44148936170212766, |
| "grad_norm": 0.4447157084941864, |
| "learning_rate": 4.946495339979126e-06, |
| "loss": 0.4409, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.44197292069632493, |
| "grad_norm": 0.47352585196495056, |
| "learning_rate": 4.946363947505321e-06, |
| "loss": 0.4564, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.44245647969052226, |
| "grad_norm": 0.41461408138275146, |
| "learning_rate": 4.946232395647106e-06, |
| "loss": 0.4687, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.44294003868471954, |
| "grad_norm": 0.4392583966255188, |
| "learning_rate": 4.946100684413053e-06, |
| "loss": 0.4679, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.4434235976789168, |
| "grad_norm": 0.4193490743637085, |
| "learning_rate": 4.945968813811743e-06, |
| "loss": 0.4774, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.44390715667311414, |
| "grad_norm": 0.4322820007801056, |
| "learning_rate": 4.945836783851769e-06, |
| "loss": 0.4764, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.4443907156673114, |
| "grad_norm": 0.6839078664779663, |
| "learning_rate": 4.945704594541731e-06, |
| "loss": 0.4749, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.4448742746615087, |
| "grad_norm": 0.4112018644809723, |
| "learning_rate": 4.945572245890242e-06, |
| "loss": 0.4743, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.44535783365570597, |
| "grad_norm": 0.4316619634628296, |
| "learning_rate": 4.945439737905926e-06, |
| "loss": 0.4702, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.4458413926499033, |
| "grad_norm": 0.4236302077770233, |
| "learning_rate": 4.945307070597414e-06, |
| "loss": 0.4705, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.4463249516441006, |
| "grad_norm": 0.4219607412815094, |
| "learning_rate": 4.9451742439733505e-06, |
| "loss": 0.4506, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.44680851063829785, |
| "grad_norm": 0.4156830608844757, |
| "learning_rate": 4.94504125804239e-06, |
| "loss": 0.4826, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.4472920696324952, |
| "grad_norm": 0.4480549395084381, |
| "learning_rate": 4.9449081128131945e-06, |
| "loss": 0.4713, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.44777562862669246, |
| "grad_norm": 0.4185437560081482, |
| "learning_rate": 4.944774808294441e-06, |
| "loss": 0.4748, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.44825918762088973, |
| "grad_norm": 0.4328237473964691, |
| "learning_rate": 4.944641344494815e-06, |
| "loss": 0.4784, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.44874274661508706, |
| "grad_norm": 0.44591256976127625, |
| "learning_rate": 4.9445077214230085e-06, |
| "loss": 0.4838, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.44922630560928434, |
| "grad_norm": 0.4475826323032379, |
| "learning_rate": 4.94437393908773e-06, |
| "loss": 0.4829, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.4497098646034816, |
| "grad_norm": 0.4374013841152191, |
| "learning_rate": 4.944239997497695e-06, |
| "loss": 0.4636, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.45019342359767894, |
| "grad_norm": 0.4626249670982361, |
| "learning_rate": 4.944105896661629e-06, |
| "loss": 0.4648, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.4506769825918762, |
| "grad_norm": 0.5082606077194214, |
| "learning_rate": 4.943971636588271e-06, |
| "loss": 0.4776, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.4511605415860735, |
| "grad_norm": 0.4721316695213318, |
| "learning_rate": 4.943837217286367e-06, |
| "loss": 0.4839, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.45164410058027077, |
| "grad_norm": 0.4364699125289917, |
| "learning_rate": 4.943702638764674e-06, |
| "loss": 0.4681, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.4521276595744681, |
| "grad_norm": 0.4581095278263092, |
| "learning_rate": 4.94356790103196e-06, |
| "loss": 0.4656, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.4526112185686654, |
| "grad_norm": 0.4476464092731476, |
| "learning_rate": 4.9434330040970054e-06, |
| "loss": 0.468, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.45309477756286265, |
| "grad_norm": 0.43913260102272034, |
| "learning_rate": 4.9432979479685975e-06, |
| "loss": 0.4868, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.45357833655706, |
| "grad_norm": 0.4544861912727356, |
| "learning_rate": 4.943162732655534e-06, |
| "loss": 0.4799, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.45406189555125726, |
| "grad_norm": 0.436844140291214, |
| "learning_rate": 4.943027358166628e-06, |
| "loss": 0.462, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.45454545454545453, |
| "grad_norm": 0.4419472813606262, |
| "learning_rate": 4.942891824510695e-06, |
| "loss": 0.4626, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.45502901353965186, |
| "grad_norm": 0.7483431696891785, |
| "learning_rate": 4.942756131696569e-06, |
| "loss": 0.4629, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.45551257253384914, |
| "grad_norm": 0.416421115398407, |
| "learning_rate": 4.942620279733089e-06, |
| "loss": 0.4794, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.4559961315280464, |
| "grad_norm": 0.4357644021511078, |
| "learning_rate": 4.9424842686291056e-06, |
| "loss": 0.449, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.4564796905222437, |
| "grad_norm": 0.4860648810863495, |
| "learning_rate": 4.9423480983934806e-06, |
| "loss": 0.4701, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.456963249516441, |
| "grad_norm": 0.45846328139305115, |
| "learning_rate": 4.9422117690350855e-06, |
| "loss": 0.4876, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.4574468085106383, |
| "grad_norm": 0.438472718000412, |
| "learning_rate": 4.942075280562802e-06, |
| "loss": 0.4594, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.45793036750483557, |
| "grad_norm": 0.4141273498535156, |
| "learning_rate": 4.941938632985524e-06, |
| "loss": 0.4691, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.4584139264990329, |
| "grad_norm": 0.4864295423030853, |
| "learning_rate": 4.941801826312153e-06, |
| "loss": 0.4619, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.4588974854932302, |
| "grad_norm": 0.4366392493247986, |
| "learning_rate": 4.941664860551603e-06, |
| "loss": 0.4571, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.45938104448742745, |
| "grad_norm": 0.43606841564178467, |
| "learning_rate": 4.941527735712796e-06, |
| "loss": 0.4656, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.4598646034816248, |
| "grad_norm": 0.43493443727493286, |
| "learning_rate": 4.941390451804668e-06, |
| "loss": 0.4786, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.46034816247582205, |
| "grad_norm": 0.49057891964912415, |
| "learning_rate": 4.941253008836162e-06, |
| "loss": 0.4673, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.46083172147001933, |
| "grad_norm": 0.428835391998291, |
| "learning_rate": 4.9411154068162325e-06, |
| "loss": 0.4514, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.46131528046421666, |
| "grad_norm": 0.41059479117393494, |
| "learning_rate": 4.940977645753845e-06, |
| "loss": 0.4852, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.46179883945841393, |
| "grad_norm": 0.4246974587440491, |
| "learning_rate": 4.9408397256579745e-06, |
| "loss": 0.4863, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.4622823984526112, |
| "grad_norm": 0.4776301085948944, |
| "learning_rate": 4.940701646537607e-06, |
| "loss": 0.4596, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.4627659574468085, |
| "grad_norm": 0.4455105662345886, |
| "learning_rate": 4.940563408401738e-06, |
| "loss": 0.4892, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.4632495164410058, |
| "grad_norm": 0.4926896095275879, |
| "learning_rate": 4.940425011259375e-06, |
| "loss": 0.4669, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.4637330754352031, |
| "grad_norm": 0.42672210931777954, |
| "learning_rate": 4.940286455119535e-06, |
| "loss": 0.4654, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.46421663442940037, |
| "grad_norm": 0.41769957542419434, |
| "learning_rate": 4.940147739991243e-06, |
| "loss": 0.4683, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.4647001934235977, |
| "grad_norm": 0.438005268573761, |
| "learning_rate": 4.940008865883538e-06, |
| "loss": 0.482, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.46518375241779497, |
| "grad_norm": 0.42409947514533997, |
| "learning_rate": 4.939869832805468e-06, |
| "loss": 0.4653, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.46566731141199225, |
| "grad_norm": 0.42889320850372314, |
| "learning_rate": 4.93973064076609e-06, |
| "loss": 0.4695, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.4661508704061896, |
| "grad_norm": 0.425735741853714, |
| "learning_rate": 4.9395912897744746e-06, |
| "loss": 0.4709, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.46663442940038685, |
| "grad_norm": 0.4460727870464325, |
| "learning_rate": 4.9394517798397e-06, |
| "loss": 0.4758, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.4671179883945841, |
| "grad_norm": 0.42308881878852844, |
| "learning_rate": 4.939312110970854e-06, |
| "loss": 0.4824, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.46760154738878146, |
| "grad_norm": 0.42842990159988403, |
| "learning_rate": 4.939172283177037e-06, |
| "loss": 0.4573, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.46808510638297873, |
| "grad_norm": 0.5351126194000244, |
| "learning_rate": 4.939032296467361e-06, |
| "loss": 0.4621, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.468568665377176, |
| "grad_norm": 0.42374545335769653, |
| "learning_rate": 4.938892150850944e-06, |
| "loss": 0.4715, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.4690522243713733, |
| "grad_norm": 0.42353230714797974, |
| "learning_rate": 4.938751846336917e-06, |
| "loss": 0.48, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.4695357833655706, |
| "grad_norm": 0.44193336367607117, |
| "learning_rate": 4.938611382934421e-06, |
| "loss": 0.4676, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.4700193423597679, |
| "grad_norm": 0.4455101191997528, |
| "learning_rate": 4.9384707606526084e-06, |
| "loss": 0.4788, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.47050290135396516, |
| "grad_norm": 0.4394087791442871, |
| "learning_rate": 4.938329979500641e-06, |
| "loss": 0.4496, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.4709864603481625, |
| "grad_norm": 0.5428414940834045, |
| "learning_rate": 4.9381890394876895e-06, |
| "loss": 0.4859, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.47147001934235977, |
| "grad_norm": 0.42253056168556213, |
| "learning_rate": 4.9380479406229375e-06, |
| "loss": 0.4572, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.47195357833655704, |
| "grad_norm": 0.4509589672088623, |
| "learning_rate": 4.9379066829155775e-06, |
| "loss": 0.4839, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.4724371373307544, |
| "grad_norm": 0.4330948293209076, |
| "learning_rate": 4.9377652663748125e-06, |
| "loss": 0.4597, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.47292069632495165, |
| "grad_norm": 0.48132073879241943, |
| "learning_rate": 4.9376236910098565e-06, |
| "loss": 0.4874, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.4734042553191489, |
| "grad_norm": 0.5274008512496948, |
| "learning_rate": 4.937481956829933e-06, |
| "loss": 0.4685, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.4738878143133462, |
| "grad_norm": 0.4428861737251282, |
| "learning_rate": 4.937340063844276e-06, |
| "loss": 0.5127, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.47437137330754353, |
| "grad_norm": 0.45901939272880554, |
| "learning_rate": 4.937198012062131e-06, |
| "loss": 0.4591, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.4748549323017408, |
| "grad_norm": 0.4397450089454651, |
| "learning_rate": 4.937055801492752e-06, |
| "loss": 0.4587, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.4753384912959381, |
| "grad_norm": 0.4530391991138458, |
| "learning_rate": 4.936913432145403e-06, |
| "loss": 0.459, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.4758220502901354, |
| "grad_norm": 0.4467536509037018, |
| "learning_rate": 4.936770904029362e-06, |
| "loss": 0.4735, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.4763056092843327, |
| "grad_norm": 0.45150211453437805, |
| "learning_rate": 4.936628217153914e-06, |
| "loss": 0.4736, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.47678916827852996, |
| "grad_norm": 0.4347638785839081, |
| "learning_rate": 4.936485371528356e-06, |
| "loss": 0.4715, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.4772727272727273, |
| "grad_norm": 0.45149534940719604, |
| "learning_rate": 4.936342367161992e-06, |
| "loss": 0.4834, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.47775628626692457, |
| "grad_norm": 0.47228920459747314, |
| "learning_rate": 4.936199204064142e-06, |
| "loss": 0.4773, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.47823984526112184, |
| "grad_norm": 0.4575802683830261, |
| "learning_rate": 4.936055882244132e-06, |
| "loss": 0.4739, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.4787234042553192, |
| "grad_norm": 0.44767946004867554, |
| "learning_rate": 4.935912401711299e-06, |
| "loss": 0.473, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.47920696324951645, |
| "grad_norm": 0.490839421749115, |
| "learning_rate": 4.935768762474993e-06, |
| "loss": 0.4979, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.4796905222437137, |
| "grad_norm": 0.50435870885849, |
| "learning_rate": 4.9356249645445695e-06, |
| "loss": 0.4661, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.480174081237911, |
| "grad_norm": 0.49018508195877075, |
| "learning_rate": 4.935481007929399e-06, |
| "loss": 0.4678, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.48065764023210833, |
| "grad_norm": 0.43506816029548645, |
| "learning_rate": 4.9353368926388615e-06, |
| "loss": 0.485, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.4811411992263056, |
| "grad_norm": 0.6681720614433289, |
| "learning_rate": 4.935192618682343e-06, |
| "loss": 0.4334, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.4816247582205029, |
| "grad_norm": 0.8788197040557861, |
| "learning_rate": 4.935048186069247e-06, |
| "loss": 0.4749, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.4821083172147002, |
| "grad_norm": 0.46034419536590576, |
| "learning_rate": 4.934903594808981e-06, |
| "loss": 0.4545, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.4825918762088975, |
| "grad_norm": 0.4211420714855194, |
| "learning_rate": 4.934758844910965e-06, |
| "loss": 0.4821, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.48307543520309476, |
| "grad_norm": 0.4607398509979248, |
| "learning_rate": 4.934613936384632e-06, |
| "loss": 0.4493, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.4835589941972921, |
| "grad_norm": 0.43166399002075195, |
| "learning_rate": 4.934468869239421e-06, |
| "loss": 0.4575, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.48404255319148937, |
| "grad_norm": 0.4517035484313965, |
| "learning_rate": 4.934323643484784e-06, |
| "loss": 0.4808, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.48452611218568664, |
| "grad_norm": 0.457938551902771, |
| "learning_rate": 4.934178259130183e-06, |
| "loss": 0.4672, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.48500967117988397, |
| "grad_norm": 1.534679651260376, |
| "learning_rate": 4.93403271618509e-06, |
| "loss": 0.4605, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.48549323017408125, |
| "grad_norm": 0.4722941517829895, |
| "learning_rate": 4.9338870146589866e-06, |
| "loss": 0.4811, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.4859767891682785, |
| "grad_norm": 0.4111993908882141, |
| "learning_rate": 4.933741154561367e-06, |
| "loss": 0.4608, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.4864603481624758, |
| "grad_norm": 0.5045567750930786, |
| "learning_rate": 4.933595135901733e-06, |
| "loss": 0.4731, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.48694390715667313, |
| "grad_norm": 0.4574251174926758, |
| "learning_rate": 4.9334489586895975e-06, |
| "loss": 0.467, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.4874274661508704, |
| "grad_norm": 0.4980320930480957, |
| "learning_rate": 4.933302622934485e-06, |
| "loss": 0.4492, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.4879110251450677, |
| "grad_norm": 0.4482066035270691, |
| "learning_rate": 4.933156128645929e-06, |
| "loss": 0.487, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.488394584139265, |
| "grad_norm": 0.42327502369880676, |
| "learning_rate": 4.933009475833474e-06, |
| "loss": 0.4513, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.4888781431334623, |
| "grad_norm": 0.44477933645248413, |
| "learning_rate": 4.9328626645066755e-06, |
| "loss": 0.4835, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.48936170212765956, |
| "grad_norm": 0.47308075428009033, |
| "learning_rate": 4.932715694675098e-06, |
| "loss": 0.464, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.4898452611218569, |
| "grad_norm": 0.563520610332489, |
| "learning_rate": 4.932568566348316e-06, |
| "loss": 0.4828, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.49032882011605416, |
| "grad_norm": 0.4451202154159546, |
| "learning_rate": 4.932421279535916e-06, |
| "loss": 0.4705, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.49081237911025144, |
| "grad_norm": 0.4930382966995239, |
| "learning_rate": 4.932273834247494e-06, |
| "loss": 0.4507, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.4912959381044487, |
| "grad_norm": 0.4431816637516022, |
| "learning_rate": 4.932126230492656e-06, |
| "loss": 0.4553, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.49177949709864605, |
| "grad_norm": 0.4477396607398987, |
| "learning_rate": 4.931978468281018e-06, |
| "loss": 0.4754, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.4922630560928433, |
| "grad_norm": 0.5088204145431519, |
| "learning_rate": 4.9318305476222074e-06, |
| "loss": 0.4628, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.4927466150870406, |
| "grad_norm": 0.43095219135284424, |
| "learning_rate": 4.931682468525863e-06, |
| "loss": 0.4791, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.4932301740812379, |
| "grad_norm": 0.5227051377296448, |
| "learning_rate": 4.931534231001629e-06, |
| "loss": 0.4715, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.4937137330754352, |
| "grad_norm": 0.44510793685913086, |
| "learning_rate": 4.931385835059167e-06, |
| "loss": 0.4518, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.4941972920696325, |
| "grad_norm": 0.4213857054710388, |
| "learning_rate": 4.9312372807081424e-06, |
| "loss": 0.4612, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.4946808510638298, |
| "grad_norm": 0.42244675755500793, |
| "learning_rate": 4.9310885679582355e-06, |
| "loss": 0.4554, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.4951644100580271, |
| "grad_norm": 0.5022426843643188, |
| "learning_rate": 4.930939696819135e-06, |
| "loss": 0.4664, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.49564796905222436, |
| "grad_norm": 0.45399320125579834, |
| "learning_rate": 4.930790667300539e-06, |
| "loss": 0.4684, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.4961315280464217, |
| "grad_norm": 0.52347731590271, |
| "learning_rate": 4.930641479412157e-06, |
| "loss": 0.4799, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.49661508704061896, |
| "grad_norm": 0.4325565993785858, |
| "learning_rate": 4.93049213316371e-06, |
| "loss": 0.4864, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.49709864603481624, |
| "grad_norm": 0.4530152380466461, |
| "learning_rate": 4.930342628564928e-06, |
| "loss": 0.4495, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.4975822050290135, |
| "grad_norm": 0.4562103748321533, |
| "learning_rate": 4.930192965625551e-06, |
| "loss": 0.4778, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.49806576402321084, |
| "grad_norm": 0.4942949116230011, |
| "learning_rate": 4.9300431443553295e-06, |
| "loss": 0.4752, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.4985493230174081, |
| "grad_norm": 0.4768705368041992, |
| "learning_rate": 4.929893164764025e-06, |
| "loss": 0.4871, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.4990328820116054, |
| "grad_norm": 0.41821593046188354, |
| "learning_rate": 4.929743026861409e-06, |
| "loss": 0.4548, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.4995164410058027, |
| "grad_norm": 0.4341305196285248, |
| "learning_rate": 4.929592730657262e-06, |
| "loss": 0.4626, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.41584035754203796, |
| "learning_rate": 4.929442276161378e-06, |
| "loss": 0.4544, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.5004835589941973, |
| "grad_norm": 0.42965272068977356, |
| "learning_rate": 4.929291663383559e-06, |
| "loss": 0.4483, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.5009671179883946, |
| "grad_norm": 0.40160828828811646, |
| "learning_rate": 4.929140892333616e-06, |
| "loss": 0.4482, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.5014506769825918, |
| "grad_norm": 0.4636249840259552, |
| "learning_rate": 4.928989963021373e-06, |
| "loss": 0.4652, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.5019342359767892, |
| "grad_norm": 0.4661104083061218, |
| "learning_rate": 4.928838875456664e-06, |
| "loss": 0.4787, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.5024177949709865, |
| "grad_norm": 0.47609642148017883, |
| "learning_rate": 4.928687629649331e-06, |
| "loss": 0.4709, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.5029013539651838, |
| "grad_norm": 0.45435553789138794, |
| "learning_rate": 4.92853622560923e-06, |
| "loss": 0.4614, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.503384912959381, |
| "grad_norm": 0.4226183295249939, |
| "learning_rate": 4.928384663346223e-06, |
| "loss": 0.4724, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.5038684719535783, |
| "grad_norm": 0.4644449055194855, |
| "learning_rate": 4.9282329428701865e-06, |
| "loss": 0.4677, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.5043520309477756, |
| "grad_norm": 0.46138256788253784, |
| "learning_rate": 4.928081064191004e-06, |
| "loss": 0.4755, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.504835589941973, |
| "grad_norm": 0.6464718580245972, |
| "learning_rate": 4.92792902731857e-06, |
| "loss": 0.4525, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.5053191489361702, |
| "grad_norm": 0.4627712666988373, |
| "learning_rate": 4.927776832262792e-06, |
| "loss": 0.4712, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.5058027079303675, |
| "grad_norm": 0.44957953691482544, |
| "learning_rate": 4.9276244790335844e-06, |
| "loss": 0.448, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.5062862669245648, |
| "grad_norm": 0.44223344326019287, |
| "learning_rate": 4.927471967640873e-06, |
| "loss": 0.4597, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.5067698259187621, |
| "grad_norm": 0.47617247700691223, |
| "learning_rate": 4.927319298094596e-06, |
| "loss": 0.4704, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.5072533849129593, |
| "grad_norm": 0.4532371461391449, |
| "learning_rate": 4.927166470404698e-06, |
| "loss": 0.4833, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.5077369439071566, |
| "grad_norm": 0.4266747832298279, |
| "learning_rate": 4.9270134845811355e-06, |
| "loss": 0.455, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.508220502901354, |
| "grad_norm": 0.4532632529735565, |
| "learning_rate": 4.926860340633879e-06, |
| "loss": 0.4707, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.5087040618955513, |
| "grad_norm": 0.45259150862693787, |
| "learning_rate": 4.926707038572903e-06, |
| "loss": 0.467, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.5091876208897486, |
| "grad_norm": 0.4061692953109741, |
| "learning_rate": 4.9265535784081965e-06, |
| "loss": 0.4461, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.5096711798839458, |
| "grad_norm": 0.4721916913986206, |
| "learning_rate": 4.926399960149757e-06, |
| "loss": 0.4555, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.5101547388781431, |
| "grad_norm": 0.4325840473175049, |
| "learning_rate": 4.926246183807593e-06, |
| "loss": 0.4696, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.5106382978723404, |
| "grad_norm": 0.4277395009994507, |
| "learning_rate": 4.926092249391725e-06, |
| "loss": 0.4773, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.5111218568665378, |
| "grad_norm": 0.4312560260295868, |
| "learning_rate": 4.925938156912181e-06, |
| "loss": 0.4863, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.511605415860735, |
| "grad_norm": 1.3537847995758057, |
| "learning_rate": 4.925783906379e-06, |
| "loss": 0.4593, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.5120889748549323, |
| "grad_norm": 0.4193797707557678, |
| "learning_rate": 4.925629497802232e-06, |
| "loss": 0.4708, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.5125725338491296, |
| "grad_norm": 0.4714594781398773, |
| "learning_rate": 4.9254749311919355e-06, |
| "loss": 0.4675, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.5130560928433269, |
| "grad_norm": 0.44514137506484985, |
| "learning_rate": 4.925320206558184e-06, |
| "loss": 0.4916, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.5135396518375241, |
| "grad_norm": 0.4270736575126648, |
| "learning_rate": 4.9251653239110555e-06, |
| "loss": 0.488, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.5140232108317214, |
| "grad_norm": 0.43738606572151184, |
| "learning_rate": 4.925010283260641e-06, |
| "loss": 0.4749, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.5145067698259188, |
| "grad_norm": 0.5914368629455566, |
| "learning_rate": 4.924855084617042e-06, |
| "loss": 0.4686, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.5149903288201161, |
| "grad_norm": 0.5277994275093079, |
| "learning_rate": 4.92469972799037e-06, |
| "loss": 0.4839, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.5154738878143134, |
| "grad_norm": 0.5200470685958862, |
| "learning_rate": 4.9245442133907475e-06, |
| "loss": 0.4735, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.5159574468085106, |
| "grad_norm": 0.4452257454395294, |
| "learning_rate": 4.924388540828305e-06, |
| "loss": 0.4609, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.5164410058027079, |
| "grad_norm": 0.5149017572402954, |
| "learning_rate": 4.924232710313187e-06, |
| "loss": 0.4712, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.5169245647969052, |
| "grad_norm": 0.41530728340148926, |
| "learning_rate": 4.924076721855544e-06, |
| "loss": 0.4748, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.5174081237911026, |
| "grad_norm": 0.45812344551086426, |
| "learning_rate": 4.923920575465539e-06, |
| "loss": 0.4664, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.5178916827852998, |
| "grad_norm": 0.4297925531864166, |
| "learning_rate": 4.923764271153346e-06, |
| "loss": 0.4812, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.5183752417794971, |
| "grad_norm": 0.43673601746559143, |
| "learning_rate": 4.923607808929149e-06, |
| "loss": 0.4585, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.5188588007736944, |
| "grad_norm": 0.40688005089759827, |
| "learning_rate": 4.92345118880314e-06, |
| "loss": 0.4898, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.5193423597678917, |
| "grad_norm": 0.4260459244251251, |
| "learning_rate": 4.923294410785525e-06, |
| "loss": 0.4628, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.519825918762089, |
| "grad_norm": 0.44062745571136475, |
| "learning_rate": 4.923137474886517e-06, |
| "loss": 0.4706, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.5203094777562862, |
| "grad_norm": 0.6602137088775635, |
| "learning_rate": 4.92298038111634e-06, |
| "loss": 0.4732, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.5207930367504836, |
| "grad_norm": 0.4522308111190796, |
| "learning_rate": 4.922823129485231e-06, |
| "loss": 0.491, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.5212765957446809, |
| "grad_norm": 0.4215511381626129, |
| "learning_rate": 4.9226657200034335e-06, |
| "loss": 0.4792, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.5217601547388782, |
| "grad_norm": 0.44352343678474426, |
| "learning_rate": 4.922508152681205e-06, |
| "loss": 0.4434, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.5222437137330754, |
| "grad_norm": 0.4431546926498413, |
| "learning_rate": 4.922350427528808e-06, |
| "loss": 0.4652, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.5227272727272727, |
| "grad_norm": 0.4811055064201355, |
| "learning_rate": 4.922192544556521e-06, |
| "loss": 0.479, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.52321083172147, |
| "grad_norm": 0.6013498902320862, |
| "learning_rate": 4.922034503774629e-06, |
| "loss": 0.4493, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.5236943907156673, |
| "grad_norm": 0.5949416756629944, |
| "learning_rate": 4.921876305193431e-06, |
| "loss": 0.4614, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.5241779497098646, |
| "grad_norm": 0.4291402995586395, |
| "learning_rate": 4.9217179488232315e-06, |
| "loss": 0.4946, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.5246615087040619, |
| "grad_norm": 0.42322617769241333, |
| "learning_rate": 4.921559434674348e-06, |
| "loss": 0.4616, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.5251450676982592, |
| "grad_norm": 0.4919283390045166, |
| "learning_rate": 4.921400762757108e-06, |
| "loss": 0.4372, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.5256286266924565, |
| "grad_norm": 0.5418685078620911, |
| "learning_rate": 4.92124193308185e-06, |
| "loss": 0.486, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.5261121856866537, |
| "grad_norm": 0.4854177236557007, |
| "learning_rate": 4.921082945658922e-06, |
| "loss": 0.4812, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.526595744680851, |
| "grad_norm": 0.46961551904678345, |
| "learning_rate": 4.92092380049868e-06, |
| "loss": 0.4834, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.5270793036750484, |
| "grad_norm": 0.5009759664535522, |
| "learning_rate": 4.920764497611496e-06, |
| "loss": 0.4734, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.5275628626692457, |
| "grad_norm": 0.48609328269958496, |
| "learning_rate": 4.9206050370077464e-06, |
| "loss": 0.4739, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.528046421663443, |
| "grad_norm": 0.4356881082057953, |
| "learning_rate": 4.920445418697821e-06, |
| "loss": 0.473, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.5285299806576402, |
| "grad_norm": 0.4519873559474945, |
| "learning_rate": 4.9202856426921195e-06, |
| "loss": 0.4583, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.5290135396518375, |
| "grad_norm": 0.42688310146331787, |
| "learning_rate": 4.920125709001051e-06, |
| "loss": 0.4435, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.5294970986460348, |
| "grad_norm": 0.4380148649215698, |
| "learning_rate": 4.9199656176350354e-06, |
| "loss": 0.4768, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.5299806576402321, |
| "grad_norm": 0.4532415568828583, |
| "learning_rate": 4.9198053686045044e-06, |
| "loss": 0.4712, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.5304642166344294, |
| "grad_norm": 0.44293212890625, |
| "learning_rate": 4.919644961919896e-06, |
| "loss": 0.4499, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.5309477756286267, |
| "grad_norm": 0.45117902755737305, |
| "learning_rate": 4.919484397591663e-06, |
| "loss": 0.4719, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.531431334622824, |
| "grad_norm": 0.43399882316589355, |
| "learning_rate": 4.9193236756302654e-06, |
| "loss": 0.4739, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.5319148936170213, |
| "grad_norm": 0.4192769527435303, |
| "learning_rate": 4.9191627960461756e-06, |
| "loss": 0.458, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5323984526112185, |
| "grad_norm": 0.4767812192440033, |
| "learning_rate": 4.919001758849873e-06, |
| "loss": 0.4871, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.5328820116054158, |
| "grad_norm": 0.47003915905952454, |
| "learning_rate": 4.918840564051851e-06, |
| "loss": 0.477, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.5333655705996132, |
| "grad_norm": 0.42697975039482117, |
| "learning_rate": 4.918679211662613e-06, |
| "loss": 0.4792, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.5338491295938105, |
| "grad_norm": 0.44821104407310486, |
| "learning_rate": 4.918517701692668e-06, |
| "loss": 0.4609, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.5343326885880078, |
| "grad_norm": 0.47149497270584106, |
| "learning_rate": 4.91835603415254e-06, |
| "loss": 0.4549, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.534816247582205, |
| "grad_norm": 0.4447268843650818, |
| "learning_rate": 4.918194209052764e-06, |
| "loss": 0.4653, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.5352998065764023, |
| "grad_norm": 0.44459810853004456, |
| "learning_rate": 4.9180322264038805e-06, |
| "loss": 0.4749, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.5357833655705996, |
| "grad_norm": 0.4288979768753052, |
| "learning_rate": 4.917870086216443e-06, |
| "loss": 0.4628, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.5362669245647969, |
| "grad_norm": 0.4439257085323334, |
| "learning_rate": 4.917707788501017e-06, |
| "loss": 0.4751, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.5367504835589942, |
| "grad_norm": 0.43748125433921814, |
| "learning_rate": 4.917545333268176e-06, |
| "loss": 0.4565, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.5372340425531915, |
| "grad_norm": 0.5199353694915771, |
| "learning_rate": 4.917382720528503e-06, |
| "loss": 0.4612, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.5377176015473888, |
| "grad_norm": 0.5482754111289978, |
| "learning_rate": 4.917219950292593e-06, |
| "loss": 0.4731, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.5382011605415861, |
| "grad_norm": 0.49058425426483154, |
| "learning_rate": 4.917057022571052e-06, |
| "loss": 0.4473, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.5386847195357833, |
| "grad_norm": 0.41992440819740295, |
| "learning_rate": 4.9168939373744926e-06, |
| "loss": 0.487, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.5391682785299806, |
| "grad_norm": 0.4376087486743927, |
| "learning_rate": 4.916730694713542e-06, |
| "loss": 0.4837, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.539651837524178, |
| "grad_norm": 0.44848495721817017, |
| "learning_rate": 4.916567294598835e-06, |
| "loss": 0.4742, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.5401353965183753, |
| "grad_norm": 0.6971132159233093, |
| "learning_rate": 4.916403737041018e-06, |
| "loss": 0.4804, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.5406189555125726, |
| "grad_norm": 0.45096465945243835, |
| "learning_rate": 4.916240022050746e-06, |
| "loss": 0.4502, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.5411025145067698, |
| "grad_norm": 0.4433208703994751, |
| "learning_rate": 4.916076149638686e-06, |
| "loss": 0.4732, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.5415860735009671, |
| "grad_norm": 0.44899240136146545, |
| "learning_rate": 4.915912119815513e-06, |
| "loss": 0.4709, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.5420696324951644, |
| "grad_norm": 0.4663994610309601, |
| "learning_rate": 4.915747932591916e-06, |
| "loss": 0.4566, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.5425531914893617, |
| "grad_norm": 0.42710351943969727, |
| "learning_rate": 4.915583587978591e-06, |
| "loss": 0.4637, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.543036750483559, |
| "grad_norm": 0.4772963523864746, |
| "learning_rate": 4.915419085986246e-06, |
| "loss": 0.468, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.5435203094777563, |
| "grad_norm": 0.4386994540691376, |
| "learning_rate": 4.915254426625597e-06, |
| "loss": 0.4963, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.5440038684719536, |
| "grad_norm": 0.4515167772769928, |
| "learning_rate": 4.915089609907374e-06, |
| "loss": 0.4943, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.5444874274661509, |
| "grad_norm": 0.4320240020751953, |
| "learning_rate": 4.914924635842314e-06, |
| "loss": 0.4656, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.5449709864603481, |
| "grad_norm": 0.47680628299713135, |
| "learning_rate": 4.914759504441165e-06, |
| "loss": 0.471, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.5454545454545454, |
| "grad_norm": 0.46807920932769775, |
| "learning_rate": 4.914594215714685e-06, |
| "loss": 0.454, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.5459381044487428, |
| "grad_norm": 0.48494380712509155, |
| "learning_rate": 4.914428769673644e-06, |
| "loss": 0.473, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.5464216634429401, |
| "grad_norm": 0.4585503935813904, |
| "learning_rate": 4.91426316632882e-06, |
| "loss": 0.4757, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.5469052224371374, |
| "grad_norm": 0.4581963121891022, |
| "learning_rate": 4.914097405691004e-06, |
| "loss": 0.4895, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.5473887814313346, |
| "grad_norm": 0.4545809328556061, |
| "learning_rate": 4.913931487770994e-06, |
| "loss": 0.4562, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.5478723404255319, |
| "grad_norm": 0.4689292907714844, |
| "learning_rate": 4.913765412579601e-06, |
| "loss": 0.4663, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.5483558994197292, |
| "grad_norm": 0.4483727216720581, |
| "learning_rate": 4.9135991801276435e-06, |
| "loss": 0.4647, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.5488394584139265, |
| "grad_norm": 0.5269478559494019, |
| "learning_rate": 4.9134327904259525e-06, |
| "loss": 0.463, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.5493230174081238, |
| "grad_norm": 0.4482375979423523, |
| "learning_rate": 4.91326624348537e-06, |
| "loss": 0.4689, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.5498065764023211, |
| "grad_norm": 0.5002569556236267, |
| "learning_rate": 4.913099539316744e-06, |
| "loss": 0.4805, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.5502901353965184, |
| "grad_norm": 0.48099225759506226, |
| "learning_rate": 4.912932677930939e-06, |
| "loss": 0.4671, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.5507736943907157, |
| "grad_norm": 0.42564383149147034, |
| "learning_rate": 4.912765659338823e-06, |
| "loss": 0.4614, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.5512572533849129, |
| "grad_norm": 0.4481738209724426, |
| "learning_rate": 4.912598483551279e-06, |
| "loss": 0.4674, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.5517408123791102, |
| "grad_norm": 0.42052412033081055, |
| "learning_rate": 4.9124311505792e-06, |
| "loss": 0.457, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.5522243713733076, |
| "grad_norm": 0.4620019793510437, |
| "learning_rate": 4.912263660433485e-06, |
| "loss": 0.4756, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.5527079303675049, |
| "grad_norm": 0.46690240502357483, |
| "learning_rate": 4.912096013125048e-06, |
| "loss": 0.4696, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.5531914893617021, |
| "grad_norm": 0.4311160743236542, |
| "learning_rate": 4.911928208664813e-06, |
| "loss": 0.4662, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.5536750483558994, |
| "grad_norm": 0.4894680678844452, |
| "learning_rate": 4.911760247063709e-06, |
| "loss": 0.462, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.5541586073500967, |
| "grad_norm": 0.4718480706214905, |
| "learning_rate": 4.9115921283326814e-06, |
| "loss": 0.4586, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.554642166344294, |
| "grad_norm": 0.44869405031204224, |
| "learning_rate": 4.911423852482684e-06, |
| "loss": 0.453, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.5551257253384912, |
| "grad_norm": 0.436161607503891, |
| "learning_rate": 4.9112554195246785e-06, |
| "loss": 0.4875, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.5556092843326886, |
| "grad_norm": 0.44257354736328125, |
| "learning_rate": 4.91108682946964e-06, |
| "loss": 0.4666, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.5560928433268859, |
| "grad_norm": 0.4438575506210327, |
| "learning_rate": 4.910918082328552e-06, |
| "loss": 0.4633, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.5565764023210832, |
| "grad_norm": 0.4686394929885864, |
| "learning_rate": 4.910749178112407e-06, |
| "loss": 0.466, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.5570599613152805, |
| "grad_norm": 0.45705240964889526, |
| "learning_rate": 4.910580116832212e-06, |
| "loss": 0.4821, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.5575435203094777, |
| "grad_norm": 0.458423376083374, |
| "learning_rate": 4.91041089849898e-06, |
| "loss": 0.4537, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.558027079303675, |
| "grad_norm": 0.45928600430488586, |
| "learning_rate": 4.910241523123736e-06, |
| "loss": 0.4624, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.5585106382978723, |
| "grad_norm": 0.44112783670425415, |
| "learning_rate": 4.910071990717516e-06, |
| "loss": 0.4542, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.5589941972920697, |
| "grad_norm": 0.5385165214538574, |
| "learning_rate": 4.909902301291364e-06, |
| "loss": 0.4553, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.559477756286267, |
| "grad_norm": 0.45208409428596497, |
| "learning_rate": 4.909732454856336e-06, |
| "loss": 0.4694, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.5599613152804642, |
| "grad_norm": 0.43043410778045654, |
| "learning_rate": 4.909562451423498e-06, |
| "loss": 0.486, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.5604448742746615, |
| "grad_norm": 0.5170729756355286, |
| "learning_rate": 4.909392291003926e-06, |
| "loss": 0.4555, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.5609284332688588, |
| "grad_norm": 0.5027633309364319, |
| "learning_rate": 4.909221973608705e-06, |
| "loss": 0.446, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.561411992263056, |
| "grad_norm": 0.5824642777442932, |
| "learning_rate": 4.909051499248934e-06, |
| "loss": 0.4772, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.5618955512572534, |
| "grad_norm": 0.42544853687286377, |
| "learning_rate": 4.908880867935717e-06, |
| "loss": 0.47, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.5623791102514507, |
| "grad_norm": 0.42126137018203735, |
| "learning_rate": 4.908710079680173e-06, |
| "loss": 0.4701, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.562862669245648, |
| "grad_norm": 0.432250052690506, |
| "learning_rate": 4.908539134493428e-06, |
| "loss": 0.4603, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.5633462282398453, |
| "grad_norm": 0.43167534470558167, |
| "learning_rate": 4.908368032386619e-06, |
| "loss": 0.4692, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.5638297872340425, |
| "grad_norm": 0.4323950707912445, |
| "learning_rate": 4.9081967733708945e-06, |
| "loss": 0.4536, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.5643133462282398, |
| "grad_norm": 0.42598477005958557, |
| "learning_rate": 4.908025357457412e-06, |
| "loss": 0.4628, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.5647969052224371, |
| "grad_norm": 0.445354163646698, |
| "learning_rate": 4.907853784657339e-06, |
| "loss": 0.4731, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.5652804642166345, |
| "grad_norm": 0.48324230313301086, |
| "learning_rate": 4.907682054981855e-06, |
| "loss": 0.4568, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.5657640232108317, |
| "grad_norm": 0.4197344481945038, |
| "learning_rate": 4.9075101684421474e-06, |
| "loss": 0.4668, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.566247582205029, |
| "grad_norm": 0.5068573355674744, |
| "learning_rate": 4.907338125049415e-06, |
| "loss": 0.4607, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.5667311411992263, |
| "grad_norm": 0.57051682472229, |
| "learning_rate": 4.907165924814866e-06, |
| "loss": 0.46, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.5672147001934236, |
| "grad_norm": 0.48964497447013855, |
| "learning_rate": 4.9069935677497206e-06, |
| "loss": 0.4494, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.5676982591876208, |
| "grad_norm": 0.4429013431072235, |
| "learning_rate": 4.906821053865208e-06, |
| "loss": 0.4689, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.5681818181818182, |
| "grad_norm": 0.5011411905288696, |
| "learning_rate": 4.906648383172567e-06, |
| "loss": 0.478, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.5686653771760155, |
| "grad_norm": 0.4188624620437622, |
| "learning_rate": 4.906475555683049e-06, |
| "loss": 0.4657, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.5691489361702128, |
| "grad_norm": 0.4383828341960907, |
| "learning_rate": 4.9063025714079125e-06, |
| "loss": 0.4604, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.5696324951644101, |
| "grad_norm": 0.4345352351665497, |
| "learning_rate": 4.906129430358428e-06, |
| "loss": 0.4467, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.5701160541586073, |
| "grad_norm": 0.46759259700775146, |
| "learning_rate": 4.905956132545876e-06, |
| "loss": 0.4553, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.5705996131528046, |
| "grad_norm": 0.4374469518661499, |
| "learning_rate": 4.905782677981546e-06, |
| "loss": 0.4414, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.5710831721470019, |
| "grad_norm": 0.553343653678894, |
| "learning_rate": 4.905609066676742e-06, |
| "loss": 0.4467, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.5715667311411993, |
| "grad_norm": 0.49388110637664795, |
| "learning_rate": 4.905435298642771e-06, |
| "loss": 0.4395, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.5720502901353965, |
| "grad_norm": 0.4327850341796875, |
| "learning_rate": 4.905261373890958e-06, |
| "loss": 0.4563, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.5725338491295938, |
| "grad_norm": 0.47157958149909973, |
| "learning_rate": 4.905087292432632e-06, |
| "loss": 0.4556, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.5730174081237911, |
| "grad_norm": 0.4458956718444824, |
| "learning_rate": 4.904913054279136e-06, |
| "loss": 0.4573, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.5735009671179884, |
| "grad_norm": 0.4228770434856415, |
| "learning_rate": 4.90473865944182e-06, |
| "loss": 0.4671, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.5739845261121856, |
| "grad_norm": 0.7752367854118347, |
| "learning_rate": 4.904564107932048e-06, |
| "loss": 0.4719, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.574468085106383, |
| "grad_norm": 0.49437078833580017, |
| "learning_rate": 4.904389399761192e-06, |
| "loss": 0.4663, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.5749516441005803, |
| "grad_norm": 0.4362508952617645, |
| "learning_rate": 4.9042145349406335e-06, |
| "loss": 0.464, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.5754352030947776, |
| "grad_norm": 0.4986853003501892, |
| "learning_rate": 4.9040395134817666e-06, |
| "loss": 0.4408, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.5759187620889749, |
| "grad_norm": 0.5251513719558716, |
| "learning_rate": 4.9038643353959935e-06, |
| "loss": 0.4656, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.5764023210831721, |
| "grad_norm": 1.008799433708191, |
| "learning_rate": 4.903689000694727e-06, |
| "loss": 0.4876, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.5768858800773694, |
| "grad_norm": 0.47108638286590576, |
| "learning_rate": 4.903513509389391e-06, |
| "loss": 0.4801, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.5773694390715667, |
| "grad_norm": 0.43808120489120483, |
| "learning_rate": 4.903337861491418e-06, |
| "loss": 0.4584, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.5778529980657641, |
| "grad_norm": 0.5051426887512207, |
| "learning_rate": 4.903162057012254e-06, |
| "loss": 0.469, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.5783365570599613, |
| "grad_norm": 0.5038641691207886, |
| "learning_rate": 4.9029860959633504e-06, |
| "loss": 0.4615, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.5788201160541586, |
| "grad_norm": 0.49047771096229553, |
| "learning_rate": 4.902809978356173e-06, |
| "loss": 0.4678, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.5793036750483559, |
| "grad_norm": 0.4524144232273102, |
| "learning_rate": 4.902633704202196e-06, |
| "loss": 0.4773, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.5797872340425532, |
| "grad_norm": 0.4350757598876953, |
| "learning_rate": 4.9024572735129026e-06, |
| "loss": 0.4725, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.5802707930367504, |
| "grad_norm": 0.4484714865684509, |
| "learning_rate": 4.902280686299789e-06, |
| "loss": 0.454, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5807543520309478, |
| "grad_norm": 0.429993212223053, |
| "learning_rate": 4.902103942574358e-06, |
| "loss": 0.4608, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.5812379110251451, |
| "grad_norm": 0.4781748652458191, |
| "learning_rate": 4.901927042348128e-06, |
| "loss": 0.4678, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.5817214700193424, |
| "grad_norm": 0.41221436858177185, |
| "learning_rate": 4.901749985632622e-06, |
| "loss": 0.4516, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.5822050290135397, |
| "grad_norm": 0.5129284262657166, |
| "learning_rate": 4.901572772439376e-06, |
| "loss": 0.4649, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.5826885880077369, |
| "grad_norm": 0.4145076870918274, |
| "learning_rate": 4.901395402779936e-06, |
| "loss": 0.4341, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.5831721470019342, |
| "grad_norm": 0.4602621793746948, |
| "learning_rate": 4.901217876665858e-06, |
| "loss": 0.4457, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.5836557059961315, |
| "grad_norm": 0.4283426105976105, |
| "learning_rate": 4.9010401941087074e-06, |
| "loss": 0.4755, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.5841392649903289, |
| "grad_norm": 0.4351952373981476, |
| "learning_rate": 4.900862355120061e-06, |
| "loss": 0.4692, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.5846228239845261, |
| "grad_norm": 0.4370359182357788, |
| "learning_rate": 4.900684359711505e-06, |
| "loss": 0.4749, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.5851063829787234, |
| "grad_norm": 0.4324999749660492, |
| "learning_rate": 4.900506207894637e-06, |
| "loss": 0.4791, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.5855899419729207, |
| "grad_norm": 0.4950762391090393, |
| "learning_rate": 4.900327899681064e-06, |
| "loss": 0.4744, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.586073500967118, |
| "grad_norm": 0.44710519909858704, |
| "learning_rate": 4.9001494350824e-06, |
| "loss": 0.4681, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.5865570599613152, |
| "grad_norm": 0.4250546395778656, |
| "learning_rate": 4.899970814110276e-06, |
| "loss": 0.4687, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.5870406189555126, |
| "grad_norm": 0.5042000412940979, |
| "learning_rate": 4.899792036776327e-06, |
| "loss": 0.4839, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.5875241779497099, |
| "grad_norm": 0.5127058625221252, |
| "learning_rate": 4.899613103092202e-06, |
| "loss": 0.4536, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.5880077369439072, |
| "grad_norm": 0.4671849310398102, |
| "learning_rate": 4.899434013069558e-06, |
| "loss": 0.4656, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.5884912959381045, |
| "grad_norm": 0.4258441925048828, |
| "learning_rate": 4.899254766720064e-06, |
| "loss": 0.4626, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.5889748549323017, |
| "grad_norm": 0.46578940749168396, |
| "learning_rate": 4.899075364055398e-06, |
| "loss": 0.4835, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.589458413926499, |
| "grad_norm": 0.461418479681015, |
| "learning_rate": 4.898895805087247e-06, |
| "loss": 0.4676, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.5899419729206963, |
| "grad_norm": 0.4375152885913849, |
| "learning_rate": 4.89871608982731e-06, |
| "loss": 0.4608, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.5904255319148937, |
| "grad_norm": 0.47547534108161926, |
| "learning_rate": 4.898536218287296e-06, |
| "loss": 0.4752, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.5909090909090909, |
| "grad_norm": 0.46386417746543884, |
| "learning_rate": 4.898356190478925e-06, |
| "loss": 0.4508, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.5913926499032882, |
| "grad_norm": 0.4304327070713043, |
| "learning_rate": 4.898176006413925e-06, |
| "loss": 0.4706, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.5918762088974855, |
| "grad_norm": 0.6741103529930115, |
| "learning_rate": 4.897995666104035e-06, |
| "loss": 0.45, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.5923597678916828, |
| "grad_norm": 0.4445508122444153, |
| "learning_rate": 4.897815169561005e-06, |
| "loss": 0.4373, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.59284332688588, |
| "grad_norm": 0.6164776682853699, |
| "learning_rate": 4.897634516796595e-06, |
| "loss": 0.4469, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.5933268858800773, |
| "grad_norm": 0.4640771746635437, |
| "learning_rate": 4.897453707822574e-06, |
| "loss": 0.4671, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.5938104448742747, |
| "grad_norm": 0.5377663969993591, |
| "learning_rate": 4.897272742650722e-06, |
| "loss": 0.461, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.594294003868472, |
| "grad_norm": 0.4565662443637848, |
| "learning_rate": 4.8970916212928295e-06, |
| "loss": 0.4685, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.5947775628626693, |
| "grad_norm": 0.5152607560157776, |
| "learning_rate": 4.896910343760697e-06, |
| "loss": 0.4663, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.5952611218568665, |
| "grad_norm": 0.4439282715320587, |
| "learning_rate": 4.896728910066136e-06, |
| "loss": 0.4623, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.5957446808510638, |
| "grad_norm": 0.44906753301620483, |
| "learning_rate": 4.896547320220964e-06, |
| "loss": 0.4712, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.5962282398452611, |
| "grad_norm": 0.43596217036247253, |
| "learning_rate": 4.896365574237014e-06, |
| "loss": 0.4727, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.5967117988394585, |
| "grad_norm": 0.6117439866065979, |
| "learning_rate": 4.896183672126128e-06, |
| "loss": 0.4652, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.5971953578336557, |
| "grad_norm": 0.5615205764770508, |
| "learning_rate": 4.8960016139001555e-06, |
| "loss": 0.4608, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.597678916827853, |
| "grad_norm": 0.4595431089401245, |
| "learning_rate": 4.895819399570958e-06, |
| "loss": 0.4447, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.5981624758220503, |
| "grad_norm": 0.5729189515113831, |
| "learning_rate": 4.895637029150408e-06, |
| "loss": 0.4697, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.5986460348162476, |
| "grad_norm": 0.4351283311843872, |
| "learning_rate": 4.895454502650388e-06, |
| "loss": 0.4817, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.5991295938104448, |
| "grad_norm": 0.42649757862091064, |
| "learning_rate": 4.895271820082787e-06, |
| "loss": 0.4753, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.5996131528046421, |
| "grad_norm": 0.5059411525726318, |
| "learning_rate": 4.895088981459509e-06, |
| "loss": 0.4824, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.6000967117988395, |
| "grad_norm": 0.46130621433258057, |
| "learning_rate": 4.894905986792465e-06, |
| "loss": 0.4661, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.6005802707930368, |
| "grad_norm": 0.4530123770236969, |
| "learning_rate": 4.8947228360935795e-06, |
| "loss": 0.4606, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.601063829787234, |
| "grad_norm": 0.4376404583454132, |
| "learning_rate": 4.894539529374784e-06, |
| "loss": 0.4622, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.6015473887814313, |
| "grad_norm": 0.6113477945327759, |
| "learning_rate": 4.894356066648021e-06, |
| "loss": 0.4983, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.6020309477756286, |
| "grad_norm": 0.44446712732315063, |
| "learning_rate": 4.894172447925242e-06, |
| "loss": 0.4529, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.6025145067698259, |
| "grad_norm": 0.43907102942466736, |
| "learning_rate": 4.8939886732184125e-06, |
| "loss": 0.4532, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.6029980657640233, |
| "grad_norm": 0.43806588649749756, |
| "learning_rate": 4.893804742539505e-06, |
| "loss": 0.4707, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.6034816247582205, |
| "grad_norm": 0.4340677857398987, |
| "learning_rate": 4.893620655900502e-06, |
| "loss": 0.4455, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.6039651837524178, |
| "grad_norm": 0.45514774322509766, |
| "learning_rate": 4.893436413313398e-06, |
| "loss": 0.4786, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.6044487427466151, |
| "grad_norm": 0.5131993889808655, |
| "learning_rate": 4.893252014790195e-06, |
| "loss": 0.458, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.6049323017408124, |
| "grad_norm": 0.4115256667137146, |
| "learning_rate": 4.893067460342909e-06, |
| "loss": 0.4427, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.6054158607350096, |
| "grad_norm": 0.4817465543746948, |
| "learning_rate": 4.892882749983564e-06, |
| "loss": 0.4613, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.6058994197292069, |
| "grad_norm": 0.45432931184768677, |
| "learning_rate": 4.892697883724193e-06, |
| "loss": 0.4818, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.6063829787234043, |
| "grad_norm": 0.47187644243240356, |
| "learning_rate": 4.892512861576841e-06, |
| "loss": 0.4666, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.6068665377176016, |
| "grad_norm": 0.4426283538341522, |
| "learning_rate": 4.89232768355356e-06, |
| "loss": 0.475, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.6073500967117988, |
| "grad_norm": 0.4366290867328644, |
| "learning_rate": 4.892142349666418e-06, |
| "loss": 0.4646, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.6078336557059961, |
| "grad_norm": 0.4257701635360718, |
| "learning_rate": 4.891956859927489e-06, |
| "loss": 0.4656, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.6083172147001934, |
| "grad_norm": 0.4350696802139282, |
| "learning_rate": 4.891771214348857e-06, |
| "loss": 0.4552, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.6088007736943907, |
| "grad_norm": 0.4178631603717804, |
| "learning_rate": 4.891585412942617e-06, |
| "loss": 0.4759, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.6092843326885881, |
| "grad_norm": 0.43329527974128723, |
| "learning_rate": 4.8913994557208756e-06, |
| "loss": 0.4675, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.6097678916827853, |
| "grad_norm": 0.46253421902656555, |
| "learning_rate": 4.891213342695747e-06, |
| "loss": 0.4872, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.6102514506769826, |
| "grad_norm": 0.4532800316810608, |
| "learning_rate": 4.891027073879357e-06, |
| "loss": 0.4829, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.6107350096711799, |
| "grad_norm": 0.4343501925468445, |
| "learning_rate": 4.890840649283843e-06, |
| "loss": 0.4603, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.6112185686653772, |
| "grad_norm": 0.6483604907989502, |
| "learning_rate": 4.890654068921347e-06, |
| "loss": 0.4418, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.6117021276595744, |
| "grad_norm": 0.45191410183906555, |
| "learning_rate": 4.890467332804029e-06, |
| "loss": 0.4588, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.6121856866537717, |
| "grad_norm": 0.4154477119445801, |
| "learning_rate": 4.890280440944053e-06, |
| "loss": 0.4415, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.6126692456479691, |
| "grad_norm": 0.46999967098236084, |
| "learning_rate": 4.890093393353596e-06, |
| "loss": 0.469, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.6131528046421664, |
| "grad_norm": 0.4785558879375458, |
| "learning_rate": 4.889906190044843e-06, |
| "loss": 0.4627, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.6136363636363636, |
| "grad_norm": 0.43093442916870117, |
| "learning_rate": 4.889718831029993e-06, |
| "loss": 0.4383, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.6141199226305609, |
| "grad_norm": 0.43612828850746155, |
| "learning_rate": 4.889531316321251e-06, |
| "loss": 0.4759, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.6146034816247582, |
| "grad_norm": 0.40855318307876587, |
| "learning_rate": 4.889343645930834e-06, |
| "loss": 0.4579, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.6150870406189555, |
| "grad_norm": 0.8085166215896606, |
| "learning_rate": 4.88915581987097e-06, |
| "loss": 0.4704, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.6155705996131529, |
| "grad_norm": 0.4581427276134491, |
| "learning_rate": 4.8889678381538954e-06, |
| "loss": 0.4727, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.6160541586073501, |
| "grad_norm": 0.44169020652770996, |
| "learning_rate": 4.888779700791858e-06, |
| "loss": 0.4643, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.6165377176015474, |
| "grad_norm": 0.42497655749320984, |
| "learning_rate": 4.8885914077971155e-06, |
| "loss": 0.4699, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.6170212765957447, |
| "grad_norm": 0.5462936162948608, |
| "learning_rate": 4.888402959181934e-06, |
| "loss": 0.4687, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.617504835589942, |
| "grad_norm": 0.4208332598209381, |
| "learning_rate": 4.888214354958592e-06, |
| "loss": 0.4694, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.6179883945841392, |
| "grad_norm": 0.41665810346603394, |
| "learning_rate": 4.888025595139377e-06, |
| "loss": 0.4729, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.6184719535783365, |
| "grad_norm": 0.45152920484542847, |
| "learning_rate": 4.887836679736588e-06, |
| "loss": 0.4486, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.6189555125725339, |
| "grad_norm": 0.42997369170188904, |
| "learning_rate": 4.887647608762533e-06, |
| "loss": 0.4452, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.6194390715667312, |
| "grad_norm": 0.49401333928108215, |
| "learning_rate": 4.88745838222953e-06, |
| "loss": 0.47, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.6199226305609284, |
| "grad_norm": 0.4673040211200714, |
| "learning_rate": 4.887269000149907e-06, |
| "loss": 0.4401, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.6204061895551257, |
| "grad_norm": 0.4323924779891968, |
| "learning_rate": 4.887079462536003e-06, |
| "loss": 0.472, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.620889748549323, |
| "grad_norm": 0.4398268759250641, |
| "learning_rate": 4.886889769400166e-06, |
| "loss": 0.4595, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.6213733075435203, |
| "grad_norm": 0.6603145599365234, |
| "learning_rate": 4.886699920754755e-06, |
| "loss": 0.46, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.6218568665377177, |
| "grad_norm": 0.4661974608898163, |
| "learning_rate": 4.88650991661214e-06, |
| "loss": 0.4592, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.6223404255319149, |
| "grad_norm": 0.45898857712745667, |
| "learning_rate": 4.886319756984699e-06, |
| "loss": 0.474, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.6228239845261122, |
| "grad_norm": 0.4523521959781647, |
| "learning_rate": 4.886129441884822e-06, |
| "loss": 0.4629, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.6233075435203095, |
| "grad_norm": 0.4217586815357208, |
| "learning_rate": 4.8859389713249076e-06, |
| "loss": 0.4734, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.6237911025145068, |
| "grad_norm": 0.47298648953437805, |
| "learning_rate": 4.885748345317365e-06, |
| "loss": 0.4841, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.624274661508704, |
| "grad_norm": 0.49093514680862427, |
| "learning_rate": 4.885557563874614e-06, |
| "loss": 0.4458, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.6247582205029013, |
| "grad_norm": 0.4375835359096527, |
| "learning_rate": 4.885366627009085e-06, |
| "loss": 0.452, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.6252417794970987, |
| "grad_norm": 0.4391387701034546, |
| "learning_rate": 4.885175534733217e-06, |
| "loss": 0.4428, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.625725338491296, |
| "grad_norm": 0.4366895854473114, |
| "learning_rate": 4.88498428705946e-06, |
| "loss": 0.4591, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.6262088974854932, |
| "grad_norm": 0.44298285245895386, |
| "learning_rate": 4.8847928840002755e-06, |
| "loss": 0.4694, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.6266924564796905, |
| "grad_norm": 0.43226316571235657, |
| "learning_rate": 4.884601325568132e-06, |
| "loss": 0.438, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.6271760154738878, |
| "grad_norm": 0.42019107937812805, |
| "learning_rate": 4.88440961177551e-06, |
| "loss": 0.4494, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.6276595744680851, |
| "grad_norm": 0.44551870226860046, |
| "learning_rate": 4.884217742634901e-06, |
| "loss": 0.4768, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.6281431334622823, |
| "grad_norm": 0.48216238617897034, |
| "learning_rate": 4.884025718158806e-06, |
| "loss": 0.4603, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.6286266924564797, |
| "grad_norm": 0.42420119047164917, |
| "learning_rate": 4.883833538359733e-06, |
| "loss": 0.4574, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.629110251450677, |
| "grad_norm": 0.43553632497787476, |
| "learning_rate": 4.883641203250205e-06, |
| "loss": 0.4514, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.6295938104448743, |
| "grad_norm": 0.4379255473613739, |
| "learning_rate": 4.883448712842752e-06, |
| "loss": 0.4462, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.6300773694390716, |
| "grad_norm": 0.41880786418914795, |
| "learning_rate": 4.883256067149917e-06, |
| "loss": 0.4563, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.6305609284332688, |
| "grad_norm": 0.4387166500091553, |
| "learning_rate": 4.883063266184248e-06, |
| "loss": 0.4721, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.6310444874274661, |
| "grad_norm": 0.42293262481689453, |
| "learning_rate": 4.8828703099583086e-06, |
| "loss": 0.4489, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.6315280464216635, |
| "grad_norm": 0.5480451583862305, |
| "learning_rate": 4.882677198484669e-06, |
| "loss": 0.4717, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.6320116054158608, |
| "grad_norm": 0.4439707398414612, |
| "learning_rate": 4.8824839317759115e-06, |
| "loss": 0.4536, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.632495164410058, |
| "grad_norm": 0.5086662173271179, |
| "learning_rate": 4.882290509844627e-06, |
| "loss": 0.4607, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.6329787234042553, |
| "grad_norm": 0.42350244522094727, |
| "learning_rate": 4.882096932703418e-06, |
| "loss": 0.4621, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.6334622823984526, |
| "grad_norm": 0.4228970408439636, |
| "learning_rate": 4.881903200364897e-06, |
| "loss": 0.4841, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.6339458413926499, |
| "grad_norm": 0.4056392312049866, |
| "learning_rate": 4.881709312841684e-06, |
| "loss": 0.4572, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.6344294003868471, |
| "grad_norm": 0.427141934633255, |
| "learning_rate": 4.881515270146412e-06, |
| "loss": 0.4505, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.6349129593810445, |
| "grad_norm": 0.4352380335330963, |
| "learning_rate": 4.881321072291724e-06, |
| "loss": 0.4686, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.6353965183752418, |
| "grad_norm": 0.445075124502182, |
| "learning_rate": 4.88112671929027e-06, |
| "loss": 0.4724, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.6358800773694391, |
| "grad_norm": 0.42873072624206543, |
| "learning_rate": 4.880932211154715e-06, |
| "loss": 0.4748, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.6363636363636364, |
| "grad_norm": 0.44988590478897095, |
| "learning_rate": 4.880737547897731e-06, |
| "loss": 0.4551, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.6368471953578336, |
| "grad_norm": 0.9747801423072815, |
| "learning_rate": 4.880542729532e-06, |
| "loss": 0.4517, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.6373307543520309, |
| "grad_norm": 0.6301749348640442, |
| "learning_rate": 4.880347756070214e-06, |
| "loss": 0.4702, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.6378143133462283, |
| "grad_norm": 0.6370202302932739, |
| "learning_rate": 4.880152627525076e-06, |
| "loss": 0.4593, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.6382978723404256, |
| "grad_norm": 0.4780006408691406, |
| "learning_rate": 4.879957343909301e-06, |
| "loss": 0.4849, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.6387814313346228, |
| "grad_norm": 0.4865046739578247, |
| "learning_rate": 4.87976190523561e-06, |
| "loss": 0.4814, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.6392649903288201, |
| "grad_norm": 0.41326484084129333, |
| "learning_rate": 4.879566311516737e-06, |
| "loss": 0.4544, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.6397485493230174, |
| "grad_norm": 0.4544488489627838, |
| "learning_rate": 4.879370562765424e-06, |
| "loss": 0.4717, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.6402321083172147, |
| "grad_norm": 0.42522361874580383, |
| "learning_rate": 4.879174658994425e-06, |
| "loss": 0.4625, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.6407156673114119, |
| "grad_norm": 0.42701852321624756, |
| "learning_rate": 4.8789786002165055e-06, |
| "loss": 0.4857, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.6411992263056093, |
| "grad_norm": 0.45991528034210205, |
| "learning_rate": 4.8787823864444365e-06, |
| "loss": 0.4528, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.6416827852998066, |
| "grad_norm": 0.4402574896812439, |
| "learning_rate": 4.878586017691002e-06, |
| "loss": 0.4682, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.6421663442940039, |
| "grad_norm": 0.4804152846336365, |
| "learning_rate": 4.878389493968996e-06, |
| "loss": 0.4557, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.6426499032882012, |
| "grad_norm": 0.4432368874549866, |
| "learning_rate": 4.878192815291223e-06, |
| "loss": 0.4439, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.6431334622823984, |
| "grad_norm": 0.4438154697418213, |
| "learning_rate": 4.8779959816704955e-06, |
| "loss": 0.4455, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.6436170212765957, |
| "grad_norm": 0.4515506327152252, |
| "learning_rate": 4.877798993119639e-06, |
| "loss": 0.4624, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.6441005802707931, |
| "grad_norm": 0.4431227743625641, |
| "learning_rate": 4.877601849651487e-06, |
| "loss": 0.4571, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.6445841392649904, |
| "grad_norm": 0.4530756175518036, |
| "learning_rate": 4.877404551278883e-06, |
| "loss": 0.4572, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.6450676982591876, |
| "grad_norm": 0.43097323179244995, |
| "learning_rate": 4.877207098014682e-06, |
| "loss": 0.4582, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.6455512572533849, |
| "grad_norm": 0.43735161423683167, |
| "learning_rate": 4.8770094898717494e-06, |
| "loss": 0.4356, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.6460348162475822, |
| "grad_norm": 0.43248477578163147, |
| "learning_rate": 4.876811726862958e-06, |
| "loss": 0.4386, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.6465183752417795, |
| "grad_norm": 0.5172285437583923, |
| "learning_rate": 4.876613809001193e-06, |
| "loss": 0.4326, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.6470019342359767, |
| "grad_norm": 0.4423380494117737, |
| "learning_rate": 4.876415736299349e-06, |
| "loss": 0.4597, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.6474854932301741, |
| "grad_norm": 0.4481392204761505, |
| "learning_rate": 4.87621750877033e-06, |
| "loss": 0.4322, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.6479690522243714, |
| "grad_norm": 0.4456692636013031, |
| "learning_rate": 4.8760191264270525e-06, |
| "loss": 0.4702, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.6484526112185687, |
| "grad_norm": 1.2792326211929321, |
| "learning_rate": 4.8758205892824415e-06, |
| "loss": 0.4617, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.648936170212766, |
| "grad_norm": 0.5256432294845581, |
| "learning_rate": 4.8756218973494296e-06, |
| "loss": 0.4695, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.6494197292069632, |
| "grad_norm": 0.47011733055114746, |
| "learning_rate": 4.875423050640964e-06, |
| "loss": 0.4551, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.6499032882011605, |
| "grad_norm": 0.7151491641998291, |
| "learning_rate": 4.8752240491699985e-06, |
| "loss": 0.4633, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.6503868471953579, |
| "grad_norm": 0.6129742860794067, |
| "learning_rate": 4.875024892949499e-06, |
| "loss": 0.4528, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.6508704061895552, |
| "grad_norm": 0.43616563081741333, |
| "learning_rate": 4.874825581992442e-06, |
| "loss": 0.4596, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.6513539651837524, |
| "grad_norm": 0.43546509742736816, |
| "learning_rate": 4.874626116311812e-06, |
| "loss": 0.4571, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.6518375241779497, |
| "grad_norm": 0.47251904010772705, |
| "learning_rate": 4.874426495920603e-06, |
| "loss": 0.449, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.652321083172147, |
| "grad_norm": 0.45273882150650024, |
| "learning_rate": 4.874226720831823e-06, |
| "loss": 0.4543, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.6528046421663443, |
| "grad_norm": 0.4764840006828308, |
| "learning_rate": 4.874026791058486e-06, |
| "loss": 0.4888, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.6532882011605415, |
| "grad_norm": 0.4244127869606018, |
| "learning_rate": 4.873826706613618e-06, |
| "loss": 0.4724, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.6537717601547389, |
| "grad_norm": 0.455299973487854, |
| "learning_rate": 4.873626467510255e-06, |
| "loss": 0.4601, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.6542553191489362, |
| "grad_norm": 0.4337126910686493, |
| "learning_rate": 4.8734260737614435e-06, |
| "loss": 0.4518, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.6547388781431335, |
| "grad_norm": 0.46671974658966064, |
| "learning_rate": 4.873225525380239e-06, |
| "loss": 0.4776, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.6552224371373307, |
| "grad_norm": 0.40789374709129333, |
| "learning_rate": 4.873024822379707e-06, |
| "loss": 0.4425, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.655705996131528, |
| "grad_norm": 0.45303189754486084, |
| "learning_rate": 4.872823964772925e-06, |
| "loss": 0.4533, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.6561895551257253, |
| "grad_norm": 0.4961181879043579, |
| "learning_rate": 4.872622952572977e-06, |
| "loss": 0.4571, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.6566731141199227, |
| "grad_norm": 0.4303774833679199, |
| "learning_rate": 4.872421785792962e-06, |
| "loss": 0.4806, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.65715667311412, |
| "grad_norm": 0.4572507441043854, |
| "learning_rate": 4.872220464445983e-06, |
| "loss": 0.446, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.6576402321083172, |
| "grad_norm": 0.4358389973640442, |
| "learning_rate": 4.8720189885451605e-06, |
| "loss": 0.48, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.6581237911025145, |
| "grad_norm": 0.4195079505443573, |
| "learning_rate": 4.871817358103617e-06, |
| "loss": 0.4288, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.6586073500967118, |
| "grad_norm": 0.4293639659881592, |
| "learning_rate": 4.871615573134492e-06, |
| "loss": 0.4592, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.6590909090909091, |
| "grad_norm": 0.4390144646167755, |
| "learning_rate": 4.87141363365093e-06, |
| "loss": 0.4645, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.6595744680851063, |
| "grad_norm": 0.44983401894569397, |
| "learning_rate": 4.871211539666089e-06, |
| "loss": 0.4734, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.6600580270793037, |
| "grad_norm": 0.4398081600666046, |
| "learning_rate": 4.871009291193135e-06, |
| "loss": 0.4654, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.660541586073501, |
| "grad_norm": 0.43590307235717773, |
| "learning_rate": 4.870806888245245e-06, |
| "loss": 0.4594, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.6610251450676983, |
| "grad_norm": 0.4521521329879761, |
| "learning_rate": 4.870604330835606e-06, |
| "loss": 0.4633, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.6615087040618955, |
| "grad_norm": 0.46335622668266296, |
| "learning_rate": 4.870401618977415e-06, |
| "loss": 0.4625, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.6619922630560928, |
| "grad_norm": 0.42654484510421753, |
| "learning_rate": 4.870198752683879e-06, |
| "loss": 0.4518, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.6624758220502901, |
| "grad_norm": 0.45265164971351624, |
| "learning_rate": 4.869995731968214e-06, |
| "loss": 0.4798, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.6629593810444874, |
| "grad_norm": 0.4795035123825073, |
| "learning_rate": 4.86979255684365e-06, |
| "loss": 0.4721, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.6634429400386848, |
| "grad_norm": 0.44764506816864014, |
| "learning_rate": 4.869589227323421e-06, |
| "loss": 0.4578, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.663926499032882, |
| "grad_norm": 0.43399766087532043, |
| "learning_rate": 4.869385743420775e-06, |
| "loss": 0.4698, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.6644100580270793, |
| "grad_norm": 0.4521014094352722, |
| "learning_rate": 4.869182105148971e-06, |
| "loss": 0.465, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.6648936170212766, |
| "grad_norm": 0.41134724020957947, |
| "learning_rate": 4.868978312521274e-06, |
| "loss": 0.4454, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.6653771760154739, |
| "grad_norm": 0.47302302718162537, |
| "learning_rate": 4.868774365550963e-06, |
| "loss": 0.4777, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.6658607350096711, |
| "grad_norm": 0.42681750655174255, |
| "learning_rate": 4.868570264251324e-06, |
| "loss": 0.4767, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.6663442940038685, |
| "grad_norm": 0.4561176896095276, |
| "learning_rate": 4.868366008635657e-06, |
| "loss": 0.4527, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.6668278529980658, |
| "grad_norm": 0.5029308199882507, |
| "learning_rate": 4.868161598717267e-06, |
| "loss": 0.4567, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.6673114119922631, |
| "grad_norm": 0.41503024101257324, |
| "learning_rate": 4.867957034509473e-06, |
| "loss": 0.4495, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.6677949709864603, |
| "grad_norm": 0.6687749624252319, |
| "learning_rate": 4.867752316025602e-06, |
| "loss": 0.4604, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.6682785299806576, |
| "grad_norm": 0.4590419828891754, |
| "learning_rate": 4.867547443278993e-06, |
| "loss": 0.462, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.6687620889748549, |
| "grad_norm": 0.4743969142436981, |
| "learning_rate": 4.867342416282992e-06, |
| "loss": 0.4702, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.6692456479690522, |
| "grad_norm": 0.5056655406951904, |
| "learning_rate": 4.867137235050958e-06, |
| "loss": 0.4616, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.6697292069632496, |
| "grad_norm": 0.5750028491020203, |
| "learning_rate": 4.866931899596259e-06, |
| "loss": 0.4651, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.6702127659574468, |
| "grad_norm": 0.538818895816803, |
| "learning_rate": 4.866726409932272e-06, |
| "loss": 0.4619, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.6706963249516441, |
| "grad_norm": 0.5920456051826477, |
| "learning_rate": 4.866520766072385e-06, |
| "loss": 0.4565, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.6711798839458414, |
| "grad_norm": 0.6138905882835388, |
| "learning_rate": 4.866314968029997e-06, |
| "loss": 0.4764, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.6716634429400387, |
| "grad_norm": 0.4083728492259979, |
| "learning_rate": 4.866109015818515e-06, |
| "loss": 0.4402, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.6721470019342359, |
| "grad_norm": 0.5708425045013428, |
| "learning_rate": 4.865902909451358e-06, |
| "loss": 0.4363, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.6726305609284333, |
| "grad_norm": 0.4998311698436737, |
| "learning_rate": 4.865696648941954e-06, |
| "loss": 0.4493, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.6731141199226306, |
| "grad_norm": 0.42642462253570557, |
| "learning_rate": 4.865490234303741e-06, |
| "loss": 0.4605, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.6735976789168279, |
| "grad_norm": 0.42192861437797546, |
| "learning_rate": 4.865283665550167e-06, |
| "loss": 0.4624, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.6740812379110251, |
| "grad_norm": 0.4162391424179077, |
| "learning_rate": 4.8650769426946905e-06, |
| "loss": 0.4666, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.6745647969052224, |
| "grad_norm": 0.41674554347991943, |
| "learning_rate": 4.8648700657507794e-06, |
| "loss": 0.4637, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.6750483558994197, |
| "grad_norm": 0.40383294224739075, |
| "learning_rate": 4.864663034731913e-06, |
| "loss": 0.4698, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.675531914893617, |
| "grad_norm": 0.44404447078704834, |
| "learning_rate": 4.864455849651579e-06, |
| "loss": 0.4517, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.6760154738878144, |
| "grad_norm": 0.4714096784591675, |
| "learning_rate": 4.8642485105232766e-06, |
| "loss": 0.4562, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.6764990328820116, |
| "grad_norm": 0.41663625836372375, |
| "learning_rate": 4.864041017360512e-06, |
| "loss": 0.4449, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.6769825918762089, |
| "grad_norm": 0.5686074495315552, |
| "learning_rate": 4.863833370176807e-06, |
| "loss": 0.4554, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.6774661508704062, |
| "grad_norm": 0.41894927620887756, |
| "learning_rate": 4.863625568985688e-06, |
| "loss": 0.4749, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.6779497098646035, |
| "grad_norm": 0.4760285019874573, |
| "learning_rate": 4.863417613800694e-06, |
| "loss": 0.4605, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.6784332688588007, |
| "grad_norm": 0.6658610701560974, |
| "learning_rate": 4.863209504635373e-06, |
| "loss": 0.4447, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.6789168278529981, |
| "grad_norm": 0.46141868829727173, |
| "learning_rate": 4.863001241503285e-06, |
| "loss": 0.4561, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.6794003868471954, |
| "grad_norm": 1.1174705028533936, |
| "learning_rate": 4.862792824417998e-06, |
| "loss": 0.4542, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.6798839458413927, |
| "grad_norm": 0.4388240873813629, |
| "learning_rate": 4.86258425339309e-06, |
| "loss": 0.4666, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.6803675048355899, |
| "grad_norm": 0.4310016930103302, |
| "learning_rate": 4.862375528442152e-06, |
| "loss": 0.4604, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.6808510638297872, |
| "grad_norm": 0.47581034898757935, |
| "learning_rate": 4.862166649578779e-06, |
| "loss": 0.4608, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.6813346228239845, |
| "grad_norm": 0.47496527433395386, |
| "learning_rate": 4.861957616816583e-06, |
| "loss": 0.4674, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.6818181818181818, |
| "grad_norm": 0.45721524953842163, |
| "learning_rate": 4.861748430169181e-06, |
| "loss": 0.4589, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.6823017408123792, |
| "grad_norm": 0.43196386098861694, |
| "learning_rate": 4.8615390896502034e-06, |
| "loss": 0.4612, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.6827852998065764, |
| "grad_norm": 0.44146570563316345, |
| "learning_rate": 4.861329595273288e-06, |
| "loss": 0.4651, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.6832688588007737, |
| "grad_norm": 0.4383741021156311, |
| "learning_rate": 4.8611199470520844e-06, |
| "loss": 0.444, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.683752417794971, |
| "grad_norm": 0.41839542984962463, |
| "learning_rate": 4.86091014500025e-06, |
| "loss": 0.4783, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.6842359767891683, |
| "grad_norm": 0.41911396384239197, |
| "learning_rate": 4.860700189131456e-06, |
| "loss": 0.455, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.6847195357833655, |
| "grad_norm": 0.7411275506019592, |
| "learning_rate": 4.8604900794593786e-06, |
| "loss": 0.4546, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.6852030947775629, |
| "grad_norm": 0.4487959146499634, |
| "learning_rate": 4.860279815997709e-06, |
| "loss": 0.4693, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.6856866537717602, |
| "grad_norm": 0.4634031057357788, |
| "learning_rate": 4.860069398760146e-06, |
| "loss": 0.4527, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.6861702127659575, |
| "grad_norm": 0.42531654238700867, |
| "learning_rate": 4.859858827760396e-06, |
| "loss": 0.4653, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.6866537717601547, |
| "grad_norm": 0.43067359924316406, |
| "learning_rate": 4.859648103012183e-06, |
| "loss": 0.4571, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.687137330754352, |
| "grad_norm": 0.41355863213539124, |
| "learning_rate": 4.859437224529231e-06, |
| "loss": 0.4402, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.6876208897485493, |
| "grad_norm": 0.512670636177063, |
| "learning_rate": 4.859226192325281e-06, |
| "loss": 0.4648, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.6881044487427466, |
| "grad_norm": 0.42603832483291626, |
| "learning_rate": 4.859015006414084e-06, |
| "loss": 0.4761, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.688588007736944, |
| "grad_norm": 0.4183642268180847, |
| "learning_rate": 4.858803666809396e-06, |
| "loss": 0.4457, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.6890715667311412, |
| "grad_norm": 0.41630667448043823, |
| "learning_rate": 4.858592173524988e-06, |
| "loss": 0.4729, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.6895551257253385, |
| "grad_norm": 0.4732525646686554, |
| "learning_rate": 4.858380526574639e-06, |
| "loss": 0.4431, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.6900386847195358, |
| "grad_norm": 0.4732808470726013, |
| "learning_rate": 4.8581687259721375e-06, |
| "loss": 0.4718, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.690522243713733, |
| "grad_norm": 0.5042643547058105, |
| "learning_rate": 4.857956771731282e-06, |
| "loss": 0.4574, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.6910058027079303, |
| "grad_norm": 0.4248146414756775, |
| "learning_rate": 4.857744663865883e-06, |
| "loss": 0.4548, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.6914893617021277, |
| "grad_norm": 0.4656713902950287, |
| "learning_rate": 4.85753240238976e-06, |
| "loss": 0.4588, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.691972920696325, |
| "grad_norm": 0.5286734104156494, |
| "learning_rate": 4.85731998731674e-06, |
| "loss": 0.4612, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.6924564796905223, |
| "grad_norm": 0.40691936016082764, |
| "learning_rate": 4.857107418660664e-06, |
| "loss": 0.4375, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.6929400386847195, |
| "grad_norm": 0.41909584403038025, |
| "learning_rate": 4.85689469643538e-06, |
| "loss": 0.4766, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.6934235976789168, |
| "grad_norm": 0.45573991537094116, |
| "learning_rate": 4.85668182065475e-06, |
| "loss": 0.4252, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.6939071566731141, |
| "grad_norm": 0.44050395488739014, |
| "learning_rate": 4.856468791332638e-06, |
| "loss": 0.4725, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.6943907156673114, |
| "grad_norm": 0.4421895742416382, |
| "learning_rate": 4.8562556084829285e-06, |
| "loss": 0.4481, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.6948742746615088, |
| "grad_norm": 0.41216611862182617, |
| "learning_rate": 4.856042272119508e-06, |
| "loss": 0.4545, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.695357833655706, |
| "grad_norm": 0.8512147068977356, |
| "learning_rate": 4.8558287822562755e-06, |
| "loss": 0.4703, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.6958413926499033, |
| "grad_norm": 0.48798003792762756, |
| "learning_rate": 4.855615138907141e-06, |
| "loss": 0.4742, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.6963249516441006, |
| "grad_norm": 0.7163406014442444, |
| "learning_rate": 4.855401342086024e-06, |
| "loss": 0.4574, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.6968085106382979, |
| "grad_norm": 0.4780515432357788, |
| "learning_rate": 4.8551873918068525e-06, |
| "loss": 0.4829, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.6972920696324951, |
| "grad_norm": 0.4141700863838196, |
| "learning_rate": 4.854973288083566e-06, |
| "loss": 0.4712, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.6977756286266924, |
| "grad_norm": 0.43325740098953247, |
| "learning_rate": 4.854759030930115e-06, |
| "loss": 0.4399, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.6982591876208898, |
| "grad_norm": 0.44285354018211365, |
| "learning_rate": 4.854544620360458e-06, |
| "loss": 0.4502, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.6987427466150871, |
| "grad_norm": 0.44633156061172485, |
| "learning_rate": 4.854330056388563e-06, |
| "loss": 0.4764, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.6992263056092843, |
| "grad_norm": 0.40800923109054565, |
| "learning_rate": 4.854115339028411e-06, |
| "loss": 0.4415, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.6997098646034816, |
| "grad_norm": 0.44708821177482605, |
| "learning_rate": 4.85390046829399e-06, |
| "loss": 0.4656, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.7001934235976789, |
| "grad_norm": 0.4148036241531372, |
| "learning_rate": 4.853685444199299e-06, |
| "loss": 0.4406, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.7006769825918762, |
| "grad_norm": 0.4434484839439392, |
| "learning_rate": 4.853470266758348e-06, |
| "loss": 0.4481, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.7011605415860735, |
| "grad_norm": 0.4499351382255554, |
| "learning_rate": 4.8532549359851555e-06, |
| "loss": 0.4581, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.7016441005802708, |
| "grad_norm": 0.5162652730941772, |
| "learning_rate": 4.853039451893752e-06, |
| "loss": 0.4687, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.7021276595744681, |
| "grad_norm": 0.45000189542770386, |
| "learning_rate": 4.8528238144981745e-06, |
| "loss": 0.4345, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.7026112185686654, |
| "grad_norm": 0.4324718117713928, |
| "learning_rate": 4.852608023812473e-06, |
| "loss": 0.4595, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.7030947775628626, |
| "grad_norm": 0.4326983392238617, |
| "learning_rate": 4.852392079850707e-06, |
| "loss": 0.4457, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.7035783365570599, |
| "grad_norm": 0.431071400642395, |
| "learning_rate": 4.852175982626945e-06, |
| "loss": 0.4737, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.7040618955512572, |
| "grad_norm": 0.4474363923072815, |
| "learning_rate": 4.8519597321552666e-06, |
| "loss": 0.4716, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.7045454545454546, |
| "grad_norm": 0.4463392198085785, |
| "learning_rate": 4.85174332844976e-06, |
| "loss": 0.4482, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.7050290135396519, |
| "grad_norm": 0.5177990794181824, |
| "learning_rate": 4.851526771524526e-06, |
| "loss": 0.4768, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.7055125725338491, |
| "grad_norm": 0.4245111346244812, |
| "learning_rate": 4.8513100613936725e-06, |
| "loss": 0.4732, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.7059961315280464, |
| "grad_norm": 0.4291518032550812, |
| "learning_rate": 4.851093198071318e-06, |
| "loss": 0.4747, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.7064796905222437, |
| "grad_norm": 0.42464911937713623, |
| "learning_rate": 4.850876181571592e-06, |
| "loss": 0.4702, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.706963249516441, |
| "grad_norm": 0.4318511486053467, |
| "learning_rate": 4.850659011908633e-06, |
| "loss": 0.4663, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.7074468085106383, |
| "grad_norm": 0.42098772525787354, |
| "learning_rate": 4.850441689096591e-06, |
| "loss": 0.4641, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.7079303675048356, |
| "grad_norm": 0.4562412202358246, |
| "learning_rate": 4.850224213149624e-06, |
| "loss": 0.4346, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.7084139264990329, |
| "grad_norm": 0.43204471468925476, |
| "learning_rate": 4.850006584081901e-06, |
| "loss": 0.4743, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.7088974854932302, |
| "grad_norm": 0.4134417772293091, |
| "learning_rate": 4.849788801907602e-06, |
| "loss": 0.4462, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.7093810444874274, |
| "grad_norm": 0.472341388463974, |
| "learning_rate": 4.8495708666409135e-06, |
| "loss": 0.4558, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.7098646034816247, |
| "grad_norm": 0.6017382740974426, |
| "learning_rate": 4.849352778296037e-06, |
| "loss": 0.4409, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.710348162475822, |
| "grad_norm": 0.6916100978851318, |
| "learning_rate": 4.849134536887179e-06, |
| "loss": 0.4521, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.7108317214700194, |
| "grad_norm": 0.44142261147499084, |
| "learning_rate": 4.84891614242856e-06, |
| "loss": 0.4614, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.7113152804642167, |
| "grad_norm": 0.45912495255470276, |
| "learning_rate": 4.848697594934407e-06, |
| "loss": 0.4604, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.7117988394584139, |
| "grad_norm": 0.43075597286224365, |
| "learning_rate": 4.848478894418961e-06, |
| "loss": 0.4434, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.7122823984526112, |
| "grad_norm": 0.43654683232307434, |
| "learning_rate": 4.848260040896469e-06, |
| "loss": 0.4868, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.7127659574468085, |
| "grad_norm": 0.42006418108940125, |
| "learning_rate": 4.84804103438119e-06, |
| "loss": 0.4593, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.7132495164410058, |
| "grad_norm": 0.4998641610145569, |
| "learning_rate": 4.847821874887393e-06, |
| "loss": 0.4583, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.7137330754352031, |
| "grad_norm": 0.4955298900604248, |
| "learning_rate": 4.847602562429356e-06, |
| "loss": 0.4709, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.7142166344294004, |
| "grad_norm": 0.4439548850059509, |
| "learning_rate": 4.847383097021368e-06, |
| "loss": 0.465, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.7147001934235977, |
| "grad_norm": 0.44863393902778625, |
| "learning_rate": 4.847163478677726e-06, |
| "loss": 0.45, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.715183752417795, |
| "grad_norm": 0.42579901218414307, |
| "learning_rate": 4.846943707412741e-06, |
| "loss": 0.4527, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.7156673114119922, |
| "grad_norm": 0.4317091703414917, |
| "learning_rate": 4.84672378324073e-06, |
| "loss": 0.444, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.7161508704061895, |
| "grad_norm": 0.48940885066986084, |
| "learning_rate": 4.846503706176021e-06, |
| "loss": 0.4844, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.7166344294003868, |
| "grad_norm": 0.4427342414855957, |
| "learning_rate": 4.846283476232954e-06, |
| "loss": 0.4432, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.7171179883945842, |
| "grad_norm": 0.42922696471214294, |
| "learning_rate": 4.846063093425876e-06, |
| "loss": 0.4236, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.7176015473887815, |
| "grad_norm": 0.4381331503391266, |
| "learning_rate": 4.845842557769146e-06, |
| "loss": 0.4499, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.7180851063829787, |
| "grad_norm": 0.4614485204219818, |
| "learning_rate": 4.845621869277131e-06, |
| "loss": 0.4674, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.718568665377176, |
| "grad_norm": 0.4606797993183136, |
| "learning_rate": 4.8454010279642105e-06, |
| "loss": 0.4485, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.7190522243713733, |
| "grad_norm": 0.4316904842853546, |
| "learning_rate": 4.845180033844772e-06, |
| "loss": 0.447, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.7195357833655706, |
| "grad_norm": 0.45125576853752136, |
| "learning_rate": 4.844958886933215e-06, |
| "loss": 0.4676, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.7200193423597679, |
| "grad_norm": 0.4640989899635315, |
| "learning_rate": 4.844737587243944e-06, |
| "loss": 0.4608, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.7205029013539652, |
| "grad_norm": 0.4750809371471405, |
| "learning_rate": 4.844516134791381e-06, |
| "loss": 0.4561, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.7209864603481625, |
| "grad_norm": 0.42236974835395813, |
| "learning_rate": 4.844294529589952e-06, |
| "loss": 0.4595, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.7214700193423598, |
| "grad_norm": 0.5446487665176392, |
| "learning_rate": 4.8440727716540944e-06, |
| "loss": 0.4485, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.721953578336557, |
| "grad_norm": 0.43948227167129517, |
| "learning_rate": 4.843850860998258e-06, |
| "loss": 0.4565, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.7224371373307543, |
| "grad_norm": 0.45492276549339294, |
| "learning_rate": 4.8436287976369e-06, |
| "loss": 0.4523, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.7229206963249516, |
| "grad_norm": 0.4555799961090088, |
| "learning_rate": 4.843406581584487e-06, |
| "loss": 0.4535, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.723404255319149, |
| "grad_norm": 0.4507673382759094, |
| "learning_rate": 4.843184212855498e-06, |
| "loss": 0.4795, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.7238878143133463, |
| "grad_norm": 0.4655287265777588, |
| "learning_rate": 4.842961691464419e-06, |
| "loss": 0.4492, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.7243713733075435, |
| "grad_norm": 0.46638283133506775, |
| "learning_rate": 4.842739017425749e-06, |
| "loss": 0.4439, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.7248549323017408, |
| "grad_norm": 0.8781645894050598, |
| "learning_rate": 4.842516190753996e-06, |
| "loss": 0.4528, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.7253384912959381, |
| "grad_norm": 0.5471943616867065, |
| "learning_rate": 4.842293211463677e-06, |
| "loss": 0.4521, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.7258220502901354, |
| "grad_norm": 0.5306839942932129, |
| "learning_rate": 4.842070079569319e-06, |
| "loss": 0.4478, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.7263056092843327, |
| "grad_norm": 0.48267731070518494, |
| "learning_rate": 4.841846795085459e-06, |
| "loss": 0.4689, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.72678916827853, |
| "grad_norm": 0.4297829270362854, |
| "learning_rate": 4.841623358026646e-06, |
| "loss": 0.4576, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.7272727272727273, |
| "grad_norm": 0.47736454010009766, |
| "learning_rate": 4.8413997684074355e-06, |
| "loss": 0.4657, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.7277562862669246, |
| "grad_norm": 0.47262609004974365, |
| "learning_rate": 4.841176026242396e-06, |
| "loss": 0.439, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.7282398452611218, |
| "grad_norm": 0.4317893981933594, |
| "learning_rate": 4.840952131546103e-06, |
| "loss": 0.4468, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.7287234042553191, |
| "grad_norm": 0.4293960630893707, |
| "learning_rate": 4.8407280843331456e-06, |
| "loss": 0.4464, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.7292069632495164, |
| "grad_norm": 0.4995443522930145, |
| "learning_rate": 4.8405038846181195e-06, |
| "loss": 0.4596, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.7296905222437138, |
| "grad_norm": 0.4192506670951843, |
| "learning_rate": 4.840279532415633e-06, |
| "loss": 0.4655, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.730174081237911, |
| "grad_norm": 0.43838778138160706, |
| "learning_rate": 4.840055027740301e-06, |
| "loss": 0.4441, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.7306576402321083, |
| "grad_norm": 0.4230515956878662, |
| "learning_rate": 4.839830370606751e-06, |
| "loss": 0.4616, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.7311411992263056, |
| "grad_norm": 0.4396006464958191, |
| "learning_rate": 4.839605561029622e-06, |
| "loss": 0.4462, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.7316247582205029, |
| "grad_norm": 0.436576783657074, |
| "learning_rate": 4.839380599023558e-06, |
| "loss": 0.4694, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.7321083172147002, |
| "grad_norm": 0.5072314143180847, |
| "learning_rate": 4.839155484603216e-06, |
| "loss": 0.4641, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.7325918762088974, |
| "grad_norm": 0.42909497022628784, |
| "learning_rate": 4.838930217783263e-06, |
| "loss": 0.4506, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.7330754352030948, |
| "grad_norm": 0.42269250750541687, |
| "learning_rate": 4.838704798578377e-06, |
| "loss": 0.4555, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.7335589941972921, |
| "grad_norm": 0.4334566295146942, |
| "learning_rate": 4.838479227003241e-06, |
| "loss": 0.4756, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.7340425531914894, |
| "grad_norm": 0.4950511157512665, |
| "learning_rate": 4.838253503072554e-06, |
| "loss": 0.4704, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.7345261121856866, |
| "grad_norm": 0.44494107365608215, |
| "learning_rate": 4.838027626801021e-06, |
| "loss": 0.4502, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.7350096711798839, |
| "grad_norm": 0.45712941884994507, |
| "learning_rate": 4.83780159820336e-06, |
| "loss": 0.4722, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.7354932301740812, |
| "grad_norm": 0.4459114670753479, |
| "learning_rate": 4.837575417294295e-06, |
| "loss": 0.4578, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.7359767891682786, |
| "grad_norm": 0.557133138179779, |
| "learning_rate": 4.837349084088563e-06, |
| "loss": 0.4691, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.7364603481624759, |
| "grad_norm": 0.4571788012981415, |
| "learning_rate": 4.8371225986009104e-06, |
| "loss": 0.4564, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.7369439071566731, |
| "grad_norm": 0.4255046546459198, |
| "learning_rate": 4.836895960846092e-06, |
| "loss": 0.4501, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.7374274661508704, |
| "grad_norm": 0.43040698766708374, |
| "learning_rate": 4.836669170838874e-06, |
| "loss": 0.4522, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.7379110251450677, |
| "grad_norm": 0.4929506778717041, |
| "learning_rate": 4.836442228594032e-06, |
| "loss": 0.4553, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.738394584139265, |
| "grad_norm": 0.4765097200870514, |
| "learning_rate": 4.8362151341263515e-06, |
| "loss": 0.4727, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.7388781431334622, |
| "grad_norm": 0.4605850279331207, |
| "learning_rate": 4.83598788745063e-06, |
| "loss": 0.4484, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.7393617021276596, |
| "grad_norm": 0.7684630751609802, |
| "learning_rate": 4.83576048858167e-06, |
| "loss": 0.4534, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.7398452611218569, |
| "grad_norm": 0.42882004380226135, |
| "learning_rate": 4.835532937534289e-06, |
| "loss": 0.458, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.7403288201160542, |
| "grad_norm": 0.43994221091270447, |
| "learning_rate": 4.835305234323311e-06, |
| "loss": 0.4485, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.7408123791102514, |
| "grad_norm": 0.4888148903846741, |
| "learning_rate": 4.835077378963573e-06, |
| "loss": 0.4437, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.7412959381044487, |
| "grad_norm": 0.4104488492012024, |
| "learning_rate": 4.834849371469917e-06, |
| "loss": 0.454, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.741779497098646, |
| "grad_norm": 0.45566514134407043, |
| "learning_rate": 4.834621211857202e-06, |
| "loss": 0.4422, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.7422630560928434, |
| "grad_norm": 0.4125506579875946, |
| "learning_rate": 4.83439290014029e-06, |
| "loss": 0.4513, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.7427466150870407, |
| "grad_norm": 0.5421583652496338, |
| "learning_rate": 4.834164436334057e-06, |
| "loss": 0.4367, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.7432301740812379, |
| "grad_norm": 0.4610116481781006, |
| "learning_rate": 4.8339358204533874e-06, |
| "loss": 0.485, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.7437137330754352, |
| "grad_norm": 0.45787009596824646, |
| "learning_rate": 4.8337070525131755e-06, |
| "loss": 0.4576, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.7441972920696325, |
| "grad_norm": 0.43323206901550293, |
| "learning_rate": 4.833478132528328e-06, |
| "loss": 0.4725, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.7446808510638298, |
| "grad_norm": 0.46115002036094666, |
| "learning_rate": 4.833249060513756e-06, |
| "loss": 0.4678, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.745164410058027, |
| "grad_norm": 0.43544140458106995, |
| "learning_rate": 4.833019836484387e-06, |
| "loss": 0.4791, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.7456479690522244, |
| "grad_norm": 0.4073650538921356, |
| "learning_rate": 4.832790460455153e-06, |
| "loss": 0.4429, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.7461315280464217, |
| "grad_norm": 0.4155426621437073, |
| "learning_rate": 4.832560932441e-06, |
| "loss": 0.4519, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.746615087040619, |
| "grad_norm": 0.43398088216781616, |
| "learning_rate": 4.8323312524568825e-06, |
| "loss": 0.4478, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.7470986460348162, |
| "grad_norm": 0.4437168836593628, |
| "learning_rate": 4.832101420517761e-06, |
| "loss": 0.4553, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.7475822050290135, |
| "grad_norm": 0.46254605054855347, |
| "learning_rate": 4.831871436638613e-06, |
| "loss": 0.4623, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.7480657640232108, |
| "grad_norm": 0.4210186004638672, |
| "learning_rate": 4.8316413008344206e-06, |
| "loss": 0.4508, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.7485493230174082, |
| "grad_norm": 0.5048815608024597, |
| "learning_rate": 4.831411013120179e-06, |
| "loss": 0.4592, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.7490328820116054, |
| "grad_norm": 0.644477128982544, |
| "learning_rate": 4.83118057351089e-06, |
| "loss": 0.4583, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.7495164410058027, |
| "grad_norm": 0.5244016647338867, |
| "learning_rate": 4.830949982021568e-06, |
| "loss": 0.4583, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.41632694005966187, |
| "learning_rate": 4.8307192386672365e-06, |
| "loss": 0.4528, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.7504835589941973, |
| "grad_norm": 1.1326220035552979, |
| "learning_rate": 4.8304883434629276e-06, |
| "loss": 0.4665, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.7509671179883946, |
| "grad_norm": 0.42171135544776917, |
| "learning_rate": 4.830257296423686e-06, |
| "loss": 0.4496, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.7514506769825918, |
| "grad_norm": 0.45120969414711, |
| "learning_rate": 4.830026097564564e-06, |
| "loss": 0.4775, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.7519342359767892, |
| "grad_norm": 0.4309103786945343, |
| "learning_rate": 4.829794746900626e-06, |
| "loss": 0.4561, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.7524177949709865, |
| "grad_norm": 0.4334292411804199, |
| "learning_rate": 4.829563244446942e-06, |
| "loss": 0.4518, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.7529013539651838, |
| "grad_norm": 0.42995062470436096, |
| "learning_rate": 4.829331590218597e-06, |
| "loss": 0.479, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.753384912959381, |
| "grad_norm": 0.6324965953826904, |
| "learning_rate": 4.829099784230683e-06, |
| "loss": 0.4869, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.7538684719535783, |
| "grad_norm": 0.45993176102638245, |
| "learning_rate": 4.828867826498302e-06, |
| "loss": 0.4686, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.7543520309477756, |
| "grad_norm": 0.4506562352180481, |
| "learning_rate": 4.828635717036569e-06, |
| "loss": 0.4779, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.754835589941973, |
| "grad_norm": 1.4067552089691162, |
| "learning_rate": 4.828403455860602e-06, |
| "loss": 0.4255, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.7553191489361702, |
| "grad_norm": 0.40801528096199036, |
| "learning_rate": 4.828171042985536e-06, |
| "loss": 0.4418, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.7558027079303675, |
| "grad_norm": 0.42918655276298523, |
| "learning_rate": 4.8279384784265124e-06, |
| "loss": 0.4633, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.7562862669245648, |
| "grad_norm": 0.43478161096572876, |
| "learning_rate": 4.827705762198683e-06, |
| "loss": 0.452, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.7567698259187621, |
| "grad_norm": 0.46473461389541626, |
| "learning_rate": 4.8274728943172105e-06, |
| "loss": 0.453, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.7572533849129593, |
| "grad_norm": 0.41810253262519836, |
| "learning_rate": 4.827239874797266e-06, |
| "loss": 0.4643, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.7577369439071566, |
| "grad_norm": 0.4875235855579376, |
| "learning_rate": 4.8270067036540305e-06, |
| "loss": 0.468, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.758220502901354, |
| "grad_norm": 0.4377727210521698, |
| "learning_rate": 4.826773380902696e-06, |
| "loss": 0.452, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.7587040618955513, |
| "grad_norm": 0.46963176131248474, |
| "learning_rate": 4.826539906558464e-06, |
| "loss": 0.4712, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.7591876208897486, |
| "grad_norm": 0.42545080184936523, |
| "learning_rate": 4.826306280636545e-06, |
| "loss": 0.4779, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.7596711798839458, |
| "grad_norm": 0.4469500184059143, |
| "learning_rate": 4.826072503152161e-06, |
| "loss": 0.4632, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.7601547388781431, |
| "grad_norm": 0.6386076807975769, |
| "learning_rate": 4.825838574120543e-06, |
| "loss": 0.4561, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.7606382978723404, |
| "grad_norm": 0.42124369740486145, |
| "learning_rate": 4.825604493556931e-06, |
| "loss": 0.4579, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.7611218568665378, |
| "grad_norm": 0.4487420320510864, |
| "learning_rate": 4.825370261476576e-06, |
| "loss": 0.4544, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.761605415860735, |
| "grad_norm": 0.4654780328273773, |
| "learning_rate": 4.825135877894739e-06, |
| "loss": 0.44, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.7620889748549323, |
| "grad_norm": 0.45695367455482483, |
| "learning_rate": 4.82490134282669e-06, |
| "loss": 0.4618, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.7625725338491296, |
| "grad_norm": 0.4306217133998871, |
| "learning_rate": 4.824666656287709e-06, |
| "loss": 0.4574, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.7630560928433269, |
| "grad_norm": 0.4209129512310028, |
| "learning_rate": 4.824431818293088e-06, |
| "loss": 0.4506, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.7635396518375241, |
| "grad_norm": 0.4458996653556824, |
| "learning_rate": 4.824196828858124e-06, |
| "loss": 0.4359, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.7640232108317214, |
| "grad_norm": 0.46759527921676636, |
| "learning_rate": 4.82396168799813e-06, |
| "loss": 0.445, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.7645067698259188, |
| "grad_norm": 0.4400273859500885, |
| "learning_rate": 4.823726395728424e-06, |
| "loss": 0.4393, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.7649903288201161, |
| "grad_norm": 0.49958503246307373, |
| "learning_rate": 4.823490952064337e-06, |
| "loss": 0.4619, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.7654738878143134, |
| "grad_norm": 0.4214346408843994, |
| "learning_rate": 4.823255357021206e-06, |
| "loss": 0.4671, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.7659574468085106, |
| "grad_norm": 0.4155096411705017, |
| "learning_rate": 4.8230196106143835e-06, |
| "loss": 0.4803, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.7664410058027079, |
| "grad_norm": 0.45440050959587097, |
| "learning_rate": 4.822783712859227e-06, |
| "loss": 0.4746, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.7669245647969052, |
| "grad_norm": 0.42145711183547974, |
| "learning_rate": 4.8225476637711055e-06, |
| "loss": 0.4658, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.7674081237911026, |
| "grad_norm": 0.9519943594932556, |
| "learning_rate": 4.8223114633653975e-06, |
| "loss": 0.4358, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.7678916827852998, |
| "grad_norm": 0.4257833659648895, |
| "learning_rate": 4.822075111657494e-06, |
| "loss": 0.4566, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.7683752417794971, |
| "grad_norm": 0.41692131757736206, |
| "learning_rate": 4.821838608662792e-06, |
| "loss": 0.4732, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.7688588007736944, |
| "grad_norm": 0.4908398687839508, |
| "learning_rate": 4.821601954396701e-06, |
| "loss": 0.4633, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.7693423597678917, |
| "grad_norm": 0.4296092987060547, |
| "learning_rate": 4.821365148874637e-06, |
| "loss": 0.4597, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.769825918762089, |
| "grad_norm": 0.4359409511089325, |
| "learning_rate": 4.821128192112031e-06, |
| "loss": 0.4615, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.7703094777562862, |
| "grad_norm": 0.4576115608215332, |
| "learning_rate": 4.820891084124321e-06, |
| "loss": 0.4521, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.7707930367504836, |
| "grad_norm": 0.43757978081703186, |
| "learning_rate": 4.820653824926953e-06, |
| "loss": 0.4576, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.7712765957446809, |
| "grad_norm": 0.40932992100715637, |
| "learning_rate": 4.820416414535386e-06, |
| "loss": 0.4452, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.7717601547388782, |
| "grad_norm": 0.43040093779563904, |
| "learning_rate": 4.820178852965088e-06, |
| "loss": 0.4562, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.7722437137330754, |
| "grad_norm": 0.48488855361938477, |
| "learning_rate": 4.8199411402315356e-06, |
| "loss": 0.4612, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.7727272727272727, |
| "grad_norm": 0.43302029371261597, |
| "learning_rate": 4.819703276350217e-06, |
| "loss": 0.4573, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.77321083172147, |
| "grad_norm": 0.49940812587738037, |
| "learning_rate": 4.819465261336629e-06, |
| "loss": 0.4455, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.7736943907156673, |
| "grad_norm": 0.4195043444633484, |
| "learning_rate": 4.819227095206278e-06, |
| "loss": 0.4618, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.7741779497098646, |
| "grad_norm": 0.4266219735145569, |
| "learning_rate": 4.818988777974682e-06, |
| "loss": 0.4634, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.7746615087040619, |
| "grad_norm": 0.44857192039489746, |
| "learning_rate": 4.8187503096573674e-06, |
| "loss": 0.4555, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.7751450676982592, |
| "grad_norm": 0.444608211517334, |
| "learning_rate": 4.81851169026987e-06, |
| "loss": 0.4562, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.7756286266924565, |
| "grad_norm": 0.4272661507129669, |
| "learning_rate": 4.818272919827737e-06, |
| "loss": 0.4651, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.7761121856866537, |
| "grad_norm": 0.41964107751846313, |
| "learning_rate": 4.8180339983465256e-06, |
| "loss": 0.4425, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.776595744680851, |
| "grad_norm": 0.6876490116119385, |
| "learning_rate": 4.8177949258418e-06, |
| "loss": 0.4552, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.7770793036750484, |
| "grad_norm": 0.6213021874427795, |
| "learning_rate": 4.8175557023291365e-06, |
| "loss": 0.444, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.7775628626692457, |
| "grad_norm": 0.44791480898857117, |
| "learning_rate": 4.817316327824122e-06, |
| "loss": 0.4477, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.778046421663443, |
| "grad_norm": 0.43226101994514465, |
| "learning_rate": 4.817076802342352e-06, |
| "loss": 0.4505, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.7785299806576402, |
| "grad_norm": 0.42529061436653137, |
| "learning_rate": 4.8168371258994305e-06, |
| "loss": 0.4762, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.7790135396518375, |
| "grad_norm": 0.43460187315940857, |
| "learning_rate": 4.816597298510974e-06, |
| "loss": 0.4569, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.7794970986460348, |
| "grad_norm": 0.9211375117301941, |
| "learning_rate": 4.816357320192608e-06, |
| "loss": 0.4413, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.7799806576402321, |
| "grad_norm": 0.4256466031074524, |
| "learning_rate": 4.816117190959966e-06, |
| "loss": 0.4705, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.7804642166344294, |
| "grad_norm": 0.45035386085510254, |
| "learning_rate": 4.815876910828694e-06, |
| "loss": 0.4625, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.7809477756286267, |
| "grad_norm": 0.446243017911911, |
| "learning_rate": 4.815636479814447e-06, |
| "loss": 0.4587, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.781431334622824, |
| "grad_norm": 0.46117037534713745, |
| "learning_rate": 4.815395897932888e-06, |
| "loss": 0.4632, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.7819148936170213, |
| "grad_norm": 0.417878121137619, |
| "learning_rate": 4.815155165199692e-06, |
| "loss": 0.462, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.7823984526112185, |
| "grad_norm": 0.43329668045043945, |
| "learning_rate": 4.814914281630543e-06, |
| "loss": 0.4555, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.7828820116054158, |
| "grad_norm": 0.41156476736068726, |
| "learning_rate": 4.814673247241135e-06, |
| "loss": 0.4582, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.7833655705996132, |
| "grad_norm": 0.5099841356277466, |
| "learning_rate": 4.814432062047172e-06, |
| "loss": 0.4259, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.7838491295938105, |
| "grad_norm": 0.41067326068878174, |
| "learning_rate": 4.814190726064367e-06, |
| "loss": 0.4444, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.7843326885880078, |
| "grad_norm": 0.4209578335285187, |
| "learning_rate": 4.813949239308444e-06, |
| "loss": 0.4651, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.784816247582205, |
| "grad_norm": 0.42823606729507446, |
| "learning_rate": 4.813707601795136e-06, |
| "loss": 0.4594, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.7852998065764023, |
| "grad_norm": 0.4377743899822235, |
| "learning_rate": 4.813465813540186e-06, |
| "loss": 0.4661, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.7857833655705996, |
| "grad_norm": 0.4125308692455292, |
| "learning_rate": 4.8132238745593474e-06, |
| "loss": 0.4528, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.7862669245647969, |
| "grad_norm": 0.42725464701652527, |
| "learning_rate": 4.812981784868383e-06, |
| "loss": 0.4549, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.7867504835589942, |
| "grad_norm": 0.49512672424316406, |
| "learning_rate": 4.812739544483064e-06, |
| "loss": 0.4482, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.7872340425531915, |
| "grad_norm": 0.42611178755760193, |
| "learning_rate": 4.812497153419173e-06, |
| "loss": 0.4514, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.7877176015473888, |
| "grad_norm": 0.4450073838233948, |
| "learning_rate": 4.812254611692504e-06, |
| "loss": 0.4625, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.7882011605415861, |
| "grad_norm": 0.6700217723846436, |
| "learning_rate": 4.812011919318857e-06, |
| "loss": 0.4854, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.7886847195357833, |
| "grad_norm": 0.4856039881706238, |
| "learning_rate": 4.811769076314044e-06, |
| "loss": 0.4487, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.7891682785299806, |
| "grad_norm": 0.4218294024467468, |
| "learning_rate": 4.811526082693888e-06, |
| "loss": 0.4488, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.789651837524178, |
| "grad_norm": 0.42976853251457214, |
| "learning_rate": 4.811282938474219e-06, |
| "loss": 0.4566, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.7901353965183753, |
| "grad_norm": 0.4193245470523834, |
| "learning_rate": 4.811039643670878e-06, |
| "loss": 0.4567, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.7906189555125726, |
| "grad_norm": 0.4194696247577667, |
| "learning_rate": 4.810796198299717e-06, |
| "loss": 0.4405, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.7911025145067698, |
| "grad_norm": 0.4444844424724579, |
| "learning_rate": 4.810552602376597e-06, |
| "loss": 0.4647, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.7915860735009671, |
| "grad_norm": 0.4571591019630432, |
| "learning_rate": 4.810308855917388e-06, |
| "loss": 0.4434, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.7920696324951644, |
| "grad_norm": 0.399015337228775, |
| "learning_rate": 4.810064958937971e-06, |
| "loss": 0.431, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.7925531914893617, |
| "grad_norm": 0.45619863271713257, |
| "learning_rate": 4.809820911454236e-06, |
| "loss": 0.4509, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.793036750483559, |
| "grad_norm": 0.427679181098938, |
| "learning_rate": 4.809576713482082e-06, |
| "loss": 0.468, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.7935203094777563, |
| "grad_norm": 0.5514840483665466, |
| "learning_rate": 4.809332365037421e-06, |
| "loss": 0.4567, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.7940038684719536, |
| "grad_norm": 0.45355403423309326, |
| "learning_rate": 4.809087866136172e-06, |
| "loss": 0.4705, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.7944874274661509, |
| "grad_norm": 0.4178369343280792, |
| "learning_rate": 4.8088432167942625e-06, |
| "loss": 0.4618, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.7949709864603481, |
| "grad_norm": 0.42189842462539673, |
| "learning_rate": 4.808598417027634e-06, |
| "loss": 0.4481, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.7954545454545454, |
| "grad_norm": 0.4578203856945038, |
| "learning_rate": 4.8083534668522345e-06, |
| "loss": 0.4324, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.7959381044487428, |
| "grad_norm": 0.449862003326416, |
| "learning_rate": 4.808108366284024e-06, |
| "loss": 0.4495, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.7964216634429401, |
| "grad_norm": 0.4230690598487854, |
| "learning_rate": 4.807863115338971e-06, |
| "loss": 0.4615, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.7969052224371374, |
| "grad_norm": 0.4719443917274475, |
| "learning_rate": 4.807617714033053e-06, |
| "loss": 0.4599, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.7973887814313346, |
| "grad_norm": 0.44040265679359436, |
| "learning_rate": 4.807372162382258e-06, |
| "loss": 0.4459, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.7978723404255319, |
| "grad_norm": 0.45189377665519714, |
| "learning_rate": 4.807126460402585e-06, |
| "loss": 0.4593, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.7983558994197292, |
| "grad_norm": 0.4428291618824005, |
| "learning_rate": 4.806880608110042e-06, |
| "loss": 0.456, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.7988394584139265, |
| "grad_norm": 0.4866504371166229, |
| "learning_rate": 4.8066346055206465e-06, |
| "loss": 0.4172, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.7993230174081238, |
| "grad_norm": 0.43505120277404785, |
| "learning_rate": 4.806388452650426e-06, |
| "loss": 0.4516, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.7998065764023211, |
| "grad_norm": 0.45473140478134155, |
| "learning_rate": 4.806142149515416e-06, |
| "loss": 0.4614, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.8002901353965184, |
| "grad_norm": 0.5264351963996887, |
| "learning_rate": 4.8058956961316675e-06, |
| "loss": 0.4461, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.8007736943907157, |
| "grad_norm": 0.435194730758667, |
| "learning_rate": 4.805649092515232e-06, |
| "loss": 0.4469, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.8012572533849129, |
| "grad_norm": 0.45554107427597046, |
| "learning_rate": 4.805402338682181e-06, |
| "loss": 0.461, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.8017408123791102, |
| "grad_norm": 0.4560002088546753, |
| "learning_rate": 4.8051554346485885e-06, |
| "loss": 0.4592, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.8022243713733076, |
| "grad_norm": 0.47332048416137695, |
| "learning_rate": 4.804908380430542e-06, |
| "loss": 0.4441, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.8027079303675049, |
| "grad_norm": 0.43564561009407043, |
| "learning_rate": 4.804661176044134e-06, |
| "loss": 0.474, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.8031914893617021, |
| "grad_norm": 0.43015816807746887, |
| "learning_rate": 4.8044138215054755e-06, |
| "loss": 0.4503, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.8036750483558994, |
| "grad_norm": 0.4809452295303345, |
| "learning_rate": 4.804166316830678e-06, |
| "loss": 0.4514, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.8041586073500967, |
| "grad_norm": 0.4462874233722687, |
| "learning_rate": 4.803918662035868e-06, |
| "loss": 0.463, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.804642166344294, |
| "grad_norm": 0.41628143191337585, |
| "learning_rate": 4.803670857137181e-06, |
| "loss": 0.4339, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.8051257253384912, |
| "grad_norm": 0.4212723672389984, |
| "learning_rate": 4.803422902150762e-06, |
| "loss": 0.4722, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.8056092843326886, |
| "grad_norm": 0.44856181740760803, |
| "learning_rate": 4.8031747970927645e-06, |
| "loss": 0.4467, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.8060928433268859, |
| "grad_norm": 0.45024457573890686, |
| "learning_rate": 4.802926541979354e-06, |
| "loss": 0.4616, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.8065764023210832, |
| "grad_norm": 0.42908668518066406, |
| "learning_rate": 4.802678136826704e-06, |
| "loss": 0.4625, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.8070599613152805, |
| "grad_norm": 0.4267566204071045, |
| "learning_rate": 4.8024295816509995e-06, |
| "loss": 0.4548, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.8075435203094777, |
| "grad_norm": 0.4682227373123169, |
| "learning_rate": 4.802180876468433e-06, |
| "loss": 0.4555, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.808027079303675, |
| "grad_norm": 0.7029394507408142, |
| "learning_rate": 4.801932021295209e-06, |
| "loss": 0.4631, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.8085106382978723, |
| "grad_norm": 0.5212095975875854, |
| "learning_rate": 4.801683016147541e-06, |
| "loss": 0.4471, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.8089941972920697, |
| "grad_norm": 0.4278648793697357, |
| "learning_rate": 4.801433861041651e-06, |
| "loss": 0.4652, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.809477756286267, |
| "grad_norm": 0.45294439792633057, |
| "learning_rate": 4.801184555993772e-06, |
| "loss": 0.4419, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.8099613152804642, |
| "grad_norm": 0.4857720136642456, |
| "learning_rate": 4.800935101020148e-06, |
| "loss": 0.4649, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.8104448742746615, |
| "grad_norm": 0.4251551926136017, |
| "learning_rate": 4.800685496137029e-06, |
| "loss": 0.4667, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.8109284332688588, |
| "grad_norm": 0.5644770860671997, |
| "learning_rate": 4.800435741360679e-06, |
| "loss": 0.4417, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.811411992263056, |
| "grad_norm": 0.44446080923080444, |
| "learning_rate": 4.80018583670737e-06, |
| "loss": 0.4592, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.8118955512572534, |
| "grad_norm": 0.42635470628738403, |
| "learning_rate": 4.799935782193383e-06, |
| "loss": 0.4415, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.8123791102514507, |
| "grad_norm": 0.472168892621994, |
| "learning_rate": 4.799685577835009e-06, |
| "loss": 0.4577, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.812862669245648, |
| "grad_norm": 0.40940558910369873, |
| "learning_rate": 4.79943522364855e-06, |
| "loss": 0.4394, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.8133462282398453, |
| "grad_norm": 0.4531543254852295, |
| "learning_rate": 4.799184719650316e-06, |
| "loss": 0.458, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.8138297872340425, |
| "grad_norm": 0.5482609868049622, |
| "learning_rate": 4.79893406585663e-06, |
| "loss": 0.4498, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.8143133462282398, |
| "grad_norm": 0.4192523956298828, |
| "learning_rate": 4.7986832622838195e-06, |
| "loss": 0.4456, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.8147969052224371, |
| "grad_norm": 0.4526176452636719, |
| "learning_rate": 4.798432308948227e-06, |
| "loss": 0.4259, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.8152804642166345, |
| "grad_norm": 0.4501959979534149, |
| "learning_rate": 4.798181205866201e-06, |
| "loss": 0.442, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.8157640232108317, |
| "grad_norm": 0.42557254433631897, |
| "learning_rate": 4.797929953054102e-06, |
| "loss": 0.4685, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.816247582205029, |
| "grad_norm": 0.457403302192688, |
| "learning_rate": 4.7976785505283e-06, |
| "loss": 0.4717, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.8167311411992263, |
| "grad_norm": 0.5547893047332764, |
| "learning_rate": 4.797426998305172e-06, |
| "loss": 0.4608, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.8172147001934236, |
| "grad_norm": 0.43235495686531067, |
| "learning_rate": 4.7971752964011105e-06, |
| "loss": 0.469, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.8176982591876208, |
| "grad_norm": 0.6597929000854492, |
| "learning_rate": 4.796923444832512e-06, |
| "loss": 0.4495, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.8181818181818182, |
| "grad_norm": 0.6541535258293152, |
| "learning_rate": 4.796671443615785e-06, |
| "loss": 0.4659, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.8186653771760155, |
| "grad_norm": 0.45156872272491455, |
| "learning_rate": 4.796419292767349e-06, |
| "loss": 0.4666, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.8191489361702128, |
| "grad_norm": 0.41430842876434326, |
| "learning_rate": 4.7961669923036304e-06, |
| "loss": 0.4534, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.8196324951644101, |
| "grad_norm": 1.6840633153915405, |
| "learning_rate": 4.795914542241069e-06, |
| "loss": 0.4598, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.8201160541586073, |
| "grad_norm": 0.44279929995536804, |
| "learning_rate": 4.7956619425961095e-06, |
| "loss": 0.449, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.8205996131528046, |
| "grad_norm": 0.48392534255981445, |
| "learning_rate": 4.7954091933852124e-06, |
| "loss": 0.4634, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.8210831721470019, |
| "grad_norm": 0.4240199029445648, |
| "learning_rate": 4.795156294624842e-06, |
| "loss": 0.4418, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.8215667311411993, |
| "grad_norm": 0.4629557430744171, |
| "learning_rate": 4.794903246331477e-06, |
| "loss": 0.4592, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.8220502901353965, |
| "grad_norm": 0.41459253430366516, |
| "learning_rate": 4.794650048521603e-06, |
| "loss": 0.4622, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.8225338491295938, |
| "grad_norm": 0.4223582148551941, |
| "learning_rate": 4.794396701211715e-06, |
| "loss": 0.4542, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.8230174081237911, |
| "grad_norm": 0.41140425205230713, |
| "learning_rate": 4.794143204418322e-06, |
| "loss": 0.4479, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.8235009671179884, |
| "grad_norm": 0.45532315969467163, |
| "learning_rate": 4.793889558157937e-06, |
| "loss": 0.4446, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.8239845261121856, |
| "grad_norm": 0.4596729576587677, |
| "learning_rate": 4.793635762447086e-06, |
| "loss": 0.4658, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.824468085106383, |
| "grad_norm": 0.4528654217720032, |
| "learning_rate": 4.793381817302306e-06, |
| "loss": 0.4689, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.8249516441005803, |
| "grad_norm": 0.4420747458934784, |
| "learning_rate": 4.793127722740139e-06, |
| "loss": 0.4459, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.8254352030947776, |
| "grad_norm": 0.4936273992061615, |
| "learning_rate": 4.792873478777143e-06, |
| "loss": 0.4551, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.8259187620889749, |
| "grad_norm": 0.41701599955558777, |
| "learning_rate": 4.792619085429879e-06, |
| "loss": 0.4189, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.8264023210831721, |
| "grad_norm": 0.7268118262290955, |
| "learning_rate": 4.792364542714923e-06, |
| "loss": 0.4482, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.8268858800773694, |
| "grad_norm": 0.4214200973510742, |
| "learning_rate": 4.792109850648859e-06, |
| "loss": 0.4618, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.8273694390715667, |
| "grad_norm": 0.4370262920856476, |
| "learning_rate": 4.791855009248279e-06, |
| "loss": 0.4581, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.8278529980657641, |
| "grad_norm": 0.4265212118625641, |
| "learning_rate": 4.7916000185297885e-06, |
| "loss": 0.4483, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.8283365570599613, |
| "grad_norm": 0.4249550402164459, |
| "learning_rate": 4.791344878509999e-06, |
| "loss": 0.4545, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.8288201160541586, |
| "grad_norm": 0.4179043173789978, |
| "learning_rate": 4.791089589205534e-06, |
| "loss": 0.4481, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.8293036750483559, |
| "grad_norm": 0.42232656478881836, |
| "learning_rate": 4.790834150633025e-06, |
| "loss": 0.4495, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.8297872340425532, |
| "grad_norm": 0.4011172652244568, |
| "learning_rate": 4.790578562809116e-06, |
| "loss": 0.4571, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.8302707930367504, |
| "grad_norm": 0.701311469078064, |
| "learning_rate": 4.7903228257504574e-06, |
| "loss": 0.4622, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.8307543520309478, |
| "grad_norm": 0.4265159070491791, |
| "learning_rate": 4.790066939473711e-06, |
| "loss": 0.4398, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.8312379110251451, |
| "grad_norm": 0.48582372069358826, |
| "learning_rate": 4.78981090399555e-06, |
| "loss": 0.4812, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.8317214700193424, |
| "grad_norm": 0.4251188337802887, |
| "learning_rate": 4.789554719332652e-06, |
| "loss": 0.4429, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.8322050290135397, |
| "grad_norm": 0.4369942843914032, |
| "learning_rate": 4.789298385501712e-06, |
| "loss": 0.4698, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.8326885880077369, |
| "grad_norm": 0.4649386405944824, |
| "learning_rate": 4.789041902519427e-06, |
| "loss": 0.4595, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.8331721470019342, |
| "grad_norm": 0.43647441267967224, |
| "learning_rate": 4.788785270402508e-06, |
| "loss": 0.4301, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.8336557059961315, |
| "grad_norm": 0.4348291754722595, |
| "learning_rate": 4.788528489167677e-06, |
| "loss": 0.4698, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.8341392649903289, |
| "grad_norm": 0.4395955204963684, |
| "learning_rate": 4.788271558831663e-06, |
| "loss": 0.4602, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.8346228239845261, |
| "grad_norm": 0.4474172592163086, |
| "learning_rate": 4.788014479411203e-06, |
| "loss": 0.4531, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.8351063829787234, |
| "grad_norm": 0.4625321924686432, |
| "learning_rate": 4.787757250923049e-06, |
| "loss": 0.4687, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.8355899419729207, |
| "grad_norm": 0.4593714773654938, |
| "learning_rate": 4.7874998733839585e-06, |
| "loss": 0.4457, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.836073500967118, |
| "grad_norm": 0.46115872263908386, |
| "learning_rate": 4.7872423468107e-06, |
| "loss": 0.4609, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.8365570599613152, |
| "grad_norm": 0.4356870949268341, |
| "learning_rate": 4.786984671220053e-06, |
| "loss": 0.474, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.8370406189555126, |
| "grad_norm": 0.4250693619251251, |
| "learning_rate": 4.786726846628804e-06, |
| "loss": 0.448, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.8375241779497099, |
| "grad_norm": 0.433578222990036, |
| "learning_rate": 4.786468873053751e-06, |
| "loss": 0.4563, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.8380077369439072, |
| "grad_norm": 0.40608447790145874, |
| "learning_rate": 4.786210750511701e-06, |
| "loss": 0.4776, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.8384912959381045, |
| "grad_norm": 0.42531365156173706, |
| "learning_rate": 4.785952479019472e-06, |
| "loss": 0.4628, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.8389748549323017, |
| "grad_norm": 0.4353698790073395, |
| "learning_rate": 4.785694058593891e-06, |
| "loss": 0.4355, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.839458413926499, |
| "grad_norm": 0.48642927408218384, |
| "learning_rate": 4.785435489251794e-06, |
| "loss": 0.4357, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.8399419729206963, |
| "grad_norm": 0.4886138439178467, |
| "learning_rate": 4.785176771010026e-06, |
| "loss": 0.4432, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.8404255319148937, |
| "grad_norm": 0.45023098587989807, |
| "learning_rate": 4.784917903885445e-06, |
| "loss": 0.4606, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.8409090909090909, |
| "grad_norm": 0.432458758354187, |
| "learning_rate": 4.7846588878949155e-06, |
| "loss": 0.4678, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.8413926499032882, |
| "grad_norm": 0.8182100653648376, |
| "learning_rate": 4.784399723055313e-06, |
| "loss": 0.4517, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.8418762088974855, |
| "grad_norm": 0.40394729375839233, |
| "learning_rate": 4.784140409383522e-06, |
| "loss": 0.4487, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.8423597678916828, |
| "grad_norm": 0.4333181083202362, |
| "learning_rate": 4.783880946896438e-06, |
| "loss": 0.4647, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.84284332688588, |
| "grad_norm": 0.4345736801624298, |
| "learning_rate": 4.783621335610965e-06, |
| "loss": 0.4709, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.8433268858800773, |
| "grad_norm": 0.4557689130306244, |
| "learning_rate": 4.783361575544017e-06, |
| "loss": 0.4335, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.8438104448742747, |
| "grad_norm": 0.46906688809394836, |
| "learning_rate": 4.783101666712517e-06, |
| "loss": 0.4603, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.844294003868472, |
| "grad_norm": 0.41164135932922363, |
| "learning_rate": 4.7828416091334e-06, |
| "loss": 0.4465, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.8447775628626693, |
| "grad_norm": 0.4292045533657074, |
| "learning_rate": 4.782581402823608e-06, |
| "loss": 0.4614, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.8452611218568665, |
| "grad_norm": 0.4357629716396332, |
| "learning_rate": 4.782321047800094e-06, |
| "loss": 0.4654, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.8457446808510638, |
| "grad_norm": 0.4188762307167053, |
| "learning_rate": 4.782060544079822e-06, |
| "loss": 0.4631, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.8462282398452611, |
| "grad_norm": 0.42249998450279236, |
| "learning_rate": 4.781799891679763e-06, |
| "loss": 0.4467, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.8467117988394585, |
| "grad_norm": 0.42382362484931946, |
| "learning_rate": 4.781539090616898e-06, |
| "loss": 0.4286, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.8471953578336557, |
| "grad_norm": 0.45076820254325867, |
| "learning_rate": 4.78127814090822e-06, |
| "loss": 0.4471, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.847678916827853, |
| "grad_norm": 0.4701208770275116, |
| "learning_rate": 4.781017042570729e-06, |
| "loss": 0.4637, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.8481624758220503, |
| "grad_norm": 0.44319573044776917, |
| "learning_rate": 4.780755795621438e-06, |
| "loss": 0.4604, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.8486460348162476, |
| "grad_norm": 0.4308934509754181, |
| "learning_rate": 4.7804944000773665e-06, |
| "loss": 0.4554, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.8491295938104448, |
| "grad_norm": 0.4575637876987457, |
| "learning_rate": 4.780232855955544e-06, |
| "loss": 0.4506, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.8496131528046421, |
| "grad_norm": 0.43875017762184143, |
| "learning_rate": 4.779971163273012e-06, |
| "loss": 0.4441, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.8500967117988395, |
| "grad_norm": 0.4851578176021576, |
| "learning_rate": 4.779709322046818e-06, |
| "loss": 0.4551, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.8505802707930368, |
| "grad_norm": 0.46814751625061035, |
| "learning_rate": 4.7794473322940234e-06, |
| "loss": 0.407, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "grad_norm": 0.4497595727443695, |
| "learning_rate": 4.779185194031698e-06, |
| "loss": 0.4621, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.8515473887814313, |
| "grad_norm": 0.41520148515701294, |
| "learning_rate": 4.778922907276917e-06, |
| "loss": 0.4521, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.8520309477756286, |
| "grad_norm": 0.4281027019023895, |
| "learning_rate": 4.778660472046773e-06, |
| "loss": 0.4625, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.8525145067698259, |
| "grad_norm": 0.4215813875198364, |
| "learning_rate": 4.77839788835836e-06, |
| "loss": 0.4404, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.8529980657640233, |
| "grad_norm": 0.4445689022541046, |
| "learning_rate": 4.77813515622879e-06, |
| "loss": 0.4363, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.8534816247582205, |
| "grad_norm": 0.6075159311294556, |
| "learning_rate": 4.777872275675176e-06, |
| "loss": 0.4429, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.8539651837524178, |
| "grad_norm": 0.4195761978626251, |
| "learning_rate": 4.777609246714648e-06, |
| "loss": 0.4488, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.8544487427466151, |
| "grad_norm": 0.46260061860084534, |
| "learning_rate": 4.777346069364343e-06, |
| "loss": 0.4588, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.8549323017408124, |
| "grad_norm": 0.44670024514198303, |
| "learning_rate": 4.777082743641406e-06, |
| "loss": 0.4381, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.8554158607350096, |
| "grad_norm": 0.44087886810302734, |
| "learning_rate": 4.776819269562992e-06, |
| "loss": 0.4298, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.8558994197292069, |
| "grad_norm": 0.42692020535469055, |
| "learning_rate": 4.776555647146269e-06, |
| "loss": 0.4662, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.8563829787234043, |
| "grad_norm": 0.4216541349887848, |
| "learning_rate": 4.776291876408412e-06, |
| "loss": 0.4815, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.8568665377176016, |
| "grad_norm": 0.4323381781578064, |
| "learning_rate": 4.776027957366605e-06, |
| "loss": 0.4588, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.8573500967117988, |
| "grad_norm": 0.5027583241462708, |
| "learning_rate": 4.775763890038045e-06, |
| "loss": 0.4692, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.8578336557059961, |
| "grad_norm": 0.42592036724090576, |
| "learning_rate": 4.775499674439934e-06, |
| "loss": 0.4538, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.8583172147001934, |
| "grad_norm": 0.40400874614715576, |
| "learning_rate": 4.775235310589487e-06, |
| "loss": 0.4186, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.8588007736943907, |
| "grad_norm": 0.5430548191070557, |
| "learning_rate": 4.774970798503926e-06, |
| "loss": 0.453, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.8592843326885881, |
| "grad_norm": 0.43877115845680237, |
| "learning_rate": 4.774706138200488e-06, |
| "loss": 0.4208, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.8597678916827853, |
| "grad_norm": 0.47961005568504333, |
| "learning_rate": 4.774441329696413e-06, |
| "loss": 0.4471, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.8602514506769826, |
| "grad_norm": 0.4484774172306061, |
| "learning_rate": 4.774176373008955e-06, |
| "loss": 0.4639, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.8607350096711799, |
| "grad_norm": 0.4233231544494629, |
| "learning_rate": 4.7739112681553754e-06, |
| "loss": 0.4733, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.8612185686653772, |
| "grad_norm": 0.4225790500640869, |
| "learning_rate": 4.773646015152947e-06, |
| "loss": 0.475, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.8617021276595744, |
| "grad_norm": 0.45184823870658875, |
| "learning_rate": 4.773380614018952e-06, |
| "loss": 0.4395, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.8621856866537717, |
| "grad_norm": 0.4106232523918152, |
| "learning_rate": 4.773115064770681e-06, |
| "loss": 0.4586, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.8626692456479691, |
| "grad_norm": 0.4108845293521881, |
| "learning_rate": 4.772849367425434e-06, |
| "loss": 0.4443, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.8631528046421664, |
| "grad_norm": 0.43157774209976196, |
| "learning_rate": 4.7725835220005235e-06, |
| "loss": 0.4535, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.8636363636363636, |
| "grad_norm": 0.4426245391368866, |
| "learning_rate": 4.772317528513268e-06, |
| "loss": 0.4661, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.8641199226305609, |
| "grad_norm": 0.4643140137195587, |
| "learning_rate": 4.772051386980998e-06, |
| "loss": 0.4537, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.8646034816247582, |
| "grad_norm": 0.4075002372264862, |
| "learning_rate": 4.7717850974210536e-06, |
| "loss": 0.4448, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.8650870406189555, |
| "grad_norm": 0.42226505279541016, |
| "learning_rate": 4.771518659850784e-06, |
| "loss": 0.4657, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.8655705996131529, |
| "grad_norm": 0.5288849472999573, |
| "learning_rate": 4.7712520742875465e-06, |
| "loss": 0.444, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.8660541586073501, |
| "grad_norm": 0.5637319087982178, |
| "learning_rate": 4.7709853407487105e-06, |
| "loss": 0.4764, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.8665377176015474, |
| "grad_norm": 0.4180159866809845, |
| "learning_rate": 4.770718459251655e-06, |
| "loss": 0.4605, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.8670212765957447, |
| "grad_norm": 0.543476402759552, |
| "learning_rate": 4.770451429813767e-06, |
| "loss": 0.4558, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.867504835589942, |
| "grad_norm": 0.42809557914733887, |
| "learning_rate": 4.770184252452443e-06, |
| "loss": 0.4566, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.8679883945841392, |
| "grad_norm": 0.4013577103614807, |
| "learning_rate": 4.769916927185092e-06, |
| "loss": 0.4363, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.8684719535783365, |
| "grad_norm": 0.40591180324554443, |
| "learning_rate": 4.7696494540291295e-06, |
| "loss": 0.4361, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.8689555125725339, |
| "grad_norm": 0.4472590386867523, |
| "learning_rate": 4.769381833001981e-06, |
| "loss": 0.4439, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.8694390715667312, |
| "grad_norm": 0.435757040977478, |
| "learning_rate": 4.769114064121083e-06, |
| "loss": 0.4494, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.8699226305609284, |
| "grad_norm": 0.45399346947669983, |
| "learning_rate": 4.768846147403883e-06, |
| "loss": 0.4543, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.8704061895551257, |
| "grad_norm": 0.46117937564849854, |
| "learning_rate": 4.768578082867833e-06, |
| "loss": 0.4442, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.870889748549323, |
| "grad_norm": 0.42407071590423584, |
| "learning_rate": 4.7683098705303995e-06, |
| "loss": 0.4448, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.8713733075435203, |
| "grad_norm": 0.44393467903137207, |
| "learning_rate": 4.7680415104090576e-06, |
| "loss": 0.4615, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.8718568665377177, |
| "grad_norm": 0.424594908952713, |
| "learning_rate": 4.767773002521289e-06, |
| "loss": 0.4566, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.8723404255319149, |
| "grad_norm": 0.4583720862865448, |
| "learning_rate": 4.76750434688459e-06, |
| "loss": 0.4618, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.8728239845261122, |
| "grad_norm": 0.3963436484336853, |
| "learning_rate": 4.767235543516463e-06, |
| "loss": 0.4199, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.8733075435203095, |
| "grad_norm": 0.41985222697257996, |
| "learning_rate": 4.7669665924344205e-06, |
| "loss": 0.4495, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.8737911025145068, |
| "grad_norm": 0.47478315234184265, |
| "learning_rate": 4.766697493655985e-06, |
| "loss": 0.4612, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.874274661508704, |
| "grad_norm": 0.42178353667259216, |
| "learning_rate": 4.76642824719869e-06, |
| "loss": 0.4431, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.8747582205029013, |
| "grad_norm": 0.42118799686431885, |
| "learning_rate": 4.766158853080076e-06, |
| "loss": 0.465, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.8752417794970987, |
| "grad_norm": 0.42525774240493774, |
| "learning_rate": 4.765889311317695e-06, |
| "loss": 0.4516, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.875725338491296, |
| "grad_norm": 0.4339666962623596, |
| "learning_rate": 4.765619621929108e-06, |
| "loss": 0.4615, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.8762088974854932, |
| "grad_norm": 0.43213528394699097, |
| "learning_rate": 4.765349784931885e-06, |
| "loss": 0.4276, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.8766924564796905, |
| "grad_norm": 0.406515896320343, |
| "learning_rate": 4.765079800343608e-06, |
| "loss": 0.4411, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.8771760154738878, |
| "grad_norm": 0.47391143441200256, |
| "learning_rate": 4.764809668181866e-06, |
| "loss": 0.4577, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.8776595744680851, |
| "grad_norm": 0.4145541489124298, |
| "learning_rate": 4.764539388464257e-06, |
| "loss": 0.4672, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.8781431334622823, |
| "grad_norm": 0.4604397416114807, |
| "learning_rate": 4.764268961208393e-06, |
| "loss": 0.4697, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.8786266924564797, |
| "grad_norm": 0.4669099748134613, |
| "learning_rate": 4.76399838643189e-06, |
| "loss": 0.4518, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.879110251450677, |
| "grad_norm": 0.4663965702056885, |
| "learning_rate": 4.763727664152378e-06, |
| "loss": 0.4523, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.8795938104448743, |
| "grad_norm": 0.44860151410102844, |
| "learning_rate": 4.763456794387495e-06, |
| "loss": 0.459, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.8800773694390716, |
| "grad_norm": 0.47004613280296326, |
| "learning_rate": 4.7631857771548875e-06, |
| "loss": 0.4593, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.8805609284332688, |
| "grad_norm": 0.4314551055431366, |
| "learning_rate": 4.762914612472214e-06, |
| "loss": 0.4233, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.8810444874274661, |
| "grad_norm": 0.416069895029068, |
| "learning_rate": 4.762643300357141e-06, |
| "loss": 0.4555, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.8815280464216635, |
| "grad_norm": 0.41660386323928833, |
| "learning_rate": 4.762371840827344e-06, |
| "loss": 0.4466, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.8820116054158608, |
| "grad_norm": 0.4352911412715912, |
| "learning_rate": 4.76210023390051e-06, |
| "loss": 0.4431, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.882495164410058, |
| "grad_norm": 0.4292300343513489, |
| "learning_rate": 4.761828479594334e-06, |
| "loss": 0.4661, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.8829787234042553, |
| "grad_norm": 0.4347653388977051, |
| "learning_rate": 4.761556577926522e-06, |
| "loss": 0.4352, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.8834622823984526, |
| "grad_norm": 0.4411576986312866, |
| "learning_rate": 4.761284528914787e-06, |
| "loss": 0.4621, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.8839458413926499, |
| "grad_norm": 0.43334683775901794, |
| "learning_rate": 4.761012332576856e-06, |
| "loss": 0.4608, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.8844294003868471, |
| "grad_norm": 0.450206995010376, |
| "learning_rate": 4.76073998893046e-06, |
| "loss": 0.46, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.8849129593810445, |
| "grad_norm": 0.42695486545562744, |
| "learning_rate": 4.760467497993347e-06, |
| "loss": 0.4461, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.8853965183752418, |
| "grad_norm": 0.4570879638195038, |
| "learning_rate": 4.760194859783266e-06, |
| "loss": 0.4412, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.8858800773694391, |
| "grad_norm": 0.44017043709754944, |
| "learning_rate": 4.759922074317981e-06, |
| "loss": 0.4665, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.8863636363636364, |
| "grad_norm": 0.42846250534057617, |
| "learning_rate": 4.759649141615265e-06, |
| "loss": 0.4623, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.8868471953578336, |
| "grad_norm": 0.4540502727031708, |
| "learning_rate": 4.759376061692899e-06, |
| "loss": 0.4458, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.8873307543520309, |
| "grad_norm": 0.42256003618240356, |
| "learning_rate": 4.7591028345686765e-06, |
| "loss": 0.4248, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.8878143133462283, |
| "grad_norm": 0.4467964470386505, |
| "learning_rate": 4.758829460260397e-06, |
| "loss": 0.4554, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.8882978723404256, |
| "grad_norm": 0.5088194608688354, |
| "learning_rate": 4.758555938785872e-06, |
| "loss": 0.4552, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.8887814313346228, |
| "grad_norm": 0.44091877341270447, |
| "learning_rate": 4.758282270162921e-06, |
| "loss": 0.4683, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.8892649903288201, |
| "grad_norm": 0.4545842111110687, |
| "learning_rate": 4.758008454409374e-06, |
| "loss": 0.4618, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.8897485493230174, |
| "grad_norm": 0.4302009046077728, |
| "learning_rate": 4.757734491543072e-06, |
| "loss": 0.4503, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.8902321083172147, |
| "grad_norm": 0.41503089666366577, |
| "learning_rate": 4.7574603815818624e-06, |
| "loss": 0.4373, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.8907156673114119, |
| "grad_norm": 0.5335795283317566, |
| "learning_rate": 4.7571861245436054e-06, |
| "loss": 0.4632, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.8911992263056093, |
| "grad_norm": 0.557690441608429, |
| "learning_rate": 4.756911720446168e-06, |
| "loss": 0.4666, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.8916827852998066, |
| "grad_norm": 0.40970084071159363, |
| "learning_rate": 4.756637169307429e-06, |
| "loss": 0.4389, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.8921663442940039, |
| "grad_norm": 0.4315798282623291, |
| "learning_rate": 4.756362471145275e-06, |
| "loss": 0.4389, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.8926499032882012, |
| "grad_norm": 0.4490053057670593, |
| "learning_rate": 4.756087625977603e-06, |
| "loss": 0.4486, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.8931334622823984, |
| "grad_norm": 0.41619452834129333, |
| "learning_rate": 4.755812633822321e-06, |
| "loss": 0.475, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.8936170212765957, |
| "grad_norm": 0.4929364323616028, |
| "learning_rate": 4.755537494697343e-06, |
| "loss": 0.4422, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.8941005802707931, |
| "grad_norm": 0.4437929093837738, |
| "learning_rate": 4.755262208620597e-06, |
| "loss": 0.4742, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.8945841392649904, |
| "grad_norm": 0.48250436782836914, |
| "learning_rate": 4.7549867756100155e-06, |
| "loss": 0.4388, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.8950676982591876, |
| "grad_norm": 0.41630637645721436, |
| "learning_rate": 4.754711195683547e-06, |
| "loss": 0.4263, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.8955512572533849, |
| "grad_norm": 0.4048013389110565, |
| "learning_rate": 4.754435468859143e-06, |
| "loss": 0.4483, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.8960348162475822, |
| "grad_norm": 0.4773472249507904, |
| "learning_rate": 4.754159595154768e-06, |
| "loss": 0.445, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.8965183752417795, |
| "grad_norm": 0.43505337834358215, |
| "learning_rate": 4.753883574588397e-06, |
| "loss": 0.4539, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.8970019342359767, |
| "grad_norm": 0.42329397797584534, |
| "learning_rate": 4.753607407178012e-06, |
| "loss": 0.4382, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.8974854932301741, |
| "grad_norm": 0.4487643241882324, |
| "learning_rate": 4.753331092941606e-06, |
| "loss": 0.4593, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.8979690522243714, |
| "grad_norm": 0.43058884143829346, |
| "learning_rate": 4.753054631897183e-06, |
| "loss": 0.4562, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.8984526112185687, |
| "grad_norm": 0.440266877412796, |
| "learning_rate": 4.752778024062752e-06, |
| "loss": 0.4274, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.898936170212766, |
| "grad_norm": 0.44400760531425476, |
| "learning_rate": 4.752501269456336e-06, |
| "loss": 0.4624, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.8994197292069632, |
| "grad_norm": 0.4999360740184784, |
| "learning_rate": 4.752224368095965e-06, |
| "loss": 0.4312, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.8999032882011605, |
| "grad_norm": 0.4733029901981354, |
| "learning_rate": 4.7519473199996806e-06, |
| "loss": 0.4529, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.9003868471953579, |
| "grad_norm": 0.41087576746940613, |
| "learning_rate": 4.751670125185532e-06, |
| "loss": 0.4427, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.9008704061895552, |
| "grad_norm": 0.42453569173812866, |
| "learning_rate": 4.75139278367158e-06, |
| "loss": 0.4547, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.9013539651837524, |
| "grad_norm": 0.4299073815345764, |
| "learning_rate": 4.751115295475893e-06, |
| "loss": 0.4491, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.9018375241779497, |
| "grad_norm": 0.4565071165561676, |
| "learning_rate": 4.75083766061655e-06, |
| "loss": 0.4319, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.902321083172147, |
| "grad_norm": 0.4117959141731262, |
| "learning_rate": 4.7505598791116395e-06, |
| "loss": 0.4272, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.9028046421663443, |
| "grad_norm": 0.435011625289917, |
| "learning_rate": 4.75028195097926e-06, |
| "loss": 0.4468, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.9032882011605415, |
| "grad_norm": 0.45105016231536865, |
| "learning_rate": 4.750003876237517e-06, |
| "loss": 0.4576, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.9037717601547389, |
| "grad_norm": 0.425686776638031, |
| "learning_rate": 4.749725654904529e-06, |
| "loss": 0.4793, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.9042553191489362, |
| "grad_norm": 0.5148751735687256, |
| "learning_rate": 4.749447286998422e-06, |
| "loss": 0.4732, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.9047388781431335, |
| "grad_norm": 0.41432178020477295, |
| "learning_rate": 4.749168772537333e-06, |
| "loss": 0.44, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.9052224371373307, |
| "grad_norm": 0.4313696324825287, |
| "learning_rate": 4.748890111539407e-06, |
| "loss": 0.4627, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.905705996131528, |
| "grad_norm": 0.43462052941322327, |
| "learning_rate": 4.748611304022799e-06, |
| "loss": 0.4246, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.9061895551257253, |
| "grad_norm": 0.4207950234413147, |
| "learning_rate": 4.748332350005674e-06, |
| "loss": 0.4393, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.9066731141199227, |
| "grad_norm": 0.42336976528167725, |
| "learning_rate": 4.748053249506206e-06, |
| "loss": 0.4682, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.90715667311412, |
| "grad_norm": 0.4382014572620392, |
| "learning_rate": 4.74777400254258e-06, |
| "loss": 0.4258, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.9076402321083172, |
| "grad_norm": 0.48035499453544617, |
| "learning_rate": 4.747494609132987e-06, |
| "loss": 0.4445, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.9081237911025145, |
| "grad_norm": 0.4145694077014923, |
| "learning_rate": 4.747215069295632e-06, |
| "loss": 0.4587, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.9086073500967118, |
| "grad_norm": 0.4445246756076813, |
| "learning_rate": 4.746935383048728e-06, |
| "loss": 0.4547, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 0.40601107478141785, |
| "learning_rate": 4.746655550410494e-06, |
| "loss": 0.4509, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.9095744680851063, |
| "grad_norm": 0.45228636264801025, |
| "learning_rate": 4.746375571399164e-06, |
| "loss": 0.4469, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.9100580270793037, |
| "grad_norm": 0.4875979721546173, |
| "learning_rate": 4.7460954460329775e-06, |
| "loss": 0.4196, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.910541586073501, |
| "grad_norm": 0.46213170886039734, |
| "learning_rate": 4.7458151743301876e-06, |
| "loss": 0.4709, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.9110251450676983, |
| "grad_norm": 0.47638139128685, |
| "learning_rate": 4.745534756309052e-06, |
| "loss": 0.4437, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.9115087040618955, |
| "grad_norm": 0.43488895893096924, |
| "learning_rate": 4.745254191987842e-06, |
| "loss": 0.4387, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.9119922630560928, |
| "grad_norm": 0.43625906109809875, |
| "learning_rate": 4.744973481384834e-06, |
| "loss": 0.4461, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.9124758220502901, |
| "grad_norm": 0.4320892095565796, |
| "learning_rate": 4.7446926245183215e-06, |
| "loss": 0.4588, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.9129593810444874, |
| "grad_norm": 0.4240933358669281, |
| "learning_rate": 4.744411621406598e-06, |
| "loss": 0.449, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.9134429400386848, |
| "grad_norm": 0.4831683337688446, |
| "learning_rate": 4.744130472067974e-06, |
| "loss": 0.4752, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.913926499032882, |
| "grad_norm": 0.4373767375946045, |
| "learning_rate": 4.743849176520766e-06, |
| "loss": 0.4529, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.9144100580270793, |
| "grad_norm": 0.42155158519744873, |
| "learning_rate": 4.743567734783301e-06, |
| "loss": 0.4264, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.9148936170212766, |
| "grad_norm": 0.42250940203666687, |
| "learning_rate": 4.7432861468739156e-06, |
| "loss": 0.4627, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.9153771760154739, |
| "grad_norm": 0.8477792739868164, |
| "learning_rate": 4.743004412810956e-06, |
| "loss": 0.4652, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.9158607350096711, |
| "grad_norm": 0.42357683181762695, |
| "learning_rate": 4.742722532612775e-06, |
| "loss": 0.4729, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.9163442940038685, |
| "grad_norm": 0.4757964611053467, |
| "learning_rate": 4.7424405062977404e-06, |
| "loss": 0.4491, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.9168278529980658, |
| "grad_norm": 0.4081232249736786, |
| "learning_rate": 4.742158333884227e-06, |
| "loss": 0.4434, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.9173114119922631, |
| "grad_norm": 0.4559086263179779, |
| "learning_rate": 4.741876015390616e-06, |
| "loss": 0.4418, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.9177949709864603, |
| "grad_norm": 0.405843049287796, |
| "learning_rate": 4.741593550835303e-06, |
| "loss": 0.4567, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.9182785299806576, |
| "grad_norm": 0.41238856315612793, |
| "learning_rate": 4.741310940236691e-06, |
| "loss": 0.4312, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.9187620889748549, |
| "grad_norm": 0.4161728620529175, |
| "learning_rate": 4.741028183613192e-06, |
| "loss": 0.4549, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.9192456479690522, |
| "grad_norm": 0.4297192394733429, |
| "learning_rate": 4.7407452809832275e-06, |
| "loss": 0.4578, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.9197292069632496, |
| "grad_norm": 0.5853244066238403, |
| "learning_rate": 4.7404622323652296e-06, |
| "loss": 0.4455, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.9202127659574468, |
| "grad_norm": 0.4154956638813019, |
| "learning_rate": 4.740179037777639e-06, |
| "loss": 0.4658, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.9206963249516441, |
| "grad_norm": 0.41744962334632874, |
| "learning_rate": 4.7398956972389074e-06, |
| "loss": 0.4431, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.9211798839458414, |
| "grad_norm": 0.44503679871559143, |
| "learning_rate": 4.7396122107674935e-06, |
| "loss": 0.4396, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.9216634429400387, |
| "grad_norm": 0.4330887794494629, |
| "learning_rate": 4.739328578381868e-06, |
| "loss": 0.4341, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.9221470019342359, |
| "grad_norm": 0.4313158690929413, |
| "learning_rate": 4.739044800100509e-06, |
| "loss": 0.4565, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.9226305609284333, |
| "grad_norm": 0.43022119998931885, |
| "learning_rate": 4.738760875941905e-06, |
| "loss": 0.4479, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.9231141199226306, |
| "grad_norm": 0.4282204806804657, |
| "learning_rate": 4.738476805924555e-06, |
| "loss": 0.468, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.9235976789168279, |
| "grad_norm": 0.5046052932739258, |
| "learning_rate": 4.738192590066967e-06, |
| "loss": 0.4633, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.9240812379110251, |
| "grad_norm": 0.4380984902381897, |
| "learning_rate": 4.737908228387656e-06, |
| "loss": 0.451, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.9245647969052224, |
| "grad_norm": 0.41640403866767883, |
| "learning_rate": 4.737623720905151e-06, |
| "loss": 0.4307, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.9250483558994197, |
| "grad_norm": 0.4518442153930664, |
| "learning_rate": 4.737339067637987e-06, |
| "loss": 0.4715, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.925531914893617, |
| "grad_norm": 0.4299834966659546, |
| "learning_rate": 4.737054268604709e-06, |
| "loss": 0.4693, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.9260154738878144, |
| "grad_norm": 0.41833609342575073, |
| "learning_rate": 4.736769323823873e-06, |
| "loss": 0.4533, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.9264990328820116, |
| "grad_norm": 0.4507361054420471, |
| "learning_rate": 4.7364842333140436e-06, |
| "loss": 0.4674, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.9269825918762089, |
| "grad_norm": 0.4408568739891052, |
| "learning_rate": 4.736198997093795e-06, |
| "loss": 0.4366, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.9274661508704062, |
| "grad_norm": 0.457612007856369, |
| "learning_rate": 4.7359136151817095e-06, |
| "loss": 0.4611, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.9279497098646035, |
| "grad_norm": 0.45070797204971313, |
| "learning_rate": 4.7356280875963814e-06, |
| "loss": 0.4364, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.9284332688588007, |
| "grad_norm": 0.7539900541305542, |
| "learning_rate": 4.735342414356413e-06, |
| "loss": 0.4404, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.9289168278529981, |
| "grad_norm": 0.43759679794311523, |
| "learning_rate": 4.735056595480417e-06, |
| "loss": 0.4603, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.9294003868471954, |
| "grad_norm": 0.44140011072158813, |
| "learning_rate": 4.734770630987013e-06, |
| "loss": 0.4505, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.9298839458413927, |
| "grad_norm": 0.43896445631980896, |
| "learning_rate": 4.734484520894834e-06, |
| "loss": 0.4397, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.9303675048355899, |
| "grad_norm": 0.4464511573314667, |
| "learning_rate": 4.73419826522252e-06, |
| "loss": 0.4594, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.9308510638297872, |
| "grad_norm": 0.5006834864616394, |
| "learning_rate": 4.7339118639887204e-06, |
| "loss": 0.4569, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.9313346228239845, |
| "grad_norm": 0.4364486038684845, |
| "learning_rate": 4.733625317212095e-06, |
| "loss": 0.4611, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.9318181818181818, |
| "grad_norm": 0.4558824598789215, |
| "learning_rate": 4.733338624911313e-06, |
| "loss": 0.4313, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.9323017408123792, |
| "grad_norm": 0.4253576099872589, |
| "learning_rate": 4.733051787105053e-06, |
| "loss": 0.4533, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.9327852998065764, |
| "grad_norm": 0.44522860646247864, |
| "learning_rate": 4.732764803812002e-06, |
| "loss": 0.4526, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.9332688588007737, |
| "grad_norm": 0.4420976936817169, |
| "learning_rate": 4.73247767505086e-06, |
| "loss": 0.452, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.933752417794971, |
| "grad_norm": 0.493862122297287, |
| "learning_rate": 4.73219040084033e-06, |
| "loss": 0.4504, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.9342359767891683, |
| "grad_norm": 0.4543628990650177, |
| "learning_rate": 4.73190298119913e-06, |
| "loss": 0.4687, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.9347195357833655, |
| "grad_norm": 0.46449723839759827, |
| "learning_rate": 4.731615416145987e-06, |
| "loss": 0.454, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.9352030947775629, |
| "grad_norm": 0.4249908924102783, |
| "learning_rate": 4.731327705699636e-06, |
| "loss": 0.4437, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.9356866537717602, |
| "grad_norm": 0.43488219380378723, |
| "learning_rate": 4.73103984987882e-06, |
| "loss": 0.4578, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.9361702127659575, |
| "grad_norm": 0.4787844121456146, |
| "learning_rate": 4.730751848702294e-06, |
| "loss": 0.4676, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.9366537717601547, |
| "grad_norm": 0.4363221824169159, |
| "learning_rate": 4.730463702188824e-06, |
| "loss": 0.4416, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.937137330754352, |
| "grad_norm": 0.42467185854911804, |
| "learning_rate": 4.73017541035718e-06, |
| "loss": 0.456, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.9376208897485493, |
| "grad_norm": 0.41111892461776733, |
| "learning_rate": 4.729886973226146e-06, |
| "loss": 0.4532, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.9381044487427466, |
| "grad_norm": 0.44362354278564453, |
| "learning_rate": 4.729598390814515e-06, |
| "loss": 0.4422, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.938588007736944, |
| "grad_norm": 0.43182167410850525, |
| "learning_rate": 4.7293096631410875e-06, |
| "loss": 0.4481, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.9390715667311412, |
| "grad_norm": 0.4451581835746765, |
| "learning_rate": 4.729020790224675e-06, |
| "loss": 0.4526, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.9395551257253385, |
| "grad_norm": 0.4309098422527313, |
| "learning_rate": 4.7287317720840974e-06, |
| "loss": 0.4394, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.9400386847195358, |
| "grad_norm": 0.4834847152233124, |
| "learning_rate": 4.728442608738185e-06, |
| "loss": 0.4521, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.940522243713733, |
| "grad_norm": 0.4676097631454468, |
| "learning_rate": 4.728153300205778e-06, |
| "loss": 0.4322, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.9410058027079303, |
| "grad_norm": 0.46261999011039734, |
| "learning_rate": 4.727863846505725e-06, |
| "loss": 0.4534, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.9414893617021277, |
| "grad_norm": 0.5883946418762207, |
| "learning_rate": 4.727574247656883e-06, |
| "loss": 0.4541, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.941972920696325, |
| "grad_norm": 0.41333869099617004, |
| "learning_rate": 4.727284503678121e-06, |
| "loss": 0.453, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.9424564796905223, |
| "grad_norm": 0.45521169900894165, |
| "learning_rate": 4.726994614588316e-06, |
| "loss": 0.4537, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.9429400386847195, |
| "grad_norm": 0.40193212032318115, |
| "learning_rate": 4.726704580406355e-06, |
| "loss": 0.4308, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.9434235976789168, |
| "grad_norm": 0.44298529624938965, |
| "learning_rate": 4.726414401151135e-06, |
| "loss": 0.466, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.9439071566731141, |
| "grad_norm": 0.436103880405426, |
| "learning_rate": 4.7261240768415595e-06, |
| "loss": 0.4465, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.9443907156673114, |
| "grad_norm": 0.414192795753479, |
| "learning_rate": 4.725833607496545e-06, |
| "loss": 0.4574, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.9448742746615088, |
| "grad_norm": 0.41243085265159607, |
| "learning_rate": 4.725542993135015e-06, |
| "loss": 0.4458, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.945357833655706, |
| "grad_norm": 0.4826000928878784, |
| "learning_rate": 4.725252233775905e-06, |
| "loss": 0.4551, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.9458413926499033, |
| "grad_norm": 0.42552217841148376, |
| "learning_rate": 4.724961329438158e-06, |
| "loss": 0.4476, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.9463249516441006, |
| "grad_norm": 0.4663180410861969, |
| "learning_rate": 4.724670280140726e-06, |
| "loss": 0.4567, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.9468085106382979, |
| "grad_norm": 0.6530309319496155, |
| "learning_rate": 4.7243790859025715e-06, |
| "loss": 0.4212, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.9472920696324951, |
| "grad_norm": 0.4663379192352295, |
| "learning_rate": 4.724087746742667e-06, |
| "loss": 0.4397, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.9477756286266924, |
| "grad_norm": 0.4135099947452545, |
| "learning_rate": 4.723796262679994e-06, |
| "loss": 0.4371, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.9482591876208898, |
| "grad_norm": 0.44753551483154297, |
| "learning_rate": 4.7235046337335415e-06, |
| "loss": 0.4543, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.9487427466150871, |
| "grad_norm": 0.42846837639808655, |
| "learning_rate": 4.7232128599223106e-06, |
| "loss": 0.4447, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.9492263056092843, |
| "grad_norm": 0.44100338220596313, |
| "learning_rate": 4.72292094126531e-06, |
| "loss": 0.463, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.9497098646034816, |
| "grad_norm": 0.4311354160308838, |
| "learning_rate": 4.722628877781561e-06, |
| "loss": 0.4407, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.9501934235976789, |
| "grad_norm": 0.44324174523353577, |
| "learning_rate": 4.722336669490089e-06, |
| "loss": 0.449, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.9506769825918762, |
| "grad_norm": 0.4119623601436615, |
| "learning_rate": 4.7220443164099335e-06, |
| "loss": 0.4584, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.9511605415860735, |
| "grad_norm": 0.41248446702957153, |
| "learning_rate": 4.721751818560142e-06, |
| "loss": 0.4311, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.9516441005802708, |
| "grad_norm": 0.40915167331695557, |
| "learning_rate": 4.721459175959769e-06, |
| "loss": 0.4563, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.9521276595744681, |
| "grad_norm": 0.41791391372680664, |
| "learning_rate": 4.721166388627884e-06, |
| "loss": 0.4518, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.9526112185686654, |
| "grad_norm": 0.4374818205833435, |
| "learning_rate": 4.72087345658356e-06, |
| "loss": 0.4581, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.9530947775628626, |
| "grad_norm": 0.38834431767463684, |
| "learning_rate": 4.720580379845884e-06, |
| "loss": 0.4391, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.9535783365570599, |
| "grad_norm": 0.4291421175003052, |
| "learning_rate": 4.720287158433947e-06, |
| "loss": 0.4566, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.9540618955512572, |
| "grad_norm": 0.45957908034324646, |
| "learning_rate": 4.719993792366857e-06, |
| "loss": 0.4325, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.9545454545454546, |
| "grad_norm": 0.4221996068954468, |
| "learning_rate": 4.7197002816637235e-06, |
| "loss": 0.4362, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.9550290135396519, |
| "grad_norm": 0.4279549717903137, |
| "learning_rate": 4.719406626343672e-06, |
| "loss": 0.4326, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.9555125725338491, |
| "grad_norm": 0.4264352023601532, |
| "learning_rate": 4.719112826425834e-06, |
| "loss": 0.455, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.9559961315280464, |
| "grad_norm": 0.43064695596694946, |
| "learning_rate": 4.71881888192935e-06, |
| "loss": 0.4504, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.9564796905222437, |
| "grad_norm": 0.4660034477710724, |
| "learning_rate": 4.718524792873371e-06, |
| "loss": 0.4453, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.956963249516441, |
| "grad_norm": 0.4238419830799103, |
| "learning_rate": 4.718230559277059e-06, |
| "loss": 0.4575, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.9574468085106383, |
| "grad_norm": 0.425527423620224, |
| "learning_rate": 4.717936181159581e-06, |
| "loss": 0.4301, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.9579303675048356, |
| "grad_norm": 0.39952564239501953, |
| "learning_rate": 4.7176416585401195e-06, |
| "loss": 0.4511, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.9584139264990329, |
| "grad_norm": 0.47706300020217896, |
| "learning_rate": 4.717346991437861e-06, |
| "loss": 0.4705, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.9588974854932302, |
| "grad_norm": 0.44544240832328796, |
| "learning_rate": 4.717052179872004e-06, |
| "loss": 0.4625, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.9593810444874274, |
| "grad_norm": 0.422228068113327, |
| "learning_rate": 4.716757223861755e-06, |
| "loss": 0.4447, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.9598646034816247, |
| "grad_norm": 1.7472119331359863, |
| "learning_rate": 4.7164621234263324e-06, |
| "loss": 0.4393, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.960348162475822, |
| "grad_norm": 0.45185375213623047, |
| "learning_rate": 4.716166878584962e-06, |
| "loss": 0.4627, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.9608317214700194, |
| "grad_norm": 0.9011144638061523, |
| "learning_rate": 4.715871489356879e-06, |
| "loss": 0.4511, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.9613152804642167, |
| "grad_norm": 0.4393264949321747, |
| "learning_rate": 4.715575955761328e-06, |
| "loss": 0.4374, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.9617988394584139, |
| "grad_norm": 0.4429907500743866, |
| "learning_rate": 4.715280277817565e-06, |
| "loss": 0.4458, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.9622823984526112, |
| "grad_norm": 0.5249834060668945, |
| "learning_rate": 4.714984455544853e-06, |
| "loss": 0.4126, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.9627659574468085, |
| "grad_norm": 1.696258544921875, |
| "learning_rate": 4.714688488962465e-06, |
| "loss": 0.4596, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.9632495164410058, |
| "grad_norm": 0.4275670647621155, |
| "learning_rate": 4.714392378089684e-06, |
| "loss": 0.4515, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.9637330754352031, |
| "grad_norm": 0.4399387538433075, |
| "learning_rate": 4.7140961229458025e-06, |
| "loss": 0.4523, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.9642166344294004, |
| "grad_norm": 0.4412606358528137, |
| "learning_rate": 4.713799723550121e-06, |
| "loss": 0.431, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.9647001934235977, |
| "grad_norm": 0.4216758906841278, |
| "learning_rate": 4.713503179921951e-06, |
| "loss": 0.4459, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.965183752417795, |
| "grad_norm": 0.432327002286911, |
| "learning_rate": 4.713206492080613e-06, |
| "loss": 0.4641, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.9656673114119922, |
| "grad_norm": 0.4335901141166687, |
| "learning_rate": 4.7129096600454375e-06, |
| "loss": 0.45, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.9661508704061895, |
| "grad_norm": 0.4222829341888428, |
| "learning_rate": 4.712612683835761e-06, |
| "loss": 0.4307, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.9666344294003868, |
| "grad_norm": 0.5455124378204346, |
| "learning_rate": 4.712315563470934e-06, |
| "loss": 0.4572, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.9671179883945842, |
| "grad_norm": 0.4141272008419037, |
| "learning_rate": 4.7120182989703136e-06, |
| "loss": 0.4536, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 12408, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.7155155942178816e+20, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |