| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1491, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002012072434607646, |
| "grad_norm": 12.363840103149414, |
| "learning_rate": 0.0, |
| "loss": 1.2598, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.004024144869215292, |
| "grad_norm": 12.238037109375, |
| "learning_rate": 6.666666666666668e-08, |
| "loss": 1.2856, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.006036217303822937, |
| "grad_norm": 12.225936889648438, |
| "learning_rate": 1.3333333333333336e-07, |
| "loss": 1.3008, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.008048289738430584, |
| "grad_norm": 11.544827461242676, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 1.2326, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01006036217303823, |
| "grad_norm": 12.333775520324707, |
| "learning_rate": 2.666666666666667e-07, |
| "loss": 1.2295, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.012072434607645875, |
| "grad_norm": 12.383609771728516, |
| "learning_rate": 3.3333333333333335e-07, |
| "loss": 1.2722, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.014084507042253521, |
| "grad_norm": 11.465510368347168, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 1.1603, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.01609657947686117, |
| "grad_norm": 12.0714750289917, |
| "learning_rate": 4.666666666666667e-07, |
| "loss": 1.2336, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.018108651911468814, |
| "grad_norm": 11.47994327545166, |
| "learning_rate": 5.333333333333335e-07, |
| "loss": 1.2026, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02012072434607646, |
| "grad_norm": 10.58621883392334, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 1.1641, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.022132796780684104, |
| "grad_norm": 10.854904174804688, |
| "learning_rate": 6.666666666666667e-07, |
| "loss": 1.2054, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.02414486921529175, |
| "grad_norm": 10.539307594299316, |
| "learning_rate": 7.333333333333334e-07, |
| "loss": 1.1917, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.026156941649899398, |
| "grad_norm": 8.997591972351074, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 1.1712, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.028169014084507043, |
| "grad_norm": 8.63853931427002, |
| "learning_rate": 8.666666666666668e-07, |
| "loss": 1.1661, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.030181086519114688, |
| "grad_norm": 8.33820915222168, |
| "learning_rate": 9.333333333333334e-07, |
| "loss": 1.1192, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03219315895372234, |
| "grad_norm": 8.019930839538574, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.1022, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03420523138832998, |
| "grad_norm": 6.020583629608154, |
| "learning_rate": 1.066666666666667e-06, |
| "loss": 1.0401, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.03621730382293763, |
| "grad_norm": 5.38732385635376, |
| "learning_rate": 1.1333333333333334e-06, |
| "loss": 0.9759, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.03822937625754527, |
| "grad_norm": 5.1723833084106445, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 0.96, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.04024144869215292, |
| "grad_norm": 5.272335052490234, |
| "learning_rate": 1.2666666666666669e-06, |
| "loss": 0.9716, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04225352112676056, |
| "grad_norm": 4.846181869506836, |
| "learning_rate": 1.3333333333333334e-06, |
| "loss": 0.942, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.04426559356136821, |
| "grad_norm": 4.733026504516602, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 0.9423, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.04627766599597585, |
| "grad_norm": 4.3946990966796875, |
| "learning_rate": 1.4666666666666669e-06, |
| "loss": 0.9051, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0482897384305835, |
| "grad_norm": 4.081869125366211, |
| "learning_rate": 1.5333333333333334e-06, |
| "loss": 0.838, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.05030181086519115, |
| "grad_norm": 4.333906173706055, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 0.8396, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.052313883299798795, |
| "grad_norm": 4.418334007263184, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 0.8865, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.05432595573440644, |
| "grad_norm": 3.9022629261016846, |
| "learning_rate": 1.7333333333333336e-06, |
| "loss": 0.8417, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.056338028169014086, |
| "grad_norm": 3.718716621398926, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 0.792, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.05835010060362173, |
| "grad_norm": 3.3578712940216064, |
| "learning_rate": 1.8666666666666669e-06, |
| "loss": 0.8466, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.060362173038229376, |
| "grad_norm": 3.447502374649048, |
| "learning_rate": 1.9333333333333336e-06, |
| "loss": 0.8104, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06237424547283702, |
| "grad_norm": 3.520570993423462, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.8379, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.06438631790744467, |
| "grad_norm": 3.2361371517181396, |
| "learning_rate": 2.0666666666666666e-06, |
| "loss": 0.8015, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.06639839034205232, |
| "grad_norm": 3.283750534057617, |
| "learning_rate": 2.133333333333334e-06, |
| "loss": 0.7935, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.06841046277665996, |
| "grad_norm": 3.19575834274292, |
| "learning_rate": 2.2e-06, |
| "loss": 0.7224, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.07042253521126761, |
| "grad_norm": 3.1006929874420166, |
| "learning_rate": 2.266666666666667e-06, |
| "loss": 0.7664, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.07243460764587525, |
| "grad_norm": 3.1626148223876953, |
| "learning_rate": 2.3333333333333336e-06, |
| "loss": 0.7612, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0744466800804829, |
| "grad_norm": 3.043454170227051, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 0.7359, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.07645875251509054, |
| "grad_norm": 2.9314467906951904, |
| "learning_rate": 2.466666666666667e-06, |
| "loss": 0.7721, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.07847082494969819, |
| "grad_norm": 3.1366055011749268, |
| "learning_rate": 2.5333333333333338e-06, |
| "loss": 0.719, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.08048289738430583, |
| "grad_norm": 3.107473850250244, |
| "learning_rate": 2.6e-06, |
| "loss": 0.8246, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.08249496981891348, |
| "grad_norm": 2.6806769371032715, |
| "learning_rate": 2.666666666666667e-06, |
| "loss": 0.6973, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.08450704225352113, |
| "grad_norm": 2.670893430709839, |
| "learning_rate": 2.7333333333333336e-06, |
| "loss": 0.7593, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.08651911468812877, |
| "grad_norm": 2.6718692779541016, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 0.7396, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.08853118712273642, |
| "grad_norm": 2.9941153526306152, |
| "learning_rate": 2.866666666666667e-06, |
| "loss": 0.7654, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.09054325955734406, |
| "grad_norm": 2.6897857189178467, |
| "learning_rate": 2.9333333333333338e-06, |
| "loss": 0.6612, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.0925553319919517, |
| "grad_norm": 2.886622667312622, |
| "learning_rate": 3e-06, |
| "loss": 0.7474, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.09456740442655935, |
| "grad_norm": 2.6397299766540527, |
| "learning_rate": 3.066666666666667e-06, |
| "loss": 0.704, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.096579476861167, |
| "grad_norm": 2.64058780670166, |
| "learning_rate": 3.133333333333334e-06, |
| "loss": 0.6708, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.09859154929577464, |
| "grad_norm": 3.1715197563171387, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 0.6346, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.1006036217303823, |
| "grad_norm": 2.7641634941101074, |
| "learning_rate": 3.266666666666667e-06, |
| "loss": 0.6568, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.10261569416498995, |
| "grad_norm": 2.6137845516204834, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.6276, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.10462776659959759, |
| "grad_norm": 2.7980453968048096, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 0.6974, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.10663983903420524, |
| "grad_norm": 2.5735130310058594, |
| "learning_rate": 3.4666666666666672e-06, |
| "loss": 0.7257, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.10865191146881288, |
| "grad_norm": 2.5075342655181885, |
| "learning_rate": 3.5333333333333335e-06, |
| "loss": 0.6615, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.11066398390342053, |
| "grad_norm": 2.779794454574585, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 0.642, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.11267605633802817, |
| "grad_norm": 2.7019553184509277, |
| "learning_rate": 3.6666666666666666e-06, |
| "loss": 0.6855, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.11468812877263582, |
| "grad_norm": 2.685800075531006, |
| "learning_rate": 3.7333333333333337e-06, |
| "loss": 0.7083, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.11670020120724346, |
| "grad_norm": 2.5412144660949707, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 0.768, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.11871227364185111, |
| "grad_norm": 2.727508783340454, |
| "learning_rate": 3.866666666666667e-06, |
| "loss": 0.6644, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.12072434607645875, |
| "grad_norm": 2.6290087699890137, |
| "learning_rate": 3.9333333333333335e-06, |
| "loss": 0.7471, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.1227364185110664, |
| "grad_norm": 2.714343547821045, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.7178, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.12474849094567404, |
| "grad_norm": 2.59692645072937, |
| "learning_rate": 4.066666666666667e-06, |
| "loss": 0.6653, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1267605633802817, |
| "grad_norm": 2.5672385692596436, |
| "learning_rate": 4.133333333333333e-06, |
| "loss": 0.6388, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.12877263581488935, |
| "grad_norm": 2.3973758220672607, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 0.6807, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.13078470824949698, |
| "grad_norm": 2.5640907287597656, |
| "learning_rate": 4.266666666666668e-06, |
| "loss": 0.6079, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.13279678068410464, |
| "grad_norm": 2.399198055267334, |
| "learning_rate": 4.333333333333334e-06, |
| "loss": 0.6361, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.13480885311871227, |
| "grad_norm": 2.62172794342041, |
| "learning_rate": 4.4e-06, |
| "loss": 0.6594, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.13682092555331993, |
| "grad_norm": 2.631462574005127, |
| "learning_rate": 4.4666666666666665e-06, |
| "loss": 0.6447, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.13883299798792756, |
| "grad_norm": 2.4494845867156982, |
| "learning_rate": 4.533333333333334e-06, |
| "loss": 0.5988, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.14084507042253522, |
| "grad_norm": 2.6119790077209473, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 0.6501, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 2.604640483856201, |
| "learning_rate": 4.666666666666667e-06, |
| "loss": 0.6714, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.1448692152917505, |
| "grad_norm": 2.5536398887634277, |
| "learning_rate": 4.7333333333333335e-06, |
| "loss": 0.649, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.14688128772635814, |
| "grad_norm": 2.456615686416626, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 0.6269, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.1488933601609658, |
| "grad_norm": 2.5996391773223877, |
| "learning_rate": 4.866666666666667e-06, |
| "loss": 0.6658, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.15090543259557343, |
| "grad_norm": 2.334995985031128, |
| "learning_rate": 4.933333333333334e-06, |
| "loss": 0.579, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1529175050301811, |
| "grad_norm": 2.5622453689575195, |
| "learning_rate": 5e-06, |
| "loss": 0.6378, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.15492957746478872, |
| "grad_norm": 2.5427086353302, |
| "learning_rate": 5.0666666666666676e-06, |
| "loss": 0.6418, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.15694164989939638, |
| "grad_norm": 2.5094008445739746, |
| "learning_rate": 5.133333333333334e-06, |
| "loss": 0.6471, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.158953722334004, |
| "grad_norm": 2.7548811435699463, |
| "learning_rate": 5.2e-06, |
| "loss": 0.6631, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.16096579476861167, |
| "grad_norm": 2.6184520721435547, |
| "learning_rate": 5.2666666666666665e-06, |
| "loss": 0.6779, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.16297786720321933, |
| "grad_norm": 2.7182962894439697, |
| "learning_rate": 5.333333333333334e-06, |
| "loss": 0.6641, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.16498993963782696, |
| "grad_norm": 2.5088016986846924, |
| "learning_rate": 5.400000000000001e-06, |
| "loss": 0.6771, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.16700201207243462, |
| "grad_norm": 2.573153495788574, |
| "learning_rate": 5.466666666666667e-06, |
| "loss": 0.6399, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.16901408450704225, |
| "grad_norm": 2.4727790355682373, |
| "learning_rate": 5.533333333333334e-06, |
| "loss": 0.6326, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.1710261569416499, |
| "grad_norm": 2.5456035137176514, |
| "learning_rate": 5.600000000000001e-06, |
| "loss": 0.648, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.17303822937625754, |
| "grad_norm": 2.272167444229126, |
| "learning_rate": 5.666666666666667e-06, |
| "loss": 0.5667, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.1750503018108652, |
| "grad_norm": 2.5209145545959473, |
| "learning_rate": 5.733333333333334e-06, |
| "loss": 0.6212, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.17706237424547283, |
| "grad_norm": 2.570265054702759, |
| "learning_rate": 5.8e-06, |
| "loss": 0.6653, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.1790744466800805, |
| "grad_norm": 2.527291774749756, |
| "learning_rate": 5.8666666666666675e-06, |
| "loss": 0.6653, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.18108651911468812, |
| "grad_norm": 2.5215442180633545, |
| "learning_rate": 5.933333333333335e-06, |
| "loss": 0.6097, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.18309859154929578, |
| "grad_norm": 2.7476773262023926, |
| "learning_rate": 6e-06, |
| "loss": 0.6701, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.1851106639839034, |
| "grad_norm": 2.5635337829589844, |
| "learning_rate": 6.066666666666667e-06, |
| "loss": 0.6136, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.18712273641851107, |
| "grad_norm": 2.61013126373291, |
| "learning_rate": 6.133333333333334e-06, |
| "loss": 0.6636, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.1891348088531187, |
| "grad_norm": 2.596705198287964, |
| "learning_rate": 6.200000000000001e-06, |
| "loss": 0.6145, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.19114688128772636, |
| "grad_norm": 2.6027705669403076, |
| "learning_rate": 6.266666666666668e-06, |
| "loss": 0.6379, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.193158953722334, |
| "grad_norm": 2.587643623352051, |
| "learning_rate": 6.333333333333333e-06, |
| "loss": 0.6574, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.19517102615694165, |
| "grad_norm": 2.42325758934021, |
| "learning_rate": 6.4000000000000006e-06, |
| "loss": 0.6293, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.19718309859154928, |
| "grad_norm": 2.4672398567199707, |
| "learning_rate": 6.466666666666667e-06, |
| "loss": 0.5981, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.19919517102615694, |
| "grad_norm": 2.478847026824951, |
| "learning_rate": 6.533333333333334e-06, |
| "loss": 0.6141, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2012072434607646, |
| "grad_norm": 2.40338397026062, |
| "learning_rate": 6.600000000000001e-06, |
| "loss": 0.6513, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.20321931589537223, |
| "grad_norm": 2.523690700531006, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.6469, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.2052313883299799, |
| "grad_norm": 2.5886921882629395, |
| "learning_rate": 6.733333333333334e-06, |
| "loss": 0.6215, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.20724346076458752, |
| "grad_norm": 2.5619165897369385, |
| "learning_rate": 6.800000000000001e-06, |
| "loss": 0.645, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.20925553319919518, |
| "grad_norm": 2.5696005821228027, |
| "learning_rate": 6.866666666666667e-06, |
| "loss": 0.6091, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.2112676056338028, |
| "grad_norm": 2.725149393081665, |
| "learning_rate": 6.9333333333333344e-06, |
| "loss": 0.697, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.21327967806841047, |
| "grad_norm": 2.7614009380340576, |
| "learning_rate": 7e-06, |
| "loss": 0.6294, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.2152917505030181, |
| "grad_norm": 2.488131523132324, |
| "learning_rate": 7.066666666666667e-06, |
| "loss": 0.625, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.21730382293762576, |
| "grad_norm": 2.4684252738952637, |
| "learning_rate": 7.133333333333334e-06, |
| "loss": 0.6594, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.2193158953722334, |
| "grad_norm": 2.5597586631774902, |
| "learning_rate": 7.2000000000000005e-06, |
| "loss": 0.6692, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.22132796780684105, |
| "grad_norm": 2.429131031036377, |
| "learning_rate": 7.266666666666668e-06, |
| "loss": 0.6566, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.22334004024144868, |
| "grad_norm": 2.4478707313537598, |
| "learning_rate": 7.333333333333333e-06, |
| "loss": 0.6316, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.22535211267605634, |
| "grad_norm": 2.4292643070220947, |
| "learning_rate": 7.4e-06, |
| "loss": 0.6437, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.22736418511066397, |
| "grad_norm": 2.5762760639190674, |
| "learning_rate": 7.4666666666666675e-06, |
| "loss": 0.6179, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.22937625754527163, |
| "grad_norm": 2.5746638774871826, |
| "learning_rate": 7.533333333333334e-06, |
| "loss": 0.6415, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.23138832997987926, |
| "grad_norm": 2.685413122177124, |
| "learning_rate": 7.600000000000001e-06, |
| "loss": 0.6635, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.23340040241448692, |
| "grad_norm": 2.37715744972229, |
| "learning_rate": 7.666666666666667e-06, |
| "loss": 0.6124, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.23541247484909456, |
| "grad_norm": 2.478545904159546, |
| "learning_rate": 7.733333333333334e-06, |
| "loss": 0.6226, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.23742454728370221, |
| "grad_norm": 2.634754180908203, |
| "learning_rate": 7.800000000000002e-06, |
| "loss": 0.6772, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.23943661971830985, |
| "grad_norm": 2.674330949783325, |
| "learning_rate": 7.866666666666667e-06, |
| "loss": 0.657, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.2414486921529175, |
| "grad_norm": 2.785724401473999, |
| "learning_rate": 7.933333333333334e-06, |
| "loss": 0.5971, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.24346076458752516, |
| "grad_norm": 2.7215487957000732, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.6228, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.2454728370221328, |
| "grad_norm": 2.535701274871826, |
| "learning_rate": 8.066666666666667e-06, |
| "loss": 0.6479, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.24748490945674045, |
| "grad_norm": 2.7002458572387695, |
| "learning_rate": 8.133333333333334e-06, |
| "loss": 0.6286, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.24949698189134809, |
| "grad_norm": 2.5765464305877686, |
| "learning_rate": 8.2e-06, |
| "loss": 0.5909, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.2515090543259557, |
| "grad_norm": 2.7955777645111084, |
| "learning_rate": 8.266666666666667e-06, |
| "loss": 0.6271, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.2535211267605634, |
| "grad_norm": 2.583167552947998, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.6438, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.25553319919517103, |
| "grad_norm": 2.5065181255340576, |
| "learning_rate": 8.400000000000001e-06, |
| "loss": 0.6421, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.2575452716297787, |
| "grad_norm": 2.5843505859375, |
| "learning_rate": 8.466666666666668e-06, |
| "loss": 0.6277, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2595573440643863, |
| "grad_norm": 2.729172468185425, |
| "learning_rate": 8.533333333333335e-06, |
| "loss": 0.6338, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.26156941649899396, |
| "grad_norm": 2.566673755645752, |
| "learning_rate": 8.6e-06, |
| "loss": 0.6624, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2635814889336016, |
| "grad_norm": 2.436913251876831, |
| "learning_rate": 8.666666666666668e-06, |
| "loss": 0.5772, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.2655935613682093, |
| "grad_norm": 2.3920977115631104, |
| "learning_rate": 8.733333333333333e-06, |
| "loss": 0.6181, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.2676056338028169, |
| "grad_norm": 2.6135761737823486, |
| "learning_rate": 8.8e-06, |
| "loss": 0.6439, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.26961770623742454, |
| "grad_norm": 2.5263290405273438, |
| "learning_rate": 8.866666666666668e-06, |
| "loss": 0.6164, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.2716297786720322, |
| "grad_norm": 2.5991945266723633, |
| "learning_rate": 8.933333333333333e-06, |
| "loss": 0.6352, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.27364185110663986, |
| "grad_norm": 2.720930576324463, |
| "learning_rate": 9e-06, |
| "loss": 0.6283, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.27565392354124746, |
| "grad_norm": 2.5199596881866455, |
| "learning_rate": 9.066666666666667e-06, |
| "loss": 0.6374, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.2776659959758551, |
| "grad_norm": 2.7811954021453857, |
| "learning_rate": 9.133333333333335e-06, |
| "loss": 0.6282, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.2796780684104628, |
| "grad_norm": 2.6453726291656494, |
| "learning_rate": 9.200000000000002e-06, |
| "loss": 0.6371, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.28169014084507044, |
| "grad_norm": 2.6417572498321533, |
| "learning_rate": 9.266666666666667e-06, |
| "loss": 0.6413, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.2837022132796781, |
| "grad_norm": 2.5948872566223145, |
| "learning_rate": 9.333333333333334e-06, |
| "loss": 0.6444, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 2.462456464767456, |
| "learning_rate": 9.4e-06, |
| "loss": 0.6641, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.28772635814889336, |
| "grad_norm": 2.4424092769622803, |
| "learning_rate": 9.466666666666667e-06, |
| "loss": 0.611, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.289738430583501, |
| "grad_norm": 2.3720812797546387, |
| "learning_rate": 9.533333333333334e-06, |
| "loss": 0.5783, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.2917505030181087, |
| "grad_norm": 2.604189872741699, |
| "learning_rate": 9.600000000000001e-06, |
| "loss": 0.6717, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.2937625754527163, |
| "grad_norm": 2.4482827186584473, |
| "learning_rate": 9.666666666666667e-06, |
| "loss": 0.6396, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.29577464788732394, |
| "grad_norm": 2.364368200302124, |
| "learning_rate": 9.733333333333334e-06, |
| "loss": 0.6, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.2977867203219316, |
| "grad_norm": 2.4040334224700928, |
| "learning_rate": 9.800000000000001e-06, |
| "loss": 0.6545, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.29979879275653926, |
| "grad_norm": 2.6192374229431152, |
| "learning_rate": 9.866666666666668e-06, |
| "loss": 0.6403, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.30181086519114686, |
| "grad_norm": 2.255182981491089, |
| "learning_rate": 9.933333333333334e-06, |
| "loss": 0.6026, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.3038229376257545, |
| "grad_norm": 2.4689016342163086, |
| "learning_rate": 1e-05, |
| "loss": 0.6061, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.3058350100603622, |
| "grad_norm": 2.5769577026367188, |
| "learning_rate": 9.999986279118938e-06, |
| "loss": 0.6053, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.30784708249496984, |
| "grad_norm": 2.4138309955596924, |
| "learning_rate": 9.999945116551056e-06, |
| "loss": 0.6489, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.30985915492957744, |
| "grad_norm": 2.449880599975586, |
| "learning_rate": 9.999876512522269e-06, |
| "loss": 0.6062, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.3118712273641851, |
| "grad_norm": 2.315631866455078, |
| "learning_rate": 9.9997804674091e-06, |
| "loss": 0.5896, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.31388329979879276, |
| "grad_norm": 2.8500030040740967, |
| "learning_rate": 9.999656981738679e-06, |
| "loss": 0.6558, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.3158953722334004, |
| "grad_norm": 2.3704442977905273, |
| "learning_rate": 9.999506056188736e-06, |
| "loss": 0.6401, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.317907444668008, |
| "grad_norm": 2.323148488998413, |
| "learning_rate": 9.999327691587609e-06, |
| "loss": 0.6136, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.3199195171026157, |
| "grad_norm": 2.4699552059173584, |
| "learning_rate": 9.99912188891422e-06, |
| "loss": 0.6254, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.32193158953722334, |
| "grad_norm": 2.135148525238037, |
| "learning_rate": 9.99888864929809e-06, |
| "loss": 0.6152, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.323943661971831, |
| "grad_norm": 2.483369827270508, |
| "learning_rate": 9.998627974019322e-06, |
| "loss": 0.5779, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.32595573440643866, |
| "grad_norm": 2.4658560752868652, |
| "learning_rate": 9.99833986450859e-06, |
| "loss": 0.6304, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.32796780684104626, |
| "grad_norm": 2.4425296783447266, |
| "learning_rate": 9.99802432234714e-06, |
| "loss": 0.6123, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.3299798792756539, |
| "grad_norm": 2.291313648223877, |
| "learning_rate": 9.997681349266782e-06, |
| "loss": 0.6, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.3319919517102616, |
| "grad_norm": 2.4326376914978027, |
| "learning_rate": 9.997310947149872e-06, |
| "loss": 0.5919, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.33400402414486924, |
| "grad_norm": 2.344089984893799, |
| "learning_rate": 9.996913118029306e-06, |
| "loss": 0.6441, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.33601609657947684, |
| "grad_norm": 2.360903024673462, |
| "learning_rate": 9.996487864088512e-06, |
| "loss": 0.6209, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.3380281690140845, |
| "grad_norm": 2.3726160526275635, |
| "learning_rate": 9.996035187661433e-06, |
| "loss": 0.5881, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.34004024144869216, |
| "grad_norm": 2.3640172481536865, |
| "learning_rate": 9.995555091232516e-06, |
| "loss": 0.6319, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.3420523138832998, |
| "grad_norm": 2.4159719944000244, |
| "learning_rate": 9.9950475774367e-06, |
| "loss": 0.6403, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3440643863179074, |
| "grad_norm": 2.472817897796631, |
| "learning_rate": 9.994512649059401e-06, |
| "loss": 0.592, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.3460764587525151, |
| "grad_norm": 2.400581121444702, |
| "learning_rate": 9.99395030903649e-06, |
| "loss": 0.6134, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.34808853118712274, |
| "grad_norm": 2.537536382675171, |
| "learning_rate": 9.993360560454293e-06, |
| "loss": 0.6273, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.3501006036217304, |
| "grad_norm": 2.3792333602905273, |
| "learning_rate": 9.992743406549556e-06, |
| "loss": 0.6072, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.352112676056338, |
| "grad_norm": 2.3866188526153564, |
| "learning_rate": 9.992098850709434e-06, |
| "loss": 0.6412, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.35412474849094566, |
| "grad_norm": 2.3259477615356445, |
| "learning_rate": 9.99142689647148e-06, |
| "loss": 0.6485, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.3561368209255533, |
| "grad_norm": 2.6365954875946045, |
| "learning_rate": 9.990727547523616e-06, |
| "loss": 0.6391, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.358148893360161, |
| "grad_norm": 2.523137331008911, |
| "learning_rate": 9.990000807704114e-06, |
| "loss": 0.6138, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.36016096579476864, |
| "grad_norm": 2.461246967315674, |
| "learning_rate": 9.989246681001577e-06, |
| "loss": 0.6051, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.36217303822937624, |
| "grad_norm": 2.1713852882385254, |
| "learning_rate": 9.988465171554921e-06, |
| "loss": 0.5876, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.3641851106639839, |
| "grad_norm": 2.6124799251556396, |
| "learning_rate": 9.987656283653344e-06, |
| "loss": 0.6481, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.36619718309859156, |
| "grad_norm": 2.4890964031219482, |
| "learning_rate": 9.986820021736306e-06, |
| "loss": 0.6064, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.3682092555331992, |
| "grad_norm": 2.3688108921051025, |
| "learning_rate": 9.985956390393511e-06, |
| "loss": 0.6046, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.3702213279678068, |
| "grad_norm": 2.48551344871521, |
| "learning_rate": 9.985065394364869e-06, |
| "loss": 0.6489, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.3722334004024145, |
| "grad_norm": 2.2956740856170654, |
| "learning_rate": 9.984147038540482e-06, |
| "loss": 0.6279, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.37424547283702214, |
| "grad_norm": 2.439910411834717, |
| "learning_rate": 9.983201327960607e-06, |
| "loss": 0.6137, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.3762575452716298, |
| "grad_norm": 2.5276641845703125, |
| "learning_rate": 9.982228267815644e-06, |
| "loss": 0.6155, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.3782696177062374, |
| "grad_norm": 2.286393165588379, |
| "learning_rate": 9.981227863446082e-06, |
| "loss": 0.5831, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.38028169014084506, |
| "grad_norm": 2.3950138092041016, |
| "learning_rate": 9.980200120342499e-06, |
| "loss": 0.6125, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.3822937625754527, |
| "grad_norm": 2.266212224960327, |
| "learning_rate": 9.979145044145506e-06, |
| "loss": 0.6074, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.3843058350100604, |
| "grad_norm": 2.353178024291992, |
| "learning_rate": 9.978062640645737e-06, |
| "loss": 0.6534, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.386317907444668, |
| "grad_norm": 2.330751657485962, |
| "learning_rate": 9.976952915783804e-06, |
| "loss": 0.6001, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.38832997987927564, |
| "grad_norm": 2.3842129707336426, |
| "learning_rate": 9.975815875650265e-06, |
| "loss": 0.5859, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.3903420523138833, |
| "grad_norm": 2.396599292755127, |
| "learning_rate": 9.9746515264856e-06, |
| "loss": 0.6097, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.39235412474849096, |
| "grad_norm": 2.323148250579834, |
| "learning_rate": 9.973459874680167e-06, |
| "loss": 0.6033, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.39436619718309857, |
| "grad_norm": 2.5869362354278564, |
| "learning_rate": 9.972240926774167e-06, |
| "loss": 0.5759, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.3963782696177062, |
| "grad_norm": 2.671992540359497, |
| "learning_rate": 9.970994689457623e-06, |
| "loss": 0.5771, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.3983903420523139, |
| "grad_norm": 2.518411159515381, |
| "learning_rate": 9.969721169570319e-06, |
| "loss": 0.6368, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.40040241448692154, |
| "grad_norm": 2.2937800884246826, |
| "learning_rate": 9.968420374101782e-06, |
| "loss": 0.6316, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.4024144869215292, |
| "grad_norm": 2.3399505615234375, |
| "learning_rate": 9.967092310191237e-06, |
| "loss": 0.602, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.4044265593561368, |
| "grad_norm": 2.2086527347564697, |
| "learning_rate": 9.965736985127568e-06, |
| "loss": 0.5983, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.40643863179074446, |
| "grad_norm": 2.1889898777008057, |
| "learning_rate": 9.964354406349272e-06, |
| "loss": 0.6179, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.4084507042253521, |
| "grad_norm": 2.510423421859741, |
| "learning_rate": 9.962944581444433e-06, |
| "loss": 0.607, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.4104627766599598, |
| "grad_norm": 2.23941707611084, |
| "learning_rate": 9.961507518150666e-06, |
| "loss": 0.6072, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.4124748490945674, |
| "grad_norm": 2.304394483566284, |
| "learning_rate": 9.960043224355081e-06, |
| "loss": 0.56, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.41448692152917505, |
| "grad_norm": 2.3394389152526855, |
| "learning_rate": 9.958551708094237e-06, |
| "loss": 0.5895, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.4164989939637827, |
| "grad_norm": 2.3602826595306396, |
| "learning_rate": 9.9570329775541e-06, |
| "loss": 0.6176, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.41851106639839036, |
| "grad_norm": 2.3634259700775146, |
| "learning_rate": 9.955487041070003e-06, |
| "loss": 0.6063, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.42052313883299797, |
| "grad_norm": 2.2722222805023193, |
| "learning_rate": 9.953913907126584e-06, |
| "loss": 0.5682, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.4225352112676056, |
| "grad_norm": 2.2542028427124023, |
| "learning_rate": 9.952313584357763e-06, |
| "loss": 0.6419, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4245472837022133, |
| "grad_norm": 2.452004909515381, |
| "learning_rate": 9.95068608154667e-06, |
| "loss": 0.5832, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.42655935613682094, |
| "grad_norm": 2.302015542984009, |
| "learning_rate": 9.949031407625616e-06, |
| "loss": 0.6485, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 2.347144842147827, |
| "learning_rate": 9.947349571676037e-06, |
| "loss": 0.6086, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.4305835010060362, |
| "grad_norm": 2.3000502586364746, |
| "learning_rate": 9.945640582928438e-06, |
| "loss": 0.5909, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.43259557344064387, |
| "grad_norm": 2.3875091075897217, |
| "learning_rate": 9.943904450762351e-06, |
| "loss": 0.6138, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.4346076458752515, |
| "grad_norm": 2.4650800228118896, |
| "learning_rate": 9.942141184706286e-06, |
| "loss": 0.5952, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.43661971830985913, |
| "grad_norm": 2.5489614009857178, |
| "learning_rate": 9.940350794437663e-06, |
| "loss": 0.6147, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.4386317907444668, |
| "grad_norm": 2.565382957458496, |
| "learning_rate": 9.938533289782778e-06, |
| "loss": 0.6167, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.44064386317907445, |
| "grad_norm": 2.7737607955932617, |
| "learning_rate": 9.936688680716737e-06, |
| "loss": 0.6235, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.4426559356136821, |
| "grad_norm": 2.4090120792388916, |
| "learning_rate": 9.934816977363404e-06, |
| "loss": 0.6042, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.44466800804828976, |
| "grad_norm": 2.378023147583008, |
| "learning_rate": 9.932918189995345e-06, |
| "loss": 0.5705, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.44668008048289737, |
| "grad_norm": 2.137075424194336, |
| "learning_rate": 9.930992329033777e-06, |
| "loss": 0.5828, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.448692152917505, |
| "grad_norm": 2.5460665225982666, |
| "learning_rate": 9.929039405048502e-06, |
| "loss": 0.6181, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.4507042253521127, |
| "grad_norm": 2.4987435340881348, |
| "learning_rate": 9.927059428757857e-06, |
| "loss": 0.5926, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.45271629778672035, |
| "grad_norm": 2.2808916568756104, |
| "learning_rate": 9.925052411028646e-06, |
| "loss": 0.5412, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.45472837022132795, |
| "grad_norm": 2.6912238597869873, |
| "learning_rate": 9.923018362876093e-06, |
| "loss": 0.6123, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.4567404426559356, |
| "grad_norm": 2.254833459854126, |
| "learning_rate": 9.920957295463772e-06, |
| "loss": 0.5876, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.45875251509054327, |
| "grad_norm": 2.572598695755005, |
| "learning_rate": 9.918869220103542e-06, |
| "loss": 0.6207, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.4607645875251509, |
| "grad_norm": 2.448763608932495, |
| "learning_rate": 9.916754148255501e-06, |
| "loss": 0.6001, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.46277665995975853, |
| "grad_norm": 2.2648446559906006, |
| "learning_rate": 9.914612091527908e-06, |
| "loss": 0.6357, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.4647887323943662, |
| "grad_norm": 2.6665232181549072, |
| "learning_rate": 9.912443061677125e-06, |
| "loss": 0.5835, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.46680080482897385, |
| "grad_norm": 2.5652852058410645, |
| "learning_rate": 9.91024707060755e-06, |
| "loss": 0.5807, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.4688128772635815, |
| "grad_norm": 2.583991289138794, |
| "learning_rate": 9.90802413037156e-06, |
| "loss": 0.5885, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.4708249496981891, |
| "grad_norm": 2.55000638961792, |
| "learning_rate": 9.905774253169433e-06, |
| "loss": 0.5982, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.47283702213279677, |
| "grad_norm": 2.4194183349609375, |
| "learning_rate": 9.903497451349286e-06, |
| "loss": 0.5808, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.47484909456740443, |
| "grad_norm": 2.1875178813934326, |
| "learning_rate": 9.901193737407011e-06, |
| "loss": 0.6064, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.4768611670020121, |
| "grad_norm": 2.475219488143921, |
| "learning_rate": 9.898863123986203e-06, |
| "loss": 0.6258, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.4788732394366197, |
| "grad_norm": 2.3584680557250977, |
| "learning_rate": 9.896505623878088e-06, |
| "loss": 0.5774, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.48088531187122735, |
| "grad_norm": 2.2012126445770264, |
| "learning_rate": 9.89412125002146e-06, |
| "loss": 0.5675, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.482897384305835, |
| "grad_norm": 2.501800537109375, |
| "learning_rate": 9.8917100155026e-06, |
| "loss": 0.5434, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.48490945674044267, |
| "grad_norm": 2.6421823501586914, |
| "learning_rate": 9.889271933555214e-06, |
| "loss": 0.6171, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.4869215291750503, |
| "grad_norm": 2.3850109577178955, |
| "learning_rate": 9.886807017560356e-06, |
| "loss": 0.5794, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.48893360160965793, |
| "grad_norm": 2.1486642360687256, |
| "learning_rate": 9.884315281046352e-06, |
| "loss": 0.5469, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.4909456740442656, |
| "grad_norm": 2.2797324657440186, |
| "learning_rate": 9.881796737688732e-06, |
| "loss": 0.5792, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.49295774647887325, |
| "grad_norm": 2.6412172317504883, |
| "learning_rate": 9.879251401310148e-06, |
| "loss": 0.6719, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.4949698189134809, |
| "grad_norm": 2.5547709465026855, |
| "learning_rate": 9.876679285880304e-06, |
| "loss": 0.6059, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.4969818913480885, |
| "grad_norm": 2.522813320159912, |
| "learning_rate": 9.874080405515874e-06, |
| "loss": 0.6261, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.49899396378269617, |
| "grad_norm": 2.457695960998535, |
| "learning_rate": 9.871454774480433e-06, |
| "loss": 0.6169, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.5010060362173038, |
| "grad_norm": 2.424248456954956, |
| "learning_rate": 9.868802407184367e-06, |
| "loss": 0.5937, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.5030181086519114, |
| "grad_norm": 2.377311944961548, |
| "learning_rate": 9.866123318184803e-06, |
| "loss": 0.5936, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5050301810865191, |
| "grad_norm": 2.4924814701080322, |
| "learning_rate": 9.863417522185525e-06, |
| "loss": 0.5936, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.5070422535211268, |
| "grad_norm": 2.4591898918151855, |
| "learning_rate": 9.860685034036897e-06, |
| "loss": 0.611, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.5090543259557344, |
| "grad_norm": 2.0778732299804688, |
| "learning_rate": 9.857925868735774e-06, |
| "loss": 0.5892, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.5110663983903421, |
| "grad_norm": 2.6331255435943604, |
| "learning_rate": 9.855140041425428e-06, |
| "loss": 0.6307, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.5130784708249497, |
| "grad_norm": 2.3365707397460938, |
| "learning_rate": 9.852327567395463e-06, |
| "loss": 0.5972, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.5150905432595574, |
| "grad_norm": 2.3462042808532715, |
| "learning_rate": 9.84948846208173e-06, |
| "loss": 0.6209, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.5171026156941649, |
| "grad_norm": 2.4540977478027344, |
| "learning_rate": 9.846622741066232e-06, |
| "loss": 0.6274, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.5191146881287726, |
| "grad_norm": 2.2751944065093994, |
| "learning_rate": 9.843730420077061e-06, |
| "loss": 0.6026, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5211267605633803, |
| "grad_norm": 2.3845276832580566, |
| "learning_rate": 9.840811514988294e-06, |
| "loss": 0.6102, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.5231388329979879, |
| "grad_norm": 2.46744966506958, |
| "learning_rate": 9.83786604181991e-06, |
| "loss": 0.6107, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5251509054325956, |
| "grad_norm": 2.3754844665527344, |
| "learning_rate": 9.834894016737705e-06, |
| "loss": 0.5667, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.5271629778672032, |
| "grad_norm": 2.2515339851379395, |
| "learning_rate": 9.831895456053197e-06, |
| "loss": 0.6242, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.5291750503018109, |
| "grad_norm": 2.5688209533691406, |
| "learning_rate": 9.828870376223546e-06, |
| "loss": 0.5932, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.5311871227364185, |
| "grad_norm": 2.4301135540008545, |
| "learning_rate": 9.825818793851456e-06, |
| "loss": 0.616, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.5331991951710262, |
| "grad_norm": 2.3733065128326416, |
| "learning_rate": 9.822740725685087e-06, |
| "loss": 0.5439, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.5352112676056338, |
| "grad_norm": 2.2880313396453857, |
| "learning_rate": 9.819636188617961e-06, |
| "loss": 0.592, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.5372233400402414, |
| "grad_norm": 2.3435163497924805, |
| "learning_rate": 9.81650519968887e-06, |
| "loss": 0.5901, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.5392354124748491, |
| "grad_norm": 2.272224187850952, |
| "learning_rate": 9.81334777608179e-06, |
| "loss": 0.5828, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.5412474849094567, |
| "grad_norm": 2.3393688201904297, |
| "learning_rate": 9.810163935125768e-06, |
| "loss": 0.6356, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.5432595573440644, |
| "grad_norm": 2.552076578140259, |
| "learning_rate": 9.806953694294849e-06, |
| "loss": 0.5929, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.545271629778672, |
| "grad_norm": 2.2416205406188965, |
| "learning_rate": 9.803717071207965e-06, |
| "loss": 0.611, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.5472837022132797, |
| "grad_norm": 2.4686906337738037, |
| "learning_rate": 9.800454083628845e-06, |
| "loss": 0.6189, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.5492957746478874, |
| "grad_norm": 2.3410513401031494, |
| "learning_rate": 9.797164749465915e-06, |
| "loss": 0.6166, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.5513078470824949, |
| "grad_norm": 2.240062713623047, |
| "learning_rate": 9.793849086772198e-06, |
| "loss": 0.6308, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.5533199195171026, |
| "grad_norm": 1.9945600032806396, |
| "learning_rate": 9.790507113745222e-06, |
| "loss": 0.5439, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.5553319919517102, |
| "grad_norm": 2.338785409927368, |
| "learning_rate": 9.787138848726912e-06, |
| "loss": 0.5622, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.5573440643863179, |
| "grad_norm": 2.2766361236572266, |
| "learning_rate": 9.783744310203492e-06, |
| "loss": 0.6072, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.5593561368209256, |
| "grad_norm": 2.300431489944458, |
| "learning_rate": 9.780323516805386e-06, |
| "loss": 0.5599, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.5613682092555332, |
| "grad_norm": 2.407275676727295, |
| "learning_rate": 9.776876487307115e-06, |
| "loss": 0.5904, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.5633802816901409, |
| "grad_norm": 2.198110342025757, |
| "learning_rate": 9.77340324062719e-06, |
| "loss": 0.5778, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.5653923541247485, |
| "grad_norm": 2.2225024700164795, |
| "learning_rate": 9.769903795828016e-06, |
| "loss": 0.5909, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.5674044265593562, |
| "grad_norm": 2.542376756668091, |
| "learning_rate": 9.766378172115775e-06, |
| "loss": 0.5907, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.5694164989939637, |
| "grad_norm": 2.7032480239868164, |
| "learning_rate": 9.76282638884034e-06, |
| "loss": 0.6323, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 2.1654727458953857, |
| "learning_rate": 9.75924846549514e-06, |
| "loss": 0.582, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.5734406438631791, |
| "grad_norm": 2.6309971809387207, |
| "learning_rate": 9.755644421717083e-06, |
| "loss": 0.5845, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.5754527162977867, |
| "grad_norm": 2.3880655765533447, |
| "learning_rate": 9.752014277286433e-06, |
| "loss": 0.5996, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.5774647887323944, |
| "grad_norm": 2.295335054397583, |
| "learning_rate": 9.7483580521267e-06, |
| "loss": 0.5756, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.579476861167002, |
| "grad_norm": 2.394613742828369, |
| "learning_rate": 9.744675766304538e-06, |
| "loss": 0.6449, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.5814889336016097, |
| "grad_norm": 2.123344898223877, |
| "learning_rate": 9.740967440029628e-06, |
| "loss": 0.5853, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.5835010060362174, |
| "grad_norm": 2.532409191131592, |
| "learning_rate": 9.737233093654572e-06, |
| "loss": 0.6253, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5855130784708249, |
| "grad_norm": 2.29142689704895, |
| "learning_rate": 9.733472747674779e-06, |
| "loss": 0.6021, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.5875251509054326, |
| "grad_norm": 2.3614251613616943, |
| "learning_rate": 9.729686422728353e-06, |
| "loss": 0.5971, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.5895372233400402, |
| "grad_norm": 2.237323522567749, |
| "learning_rate": 9.725874139595978e-06, |
| "loss": 0.5917, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.5915492957746479, |
| "grad_norm": 2.2730329036712646, |
| "learning_rate": 9.722035919200812e-06, |
| "loss": 0.6119, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.5935613682092555, |
| "grad_norm": 1.9858745336532593, |
| "learning_rate": 9.718171782608355e-06, |
| "loss": 0.5934, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.5955734406438632, |
| "grad_norm": 2.39754581451416, |
| "learning_rate": 9.714281751026356e-06, |
| "loss": 0.5964, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.5975855130784709, |
| "grad_norm": 2.244943857192993, |
| "learning_rate": 9.710365845804675e-06, |
| "loss": 0.5375, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.5995975855130785, |
| "grad_norm": 2.48502254486084, |
| "learning_rate": 9.706424088435183e-06, |
| "loss": 0.6355, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.6016096579476862, |
| "grad_norm": 2.49822735786438, |
| "learning_rate": 9.702456500551632e-06, |
| "loss": 0.5974, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.6036217303822937, |
| "grad_norm": 2.8203203678131104, |
| "learning_rate": 9.698463103929542e-06, |
| "loss": 0.5476, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6056338028169014, |
| "grad_norm": 2.2995455265045166, |
| "learning_rate": 9.694443920486083e-06, |
| "loss": 0.5746, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.607645875251509, |
| "grad_norm": 2.167100429534912, |
| "learning_rate": 9.690398972279949e-06, |
| "loss": 0.5653, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.6096579476861167, |
| "grad_norm": 2.3564436435699463, |
| "learning_rate": 9.686328281511241e-06, |
| "loss": 0.5767, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.6116700201207244, |
| "grad_norm": 2.2094357013702393, |
| "learning_rate": 9.682231870521347e-06, |
| "loss": 0.6548, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.613682092555332, |
| "grad_norm": 2.4704389572143555, |
| "learning_rate": 9.67810976179281e-06, |
| "loss": 0.5766, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.6156941649899397, |
| "grad_norm": 2.2543351650238037, |
| "learning_rate": 9.673961977949219e-06, |
| "loss": 0.6256, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.6177062374245473, |
| "grad_norm": 2.216660737991333, |
| "learning_rate": 9.669788541755072e-06, |
| "loss": 0.5912, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.6197183098591549, |
| "grad_norm": 2.1589713096618652, |
| "learning_rate": 9.665589476115657e-06, |
| "loss": 0.5898, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.6217303822937625, |
| "grad_norm": 2.309406042098999, |
| "learning_rate": 9.661364804076927e-06, |
| "loss": 0.6137, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.6237424547283702, |
| "grad_norm": 2.363293409347534, |
| "learning_rate": 9.657114548825372e-06, |
| "loss": 0.6052, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.6257545271629779, |
| "grad_norm": 2.509986162185669, |
| "learning_rate": 9.652838733687888e-06, |
| "loss": 0.5869, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.6277665995975855, |
| "grad_norm": 2.23989200592041, |
| "learning_rate": 9.648537382131659e-06, |
| "loss": 0.5552, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.6297786720321932, |
| "grad_norm": 2.3576748371124268, |
| "learning_rate": 9.644210517764014e-06, |
| "loss": 0.5931, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.6317907444668008, |
| "grad_norm": 2.3847086429595947, |
| "learning_rate": 9.639858164332314e-06, |
| "loss": 0.5895, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.6338028169014085, |
| "grad_norm": 2.3280272483825684, |
| "learning_rate": 9.635480345723805e-06, |
| "loss": 0.566, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.635814889336016, |
| "grad_norm": 2.1712679862976074, |
| "learning_rate": 9.631077085965501e-06, |
| "loss": 0.6073, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.6378269617706237, |
| "grad_norm": 2.112177610397339, |
| "learning_rate": 9.626648409224041e-06, |
| "loss": 0.5855, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.6398390342052314, |
| "grad_norm": 2.2860617637634277, |
| "learning_rate": 9.622194339805565e-06, |
| "loss": 0.6272, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.641851106639839, |
| "grad_norm": 2.424269437789917, |
| "learning_rate": 9.617714902155576e-06, |
| "loss": 0.6146, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.6438631790744467, |
| "grad_norm": 2.1368589401245117, |
| "learning_rate": 9.613210120858805e-06, |
| "loss": 0.6157, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6458752515090543, |
| "grad_norm": 2.490374803543091, |
| "learning_rate": 9.608680020639081e-06, |
| "loss": 0.6139, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.647887323943662, |
| "grad_norm": 2.3180062770843506, |
| "learning_rate": 9.60412462635919e-06, |
| "loss": 0.6013, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.6498993963782697, |
| "grad_norm": 2.4402894973754883, |
| "learning_rate": 9.599543963020741e-06, |
| "loss": 0.6116, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.6519114688128773, |
| "grad_norm": 2.1556742191314697, |
| "learning_rate": 9.594938055764029e-06, |
| "loss": 0.5712, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.6539235412474849, |
| "grad_norm": 2.4181032180786133, |
| "learning_rate": 9.590306929867896e-06, |
| "loss": 0.6334, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.6559356136820925, |
| "grad_norm": 2.138808250427246, |
| "learning_rate": 9.585650610749593e-06, |
| "loss": 0.6156, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.6579476861167002, |
| "grad_norm": 2.266510248184204, |
| "learning_rate": 9.580969123964641e-06, |
| "loss": 0.5878, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.6599597585513078, |
| "grad_norm": 2.302675485610962, |
| "learning_rate": 9.576262495206689e-06, |
| "loss": 0.5439, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.6619718309859155, |
| "grad_norm": 2.0406110286712646, |
| "learning_rate": 9.571530750307374e-06, |
| "loss": 0.5612, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.6639839034205232, |
| "grad_norm": 2.294686794281006, |
| "learning_rate": 9.56677391523618e-06, |
| "loss": 0.6101, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.6659959758551308, |
| "grad_norm": 2.3122353553771973, |
| "learning_rate": 9.561992016100293e-06, |
| "loss": 0.5615, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.6680080482897385, |
| "grad_norm": 2.389636754989624, |
| "learning_rate": 9.557185079144463e-06, |
| "loss": 0.5509, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.670020120724346, |
| "grad_norm": 2.0459258556365967, |
| "learning_rate": 9.552353130750852e-06, |
| "loss": 0.5769, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.6720321931589537, |
| "grad_norm": 2.269744396209717, |
| "learning_rate": 9.547496197438896e-06, |
| "loss": 0.6115, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.6740442655935613, |
| "grad_norm": 2.3433408737182617, |
| "learning_rate": 9.542614305865158e-06, |
| "loss": 0.5611, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.676056338028169, |
| "grad_norm": 2.231168270111084, |
| "learning_rate": 9.53770748282318e-06, |
| "loss": 0.5728, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.6780684104627767, |
| "grad_norm": 2.2339141368865967, |
| "learning_rate": 9.532775755243334e-06, |
| "loss": 0.5661, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.6800804828973843, |
| "grad_norm": 2.384350061416626, |
| "learning_rate": 9.527819150192681e-06, |
| "loss": 0.65, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.682092555331992, |
| "grad_norm": 2.395918369293213, |
| "learning_rate": 9.522837694874814e-06, |
| "loss": 0.6252, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.6841046277665996, |
| "grad_norm": 2.5676770210266113, |
| "learning_rate": 9.517831416629717e-06, |
| "loss": 0.5988, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6861167002012073, |
| "grad_norm": 2.204547166824341, |
| "learning_rate": 9.512800342933608e-06, |
| "loss": 0.5708, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.6881287726358148, |
| "grad_norm": 2.5879533290863037, |
| "learning_rate": 9.507744501398794e-06, |
| "loss": 0.6058, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.6901408450704225, |
| "grad_norm": 2.14680814743042, |
| "learning_rate": 9.502663919773516e-06, |
| "loss": 0.5931, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.6921529175050302, |
| "grad_norm": 2.6863231658935547, |
| "learning_rate": 9.497558625941794e-06, |
| "loss": 0.6241, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.6941649899396378, |
| "grad_norm": 2.3913419246673584, |
| "learning_rate": 9.492428647923281e-06, |
| "loss": 0.591, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.6961770623742455, |
| "grad_norm": 2.2461934089660645, |
| "learning_rate": 9.487274013873104e-06, |
| "loss": 0.5122, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.6981891348088531, |
| "grad_norm": 2.091630697250366, |
| "learning_rate": 9.482094752081711e-06, |
| "loss": 0.5787, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.7002012072434608, |
| "grad_norm": 2.184694290161133, |
| "learning_rate": 9.47689089097472e-06, |
| "loss": 0.5839, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.7022132796780685, |
| "grad_norm": 2.175163984298706, |
| "learning_rate": 9.471662459112747e-06, |
| "loss": 0.5782, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.704225352112676, |
| "grad_norm": 2.343888521194458, |
| "learning_rate": 9.466409485191275e-06, |
| "loss": 0.5909, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.7062374245472837, |
| "grad_norm": 2.230376958847046, |
| "learning_rate": 9.461131998040473e-06, |
| "loss": 0.5791, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.7082494969818913, |
| "grad_norm": 2.165900707244873, |
| "learning_rate": 9.455830026625053e-06, |
| "loss": 0.5488, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.710261569416499, |
| "grad_norm": 2.151834487915039, |
| "learning_rate": 9.450503600044102e-06, |
| "loss": 0.5556, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.7122736418511066, |
| "grad_norm": 2.06109619140625, |
| "learning_rate": 9.445152747530922e-06, |
| "loss": 0.5415, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 2.3980467319488525, |
| "learning_rate": 9.439777498452883e-06, |
| "loss": 0.5612, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.716297786720322, |
| "grad_norm": 2.2379043102264404, |
| "learning_rate": 9.434377882311244e-06, |
| "loss": 0.6302, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.7183098591549296, |
| "grad_norm": 2.3387439250946045, |
| "learning_rate": 9.428953928741002e-06, |
| "loss": 0.6122, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.7203219315895373, |
| "grad_norm": 2.465433359146118, |
| "learning_rate": 9.423505667510724e-06, |
| "loss": 0.5993, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.7223340040241448, |
| "grad_norm": 2.0558369159698486, |
| "learning_rate": 9.41803312852239e-06, |
| "loss": 0.5871, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.7243460764587525, |
| "grad_norm": 2.2847893238067627, |
| "learning_rate": 9.41253634181122e-06, |
| "loss": 0.5773, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.7263581488933601, |
| "grad_norm": 2.137911319732666, |
| "learning_rate": 9.40701533754552e-06, |
| "loss": 0.6051, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.7283702213279678, |
| "grad_norm": 1.9390573501586914, |
| "learning_rate": 9.401470146026504e-06, |
| "loss": 0.5161, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.7303822937625755, |
| "grad_norm": 2.056952714920044, |
| "learning_rate": 9.39590079768814e-06, |
| "loss": 0.5652, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.7323943661971831, |
| "grad_norm": 2.426093339920044, |
| "learning_rate": 9.390307323096972e-06, |
| "loss": 0.5756, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.7344064386317908, |
| "grad_norm": 2.0200657844543457, |
| "learning_rate": 9.384689752951961e-06, |
| "loss": 0.5601, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.7364185110663984, |
| "grad_norm": 1.9603573083877563, |
| "learning_rate": 9.379048118084312e-06, |
| "loss": 0.5586, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.738430583501006, |
| "grad_norm": 2.151219129562378, |
| "learning_rate": 9.373382449457305e-06, |
| "loss": 0.5368, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.7404426559356136, |
| "grad_norm": 2.253244161605835, |
| "learning_rate": 9.367692778166126e-06, |
| "loss": 0.563, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.7424547283702213, |
| "grad_norm": 2.4091432094573975, |
| "learning_rate": 9.361979135437697e-06, |
| "loss": 0.5909, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.744466800804829, |
| "grad_norm": 2.0590202808380127, |
| "learning_rate": 9.356241552630503e-06, |
| "loss": 0.5424, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.7464788732394366, |
| "grad_norm": 2.2783074378967285, |
| "learning_rate": 9.350480061234419e-06, |
| "loss": 0.6102, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.7484909456740443, |
| "grad_norm": 2.28292179107666, |
| "learning_rate": 9.344694692870541e-06, |
| "loss": 0.5819, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.7505030181086519, |
| "grad_norm": 2.3239924907684326, |
| "learning_rate": 9.338885479291012e-06, |
| "loss": 0.5518, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.7525150905432596, |
| "grad_norm": 2.2188453674316406, |
| "learning_rate": 9.333052452378838e-06, |
| "loss": 0.5808, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.7545271629778671, |
| "grad_norm": 2.173330783843994, |
| "learning_rate": 9.32719564414773e-06, |
| "loss": 0.6589, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.7565392354124748, |
| "grad_norm": 2.2564475536346436, |
| "learning_rate": 9.321315086741916e-06, |
| "loss": 0.5818, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.7585513078470825, |
| "grad_norm": 2.1442489624023438, |
| "learning_rate": 9.315410812435967e-06, |
| "loss": 0.6017, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.7605633802816901, |
| "grad_norm": 2.2043070793151855, |
| "learning_rate": 9.30948285363462e-06, |
| "loss": 0.5714, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.7625754527162978, |
| "grad_norm": 2.286181688308716, |
| "learning_rate": 9.303531242872606e-06, |
| "loss": 0.577, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.7645875251509054, |
| "grad_norm": 2.649578094482422, |
| "learning_rate": 9.297556012814457e-06, |
| "loss": 0.6219, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.7665995975855131, |
| "grad_norm": 2.3334577083587646, |
| "learning_rate": 9.291557196254342e-06, |
| "loss": 0.627, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.7686116700201208, |
| "grad_norm": 2.107356548309326, |
| "learning_rate": 9.285534826115884e-06, |
| "loss": 0.5891, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.7706237424547284, |
| "grad_norm": 2.133880138397217, |
| "learning_rate": 9.279488935451971e-06, |
| "loss": 0.5658, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.772635814889336, |
| "grad_norm": 2.4783966541290283, |
| "learning_rate": 9.27341955744458e-06, |
| "loss": 0.5775, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.7746478873239436, |
| "grad_norm": 2.5501651763916016, |
| "learning_rate": 9.2673267254046e-06, |
| "loss": 0.5742, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.7766599597585513, |
| "grad_norm": 2.2829442024230957, |
| "learning_rate": 9.261210472771637e-06, |
| "loss": 0.5579, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.778672032193159, |
| "grad_norm": 2.3803324699401855, |
| "learning_rate": 9.255070833113845e-06, |
| "loss": 0.6267, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.7806841046277666, |
| "grad_norm": 2.5065324306488037, |
| "learning_rate": 9.248907840127726e-06, |
| "loss": 0.5967, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.7826961770623743, |
| "grad_norm": 2.320683240890503, |
| "learning_rate": 9.24272152763796e-06, |
| "loss": 0.5925, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.7847082494969819, |
| "grad_norm": 2.3530187606811523, |
| "learning_rate": 9.236511929597206e-06, |
| "loss": 0.6105, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.7867203219315896, |
| "grad_norm": 2.618340253829956, |
| "learning_rate": 9.230279080085933e-06, |
| "loss": 0.5969, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.7887323943661971, |
| "grad_norm": 2.411909580230713, |
| "learning_rate": 9.224023013312212e-06, |
| "loss": 0.5556, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.7907444668008048, |
| "grad_norm": 2.401766061782837, |
| "learning_rate": 9.217743763611545e-06, |
| "loss": 0.5826, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.7927565392354124, |
| "grad_norm": 2.151867151260376, |
| "learning_rate": 9.211441365446661e-06, |
| "loss": 0.598, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.7947686116700201, |
| "grad_norm": 2.0797793865203857, |
| "learning_rate": 9.20511585340735e-06, |
| "loss": 0.578, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.7967806841046278, |
| "grad_norm": 2.3202261924743652, |
| "learning_rate": 9.198767262210244e-06, |
| "loss": 0.5966, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.7987927565392354, |
| "grad_norm": 2.244210720062256, |
| "learning_rate": 9.192395626698656e-06, |
| "loss": 0.5522, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.8008048289738431, |
| "grad_norm": 2.1149046421051025, |
| "learning_rate": 9.186000981842362e-06, |
| "loss": 0.5579, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.8028169014084507, |
| "grad_norm": 2.2352654933929443, |
| "learning_rate": 9.17958336273743e-06, |
| "loss": 0.5321, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.8048289738430584, |
| "grad_norm": 2.6794004440307617, |
| "learning_rate": 9.173142804606012e-06, |
| "loss": 0.5584, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.806841046277666, |
| "grad_norm": 1.9585459232330322, |
| "learning_rate": 9.166679342796162e-06, |
| "loss": 0.5313, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.8088531187122736, |
| "grad_norm": 2.3576083183288574, |
| "learning_rate": 9.160193012781639e-06, |
| "loss": 0.617, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.8108651911468813, |
| "grad_norm": 2.1958634853363037, |
| "learning_rate": 9.153683850161706e-06, |
| "loss": 0.6003, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.8128772635814889, |
| "grad_norm": 2.409407615661621, |
| "learning_rate": 9.147151890660942e-06, |
| "loss": 0.5722, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.8148893360160966, |
| "grad_norm": 2.3817203044891357, |
| "learning_rate": 9.140597170129041e-06, |
| "loss": 0.6051, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.8169014084507042, |
| "grad_norm": 2.1336405277252197, |
| "learning_rate": 9.13401972454062e-06, |
| "loss": 0.5703, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.8189134808853119, |
| "grad_norm": 2.2689437866210938, |
| "learning_rate": 9.12741958999502e-06, |
| "loss": 0.5767, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.8209255533199196, |
| "grad_norm": 2.151379346847534, |
| "learning_rate": 9.120796802716104e-06, |
| "loss": 0.5539, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.8229376257545271, |
| "grad_norm": 1.9756191968917847, |
| "learning_rate": 9.114151399052064e-06, |
| "loss": 0.5481, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.8249496981891348, |
| "grad_norm": 2.424356698989868, |
| "learning_rate": 9.107483415475216e-06, |
| "loss": 0.6311, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.8269617706237424, |
| "grad_norm": 2.0553550720214844, |
| "learning_rate": 9.100792888581803e-06, |
| "loss": 0.5733, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.8289738430583501, |
| "grad_norm": 2.2919304370880127, |
| "learning_rate": 9.094079855091797e-06, |
| "loss": 0.5902, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.8309859154929577, |
| "grad_norm": 2.2795591354370117, |
| "learning_rate": 9.08734435184869e-06, |
| "loss": 0.5339, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.8329979879275654, |
| "grad_norm": 2.2266199588775635, |
| "learning_rate": 9.080586415819296e-06, |
| "loss": 0.5724, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.8350100603621731, |
| "grad_norm": 2.197139263153076, |
| "learning_rate": 9.073806084093556e-06, |
| "loss": 0.5668, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.8370221327967807, |
| "grad_norm": 2.386579751968384, |
| "learning_rate": 9.067003393884313e-06, |
| "loss": 0.6091, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.8390342052313883, |
| "grad_norm": 2.1007778644561768, |
| "learning_rate": 9.06017838252713e-06, |
| "loss": 0.5447, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.8410462776659959, |
| "grad_norm": 2.3940844535827637, |
| "learning_rate": 9.053331087480075e-06, |
| "loss": 0.613, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.8430583501006036, |
| "grad_norm": 2.0589396953582764, |
| "learning_rate": 9.046461546323519e-06, |
| "loss": 0.523, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.8450704225352113, |
| "grad_norm": 2.245084047317505, |
| "learning_rate": 9.039569796759921e-06, |
| "loss": 0.5571, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.8470824949698189, |
| "grad_norm": 2.283914804458618, |
| "learning_rate": 9.032655876613636e-06, |
| "loss": 0.5937, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.8490945674044266, |
| "grad_norm": 2.2793750762939453, |
| "learning_rate": 9.02571982383069e-06, |
| "loss": 0.5811, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.8511066398390342, |
| "grad_norm": 2.318835735321045, |
| "learning_rate": 9.018761676478585e-06, |
| "loss": 0.5851, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.8531187122736419, |
| "grad_norm": 2.1775121688842773, |
| "learning_rate": 9.01178147274609e-06, |
| "loss": 0.5939, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.8551307847082495, |
| "grad_norm": 2.144890308380127, |
| "learning_rate": 9.00477925094302e-06, |
| "loss": 0.5606, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 2.165470838546753, |
| "learning_rate": 8.997755049500037e-06, |
| "loss": 0.6005, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.8591549295774648, |
| "grad_norm": 2.3857879638671875, |
| "learning_rate": 8.990708906968431e-06, |
| "loss": 0.6083, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.8611670020120724, |
| "grad_norm": 2.0287764072418213, |
| "learning_rate": 8.98364086201992e-06, |
| "loss": 0.5549, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.8631790744466801, |
| "grad_norm": 2.002955436706543, |
| "learning_rate": 8.976550953446426e-06, |
| "loss": 0.5845, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.8651911468812877, |
| "grad_norm": 2.124072551727295, |
| "learning_rate": 8.969439220159861e-06, |
| "loss": 0.5631, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.8672032193158954, |
| "grad_norm": 2.3265137672424316, |
| "learning_rate": 8.962305701191927e-06, |
| "loss": 0.5627, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.869215291750503, |
| "grad_norm": 2.0870211124420166, |
| "learning_rate": 8.955150435693889e-06, |
| "loss": 0.5217, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.8712273641851107, |
| "grad_norm": 2.349735975265503, |
| "learning_rate": 8.947973462936366e-06, |
| "loss": 0.5817, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.8732394366197183, |
| "grad_norm": 2.4168457984924316, |
| "learning_rate": 8.940774822309116e-06, |
| "loss": 0.5642, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.8752515090543259, |
| "grad_norm": 2.447883367538452, |
| "learning_rate": 8.933554553320813e-06, |
| "loss": 0.588, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.8772635814889336, |
| "grad_norm": 2.1601593494415283, |
| "learning_rate": 8.926312695598837e-06, |
| "loss": 0.6093, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.8792756539235412, |
| "grad_norm": 2.3886845111846924, |
| "learning_rate": 8.919049288889058e-06, |
| "loss": 0.5617, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.8812877263581489, |
| "grad_norm": 2.295163154602051, |
| "learning_rate": 8.911764373055612e-06, |
| "loss": 0.5183, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.8832997987927566, |
| "grad_norm": 2.1284451484680176, |
| "learning_rate": 8.904457988080682e-06, |
| "loss": 0.5466, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.8853118712273642, |
| "grad_norm": 2.3324074745178223, |
| "learning_rate": 8.897130174064285e-06, |
| "loss": 0.5525, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.8873239436619719, |
| "grad_norm": 2.279731035232544, |
| "learning_rate": 8.889780971224047e-06, |
| "loss": 0.6048, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.8893360160965795, |
| "grad_norm": 2.274237632751465, |
| "learning_rate": 8.882410419894983e-06, |
| "loss": 0.5566, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.8913480885311871, |
| "grad_norm": 2.2570290565490723, |
| "learning_rate": 8.875018560529275e-06, |
| "loss": 0.5492, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.8933601609657947, |
| "grad_norm": 2.310661554336548, |
| "learning_rate": 8.867605433696056e-06, |
| "loss": 0.5782, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.8953722334004024, |
| "grad_norm": 2.328352212905884, |
| "learning_rate": 8.860171080081174e-06, |
| "loss": 0.6308, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.89738430583501, |
| "grad_norm": 2.168409585952759, |
| "learning_rate": 8.852715540486986e-06, |
| "loss": 0.5418, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.8993963782696177, |
| "grad_norm": 2.200997829437256, |
| "learning_rate": 8.845238855832117e-06, |
| "loss": 0.6063, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.9014084507042254, |
| "grad_norm": 2.295320987701416, |
| "learning_rate": 8.837741067151251e-06, |
| "loss": 0.5874, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.903420523138833, |
| "grad_norm": 2.168964385986328, |
| "learning_rate": 8.83022221559489e-06, |
| "loss": 0.5664, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.9054325955734407, |
| "grad_norm": 2.1303305625915527, |
| "learning_rate": 8.822682342429147e-06, |
| "loss": 0.5336, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.9074446680080482, |
| "grad_norm": 2.489168882369995, |
| "learning_rate": 8.8151214890355e-06, |
| "loss": 0.6388, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.9094567404426559, |
| "grad_norm": 2.106583595275879, |
| "learning_rate": 8.807539696910574e-06, |
| "loss": 0.5871, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.9114688128772636, |
| "grad_norm": 2.0476789474487305, |
| "learning_rate": 8.79993700766592e-06, |
| "loss": 0.5506, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.9134808853118712, |
| "grad_norm": 2.1992383003234863, |
| "learning_rate": 8.792313463027777e-06, |
| "loss": 0.5737, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.9154929577464789, |
| "grad_norm": 2.2196712493896484, |
| "learning_rate": 8.784669104836842e-06, |
| "loss": 0.5607, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.9175050301810865, |
| "grad_norm": 2.1786701679229736, |
| "learning_rate": 8.777003975048048e-06, |
| "loss": 0.5975, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.9195171026156942, |
| "grad_norm": 2.178668260574341, |
| "learning_rate": 8.76931811573033e-06, |
| "loss": 0.5721, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.9215291750503019, |
| "grad_norm": 2.1133759021759033, |
| "learning_rate": 8.761611569066388e-06, |
| "loss": 0.5687, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.9235412474849095, |
| "grad_norm": 2.18926739692688, |
| "learning_rate": 8.753884377352472e-06, |
| "loss": 0.5927, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.9255533199195171, |
| "grad_norm": 2.5471627712249756, |
| "learning_rate": 8.74613658299813e-06, |
| "loss": 0.5743, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.9275653923541247, |
| "grad_norm": 2.43631649017334, |
| "learning_rate": 8.738368228525988e-06, |
| "loss": 0.6036, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.9295774647887324, |
| "grad_norm": 2.3415586948394775, |
| "learning_rate": 8.730579356571514e-06, |
| "loss": 0.5686, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.93158953722334, |
| "grad_norm": 2.2325901985168457, |
| "learning_rate": 8.72277000988278e-06, |
| "loss": 0.5742, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.9336016096579477, |
| "grad_norm": 2.3566064834594727, |
| "learning_rate": 8.714940231320237e-06, |
| "loss": 0.6196, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.9356136820925554, |
| "grad_norm": 2.469269275665283, |
| "learning_rate": 8.707090063856466e-06, |
| "loss": 0.5786, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.937625754527163, |
| "grad_norm": 2.319122552871704, |
| "learning_rate": 8.699219550575954e-06, |
| "loss": 0.5886, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.9396378269617707, |
| "grad_norm": 2.2840166091918945, |
| "learning_rate": 8.691328734674851e-06, |
| "loss": 0.5377, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.9416498993963782, |
| "grad_norm": 2.2281320095062256, |
| "learning_rate": 8.683417659460735e-06, |
| "loss": 0.5468, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.9436619718309859, |
| "grad_norm": 2.3349030017852783, |
| "learning_rate": 8.675486368352376e-06, |
| "loss": 0.6274, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.9456740442655935, |
| "grad_norm": 1.9517689943313599, |
| "learning_rate": 8.667534904879495e-06, |
| "loss": 0.536, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.9476861167002012, |
| "grad_norm": 2.1241378784179688, |
| "learning_rate": 8.659563312682524e-06, |
| "loss": 0.5384, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.9496981891348089, |
| "grad_norm": 2.210144281387329, |
| "learning_rate": 8.651571635512372e-06, |
| "loss": 0.5456, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.9517102615694165, |
| "grad_norm": 2.253452777862549, |
| "learning_rate": 8.64355991723018e-06, |
| "loss": 0.5862, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.9537223340040242, |
| "grad_norm": 2.534611225128174, |
| "learning_rate": 8.635528201807079e-06, |
| "loss": 0.6127, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.9557344064386318, |
| "grad_norm": 2.063807725906372, |
| "learning_rate": 8.627476533323957e-06, |
| "loss": 0.5489, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.9577464788732394, |
| "grad_norm": 2.166027784347534, |
| "learning_rate": 8.619404955971208e-06, |
| "loss": 0.5602, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.959758551307847, |
| "grad_norm": 2.0670714378356934, |
| "learning_rate": 8.61131351404849e-06, |
| "loss": 0.561, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.9617706237424547, |
| "grad_norm": 2.023287057876587, |
| "learning_rate": 8.603202251964492e-06, |
| "loss": 0.5245, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.9637826961770624, |
| "grad_norm": 2.208113431930542, |
| "learning_rate": 8.595071214236675e-06, |
| "loss": 0.5625, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.96579476861167, |
| "grad_norm": 2.1182444095611572, |
| "learning_rate": 8.586920445491043e-06, |
| "loss": 0.5861, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.9678068410462777, |
| "grad_norm": 2.0620083808898926, |
| "learning_rate": 8.578749990461884e-06, |
| "loss": 0.5696, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.9698189134808853, |
| "grad_norm": 2.276942014694214, |
| "learning_rate": 8.570559893991537e-06, |
| "loss": 0.5385, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.971830985915493, |
| "grad_norm": 2.8296422958374023, |
| "learning_rate": 8.562350201030139e-06, |
| "loss": 0.5484, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.9738430583501007, |
| "grad_norm": 2.4141933917999268, |
| "learning_rate": 8.554120956635375e-06, |
| "loss": 0.5563, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.9758551307847082, |
| "grad_norm": 2.302938938140869, |
| "learning_rate": 8.54587220597224e-06, |
| "loss": 0.6235, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.9778672032193159, |
| "grad_norm": 2.2329790592193604, |
| "learning_rate": 8.537603994312786e-06, |
| "loss": 0.5669, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.9798792756539235, |
| "grad_norm": 2.3101489543914795, |
| "learning_rate": 8.52931636703587e-06, |
| "loss": 0.5698, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.9818913480885312, |
| "grad_norm": 2.620720386505127, |
| "learning_rate": 8.521009369626914e-06, |
| "loss": 0.5333, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.9839034205231388, |
| "grad_norm": 2.2651710510253906, |
| "learning_rate": 8.512683047677644e-06, |
| "loss": 0.5524, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.9859154929577465, |
| "grad_norm": 2.12550687789917, |
| "learning_rate": 8.504337446885854e-06, |
| "loss": 0.5665, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.9879275653923542, |
| "grad_norm": 2.010809898376465, |
| "learning_rate": 8.495972613055137e-06, |
| "loss": 0.5295, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.9899396378269618, |
| "grad_norm": 2.183659553527832, |
| "learning_rate": 8.487588592094652e-06, |
| "loss": 0.5685, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.9919517102615694, |
| "grad_norm": 2.26654314994812, |
| "learning_rate": 8.47918543001886e-06, |
| "loss": 0.5872, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.993963782696177, |
| "grad_norm": 2.483935832977295, |
| "learning_rate": 8.470763172947276e-06, |
| "loss": 0.5938, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.9959758551307847, |
| "grad_norm": 2.136721611022949, |
| "learning_rate": 8.462321867104217e-06, |
| "loss": 0.5819, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.9979879275653923, |
| "grad_norm": 2.082050323486328, |
| "learning_rate": 8.453861558818542e-06, |
| "loss": 0.5132, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.1009018421173096, |
| "learning_rate": 8.445382294523406e-06, |
| "loss": 0.5328, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.0020120724346075, |
| "grad_norm": 2.176445722579956, |
| "learning_rate": 8.436884120755997e-06, |
| "loss": 0.4829, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.0040241448692153, |
| "grad_norm": 2.335286855697632, |
| "learning_rate": 8.428367084157292e-06, |
| "loss": 0.4892, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.0060362173038229, |
| "grad_norm": 2.0593619346618652, |
| "learning_rate": 8.419831231471785e-06, |
| "loss": 0.4445, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.0080482897384306, |
| "grad_norm": 2.074493169784546, |
| "learning_rate": 8.411276609547246e-06, |
| "loss": 0.4707, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.0100603621730382, |
| "grad_norm": 1.8737417459487915, |
| "learning_rate": 8.402703265334455e-06, |
| "loss": 0.4441, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.012072434607646, |
| "grad_norm": 2.2214300632476807, |
| "learning_rate": 8.394111245886948e-06, |
| "loss": 0.4426, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.0140845070422535, |
| "grad_norm": 2.098071575164795, |
| "learning_rate": 8.385500598360752e-06, |
| "loss": 0.4542, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.0160965794768613, |
| "grad_norm": 2.084216356277466, |
| "learning_rate": 8.376871370014139e-06, |
| "loss": 0.4747, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.0181086519114688, |
| "grad_norm": 2.022986888885498, |
| "learning_rate": 8.368223608207351e-06, |
| "loss": 0.4475, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.0201207243460764, |
| "grad_norm": 2.2105493545532227, |
| "learning_rate": 8.359557360402357e-06, |
| "loss": 0.4508, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.0221327967806841, |
| "grad_norm": 2.076406240463257, |
| "learning_rate": 8.350872674162578e-06, |
| "loss": 0.4252, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.0241448692152917, |
| "grad_norm": 2.2363109588623047, |
| "learning_rate": 8.34216959715263e-06, |
| "loss": 0.4534, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.0261569416498995, |
| "grad_norm": 2.0087409019470215, |
| "learning_rate": 8.333448177138071e-06, |
| "loss": 0.4703, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.028169014084507, |
| "grad_norm": 2.0913915634155273, |
| "learning_rate": 8.324708461985124e-06, |
| "loss": 0.4365, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.0301810865191148, |
| "grad_norm": 2.1394097805023193, |
| "learning_rate": 8.315950499660427e-06, |
| "loss": 0.4716, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.0321931589537223, |
| "grad_norm": 2.2066264152526855, |
| "learning_rate": 8.307174338230765e-06, |
| "loss": 0.4548, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.0342052313883299, |
| "grad_norm": 2.1786088943481445, |
| "learning_rate": 8.298380025862805e-06, |
| "loss": 0.4606, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.0362173038229376, |
| "grad_norm": 2.3795361518859863, |
| "learning_rate": 8.28956761082283e-06, |
| "loss": 0.4754, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.0382293762575452, |
| "grad_norm": 2.1223013401031494, |
| "learning_rate": 8.280737141476482e-06, |
| "loss": 0.4541, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.040241448692153, |
| "grad_norm": 2.176300287246704, |
| "learning_rate": 8.271888666288488e-06, |
| "loss": 0.4617, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.0422535211267605, |
| "grad_norm": 1.9915233850479126, |
| "learning_rate": 8.263022233822397e-06, |
| "loss": 0.4617, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.0442655935613683, |
| "grad_norm": 2.0911736488342285, |
| "learning_rate": 8.254137892740318e-06, |
| "loss": 0.4702, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.0462776659959758, |
| "grad_norm": 2.0248148441314697, |
| "learning_rate": 8.245235691802644e-06, |
| "loss": 0.4635, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.0482897384305836, |
| "grad_norm": 2.0968503952026367, |
| "learning_rate": 8.23631567986779e-06, |
| "loss": 0.4747, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.0503018108651911, |
| "grad_norm": 2.070502281188965, |
| "learning_rate": 8.227377905891927e-06, |
| "loss": 0.4537, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.0523138832997987, |
| "grad_norm": 2.0351507663726807, |
| "learning_rate": 8.218422418928709e-06, |
| "loss": 0.4757, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.0543259557344065, |
| "grad_norm": 2.0005311965942383, |
| "learning_rate": 8.209449268129003e-06, |
| "loss": 0.4605, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.056338028169014, |
| "grad_norm": 2.127006769180298, |
| "learning_rate": 8.200458502740623e-06, |
| "loss": 0.4664, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.0583501006036218, |
| "grad_norm": 2.3169891834259033, |
| "learning_rate": 8.191450172108058e-06, |
| "loss": 0.4957, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.0603621730382293, |
| "grad_norm": 2.0168895721435547, |
| "learning_rate": 8.182424325672203e-06, |
| "loss": 0.49, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.062374245472837, |
| "grad_norm": 2.439521551132202, |
| "learning_rate": 8.173381012970084e-06, |
| "loss": 0.4864, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.0643863179074446, |
| "grad_norm": 2.3614089488983154, |
| "learning_rate": 8.164320283634585e-06, |
| "loss": 0.4545, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.0663983903420524, |
| "grad_norm": 2.2037999629974365, |
| "learning_rate": 8.155242187394184e-06, |
| "loss": 0.4369, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.06841046277666, |
| "grad_norm": 2.243170976638794, |
| "learning_rate": 8.146146774072674e-06, |
| "loss": 0.4901, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.0704225352112675, |
| "grad_norm": 2.1171209812164307, |
| "learning_rate": 8.137034093588885e-06, |
| "loss": 0.4677, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.0724346076458753, |
| "grad_norm": 2.155569314956665, |
| "learning_rate": 8.127904195956424e-06, |
| "loss": 0.43, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.0744466800804828, |
| "grad_norm": 2.3345916271209717, |
| "learning_rate": 8.118757131283383e-06, |
| "loss": 0.4634, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.0764587525150906, |
| "grad_norm": 2.1813671588897705, |
| "learning_rate": 8.109592949772076e-06, |
| "loss": 0.4629, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.0784708249496981, |
| "grad_norm": 2.235050916671753, |
| "learning_rate": 8.100411701718765e-06, |
| "loss": 0.5095, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.080482897384306, |
| "grad_norm": 2.0694808959960938, |
| "learning_rate": 8.091213437513371e-06, |
| "loss": 0.4165, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.0824949698189135, |
| "grad_norm": 2.163832426071167, |
| "learning_rate": 8.081998207639212e-06, |
| "loss": 0.4883, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.084507042253521, |
| "grad_norm": 2.0511605739593506, |
| "learning_rate": 8.072766062672717e-06, |
| "loss": 0.4735, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.0865191146881288, |
| "grad_norm": 2.378787040710449, |
| "learning_rate": 8.06351705328315e-06, |
| "loss": 0.4798, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.0885311871227363, |
| "grad_norm": 2.1771721839904785, |
| "learning_rate": 8.054251230232333e-06, |
| "loss": 0.461, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.090543259557344, |
| "grad_norm": 2.158625602722168, |
| "learning_rate": 8.044968644374373e-06, |
| "loss": 0.4469, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.0925553319919517, |
| "grad_norm": 2.060878038406372, |
| "learning_rate": 8.035669346655368e-06, |
| "loss": 0.4245, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.0945674044265594, |
| "grad_norm": 2.228379487991333, |
| "learning_rate": 8.026353388113142e-06, |
| "loss": 0.4839, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.096579476861167, |
| "grad_norm": 2.0745017528533936, |
| "learning_rate": 8.017020819876962e-06, |
| "loss": 0.4298, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.0985915492957747, |
| "grad_norm": 2.1419124603271484, |
| "learning_rate": 8.007671693167248e-06, |
| "loss": 0.4674, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.1006036217303823, |
| "grad_norm": 2.2269890308380127, |
| "learning_rate": 7.998306059295302e-06, |
| "loss": 0.4667, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.10261569416499, |
| "grad_norm": 2.07487416267395, |
| "learning_rate": 7.988923969663027e-06, |
| "loss": 0.4672, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.1046277665995976, |
| "grad_norm": 2.1029868125915527, |
| "learning_rate": 7.979525475762634e-06, |
| "loss": 0.4545, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.1066398390342052, |
| "grad_norm": 2.18890380859375, |
| "learning_rate": 7.97011062917637e-06, |
| "loss": 0.4577, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.108651911468813, |
| "grad_norm": 2.4978983402252197, |
| "learning_rate": 7.960679481576233e-06, |
| "loss": 0.4757, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.1106639839034205, |
| "grad_norm": 2.099303722381592, |
| "learning_rate": 7.951232084723685e-06, |
| "loss": 0.4564, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.1126760563380282, |
| "grad_norm": 2.0341546535491943, |
| "learning_rate": 7.941768490469368e-06, |
| "loss": 0.4653, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.1146881287726358, |
| "grad_norm": 2.0823800563812256, |
| "learning_rate": 7.932288750752819e-06, |
| "loss": 0.4906, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.1167002012072436, |
| "grad_norm": 2.0323636531829834, |
| "learning_rate": 7.922792917602197e-06, |
| "loss": 0.4837, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.118712273641851, |
| "grad_norm": 1.9934216737747192, |
| "learning_rate": 7.913281043133978e-06, |
| "loss": 0.4657, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.1207243460764587, |
| "grad_norm": 2.1831841468811035, |
| "learning_rate": 7.903753179552682e-06, |
| "loss": 0.4731, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.1227364185110664, |
| "grad_norm": 2.1525766849517822, |
| "learning_rate": 7.89420937915058e-06, |
| "loss": 0.4437, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.124748490945674, |
| "grad_norm": 2.4499902725219727, |
| "learning_rate": 7.884649694307413e-06, |
| "loss": 0.466, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.1267605633802817, |
| "grad_norm": 2.279303789138794, |
| "learning_rate": 7.875074177490103e-06, |
| "loss": 0.4554, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.1287726358148893, |
| "grad_norm": 2.1323020458221436, |
| "learning_rate": 7.86548288125246e-06, |
| "loss": 0.4735, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.130784708249497, |
| "grad_norm": 2.1366891860961914, |
| "learning_rate": 7.855875858234894e-06, |
| "loss": 0.4721, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.1327967806841046, |
| "grad_norm": 2.020785331726074, |
| "learning_rate": 7.846253161164138e-06, |
| "loss": 0.4888, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.1348088531187122, |
| "grad_norm": 2.11442494392395, |
| "learning_rate": 7.836614842852942e-06, |
| "loss": 0.4809, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.13682092555332, |
| "grad_norm": 2.2528202533721924, |
| "learning_rate": 7.826960956199796e-06, |
| "loss": 0.4726, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.1388329979879275, |
| "grad_norm": 2.0524942874908447, |
| "learning_rate": 7.817291554188628e-06, |
| "loss": 0.4295, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.1408450704225352, |
| "grad_norm": 2.4182207584381104, |
| "learning_rate": 7.80760668988853e-06, |
| "loss": 0.4897, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.1428571428571428, |
| "grad_norm": 2.186673879623413, |
| "learning_rate": 7.797906416453445e-06, |
| "loss": 0.4761, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.1448692152917506, |
| "grad_norm": 2.0501601696014404, |
| "learning_rate": 7.788190787121896e-06, |
| "loss": 0.4596, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.1468812877263581, |
| "grad_norm": 2.1505749225616455, |
| "learning_rate": 7.778459855216678e-06, |
| "loss": 0.4727, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.1488933601609659, |
| "grad_norm": 2.1751537322998047, |
| "learning_rate": 7.768713674144578e-06, |
| "loss": 0.4699, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.1509054325955734, |
| "grad_norm": 2.1614253520965576, |
| "learning_rate": 7.758952297396068e-06, |
| "loss": 0.4347, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.1529175050301812, |
| "grad_norm": 2.0868771076202393, |
| "learning_rate": 7.749175778545026e-06, |
| "loss": 0.4835, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.1549295774647887, |
| "grad_norm": 2.064375638961792, |
| "learning_rate": 7.739384171248436e-06, |
| "loss": 0.4815, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.1569416498993963, |
| "grad_norm": 2.0885112285614014, |
| "learning_rate": 7.729577529246084e-06, |
| "loss": 0.4406, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.158953722334004, |
| "grad_norm": 2.109877347946167, |
| "learning_rate": 7.719755906360282e-06, |
| "loss": 0.4575, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.1609657947686116, |
| "grad_norm": 2.359646797180176, |
| "learning_rate": 7.709919356495555e-06, |
| "loss": 0.4899, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.1629778672032194, |
| "grad_norm": 2.143345355987549, |
| "learning_rate": 7.700067933638357e-06, |
| "loss": 0.4668, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.164989939637827, |
| "grad_norm": 2.231412649154663, |
| "learning_rate": 7.690201691856768e-06, |
| "loss": 0.4682, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.1670020120724347, |
| "grad_norm": 2.0817251205444336, |
| "learning_rate": 7.6803206853002e-06, |
| "loss": 0.4536, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.1690140845070423, |
| "grad_norm": 2.1597044467926025, |
| "learning_rate": 7.670424968199099e-06, |
| "loss": 0.4748, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.1710261569416498, |
| "grad_norm": 2.033923864364624, |
| "learning_rate": 7.660514594864648e-06, |
| "loss": 0.4566, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.1730382293762576, |
| "grad_norm": 1.9592833518981934, |
| "learning_rate": 7.650589619688468e-06, |
| "loss": 0.4569, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.1750503018108651, |
| "grad_norm": 1.9300081729888916, |
| "learning_rate": 7.640650097142322e-06, |
| "loss": 0.4589, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.1770623742454729, |
| "grad_norm": 2.2498085498809814, |
| "learning_rate": 7.630696081777813e-06, |
| "loss": 0.4447, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.1790744466800804, |
| "grad_norm": 2.153975009918213, |
| "learning_rate": 7.620727628226081e-06, |
| "loss": 0.4664, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.1810865191146882, |
| "grad_norm": 2.1976206302642822, |
| "learning_rate": 7.610744791197518e-06, |
| "loss": 0.4821, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.1830985915492958, |
| "grad_norm": 2.1182820796966553, |
| "learning_rate": 7.6007476254814495e-06, |
| "loss": 0.4904, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.1851106639839033, |
| "grad_norm": 2.017240524291992, |
| "learning_rate": 7.590736185945843e-06, |
| "loss": 0.4514, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.187122736418511, |
| "grad_norm": 2.5631942749023438, |
| "learning_rate": 7.580710527537008e-06, |
| "loss": 0.4769, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.1891348088531186, |
| "grad_norm": 2.0534706115722656, |
| "learning_rate": 7.570670705279291e-06, |
| "loss": 0.4648, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.1911468812877264, |
| "grad_norm": 2.2206432819366455, |
| "learning_rate": 7.560616774274775e-06, |
| "loss": 0.5011, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.193158953722334, |
| "grad_norm": 2.055204153060913, |
| "learning_rate": 7.550548789702979e-06, |
| "loss": 0.4658, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.1951710261569417, |
| "grad_norm": 2.200359582901001, |
| "learning_rate": 7.540466806820545e-06, |
| "loss": 0.4657, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.1971830985915493, |
| "grad_norm": 1.9172611236572266, |
| "learning_rate": 7.5303708809609514e-06, |
| "loss": 0.4538, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.199195171026157, |
| "grad_norm": 2.0303874015808105, |
| "learning_rate": 7.520261067534198e-06, |
| "loss": 0.4757, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.2012072434607646, |
| "grad_norm": 2.161301851272583, |
| "learning_rate": 7.510137422026502e-06, |
| "loss": 0.4639, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.2032193158953723, |
| "grad_norm": 2.2369987964630127, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.4943, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.20523138832998, |
| "grad_norm": 2.028968095779419, |
| "learning_rate": 7.489848857092436e-06, |
| "loss": 0.4322, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.2072434607645874, |
| "grad_norm": 2.4104745388031006, |
| "learning_rate": 7.479684049016859e-06, |
| "loss": 0.4957, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.2092555331991952, |
| "grad_norm": 1.9772114753723145, |
| "learning_rate": 7.469505631561318e-06, |
| "loss": 0.4543, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.2112676056338028, |
| "grad_norm": 2.124650239944458, |
| "learning_rate": 7.459313660588557e-06, |
| "loss": 0.4756, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.2132796780684105, |
| "grad_norm": 2.071934461593628, |
| "learning_rate": 7.449108192035701e-06, |
| "loss": 0.4617, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.215291750503018, |
| "grad_norm": 2.0079896450042725, |
| "learning_rate": 7.4388892819139625e-06, |
| "loss": 0.4301, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.2173038229376258, |
| "grad_norm": 2.316357374191284, |
| "learning_rate": 7.428656986308318e-06, |
| "loss": 0.4605, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.2193158953722334, |
| "grad_norm": 1.9327945709228516, |
| "learning_rate": 7.4184113613772134e-06, |
| "loss": 0.4604, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.221327967806841, |
| "grad_norm": 1.9896938800811768, |
| "learning_rate": 7.408152463352249e-06, |
| "loss": 0.4764, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.2233400402414487, |
| "grad_norm": 2.1156723499298096, |
| "learning_rate": 7.397880348537873e-06, |
| "loss": 0.4775, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.2253521126760563, |
| "grad_norm": 1.9545114040374756, |
| "learning_rate": 7.387595073311072e-06, |
| "loss": 0.4509, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.227364185110664, |
| "grad_norm": 1.8793998956680298, |
| "learning_rate": 7.3772966941210585e-06, |
| "loss": 0.4614, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.2293762575452716, |
| "grad_norm": 2.2358322143554688, |
| "learning_rate": 7.366985267488971e-06, |
| "loss": 0.4615, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.2313883299798793, |
| "grad_norm": 1.9540613889694214, |
| "learning_rate": 7.356660850007551e-06, |
| "loss": 0.4375, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.233400402414487, |
| "grad_norm": 2.101412057876587, |
| "learning_rate": 7.346323498340839e-06, |
| "loss": 0.4818, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.2354124748490944, |
| "grad_norm": 2.190537214279175, |
| "learning_rate": 7.335973269223865e-06, |
| "loss": 0.4889, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.2374245472837022, |
| "grad_norm": 2.2181079387664795, |
| "learning_rate": 7.325610219462336e-06, |
| "loss": 0.4504, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.2394366197183098, |
| "grad_norm": 2.1350760459899902, |
| "learning_rate": 7.3152344059323165e-06, |
| "loss": 0.4696, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.2414486921529175, |
| "grad_norm": 2.065744161605835, |
| "learning_rate": 7.304845885579933e-06, |
| "loss": 0.4286, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.243460764587525, |
| "grad_norm": 2.2795002460479736, |
| "learning_rate": 7.294444715421043e-06, |
| "loss": 0.4949, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.2454728370221329, |
| "grad_norm": 2.2813029289245605, |
| "learning_rate": 7.284030952540937e-06, |
| "loss": 0.4731, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.2474849094567404, |
| "grad_norm": 1.9561939239501953, |
| "learning_rate": 7.273604654094012e-06, |
| "loss": 0.4693, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.2494969818913482, |
| "grad_norm": 1.989205241203308, |
| "learning_rate": 7.2631658773034715e-06, |
| "loss": 0.4582, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.2515090543259557, |
| "grad_norm": 1.8016010522842407, |
| "learning_rate": 7.252714679461001e-06, |
| "loss": 0.4541, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.2535211267605635, |
| "grad_norm": 2.0992701053619385, |
| "learning_rate": 7.2422511179264555e-06, |
| "loss": 0.4958, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.255533199195171, |
| "grad_norm": 2.151954412460327, |
| "learning_rate": 7.231775250127551e-06, |
| "loss": 0.4732, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.2575452716297786, |
| "grad_norm": 2.354834794998169, |
| "learning_rate": 7.221287133559537e-06, |
| "loss": 0.4532, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.2595573440643864, |
| "grad_norm": 2.047466993331909, |
| "learning_rate": 7.2107868257849e-06, |
| "loss": 0.4551, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.261569416498994, |
| "grad_norm": 2.151855945587158, |
| "learning_rate": 7.200274384433026e-06, |
| "loss": 0.4619, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.2635814889336017, |
| "grad_norm": 2.0850412845611572, |
| "learning_rate": 7.189749867199899e-06, |
| "loss": 0.4991, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.2655935613682092, |
| "grad_norm": 1.9346874952316284, |
| "learning_rate": 7.1792133318477775e-06, |
| "loss": 0.4493, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.267605633802817, |
| "grad_norm": 1.9817110300064087, |
| "learning_rate": 7.1686648362048824e-06, |
| "loss": 0.4444, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.2696177062374245, |
| "grad_norm": 2.3106513023376465, |
| "learning_rate": 7.1581044381650735e-06, |
| "loss": 0.4995, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.271629778672032, |
| "grad_norm": 2.014252185821533, |
| "learning_rate": 7.14753219568754e-06, |
| "loss": 0.4491, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.2736418511066399, |
| "grad_norm": 2.0986545085906982, |
| "learning_rate": 7.136948166796472e-06, |
| "loss": 0.4533, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.2756539235412474, |
| "grad_norm": 2.028027296066284, |
| "learning_rate": 7.126352409580749e-06, |
| "loss": 0.474, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.2776659959758552, |
| "grad_norm": 2.0538876056671143, |
| "learning_rate": 7.115744982193624e-06, |
| "loss": 0.4543, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.2796780684104627, |
| "grad_norm": 2.1324546337127686, |
| "learning_rate": 7.105125942852396e-06, |
| "loss": 0.4947, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.2816901408450705, |
| "grad_norm": 2.01959490776062, |
| "learning_rate": 7.094495349838093e-06, |
| "loss": 0.4451, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.283702213279678, |
| "grad_norm": 1.945053219795227, |
| "learning_rate": 7.083853261495159e-06, |
| "loss": 0.4766, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.2857142857142856, |
| "grad_norm": 2.1535494327545166, |
| "learning_rate": 7.073199736231123e-06, |
| "loss": 0.4911, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.2877263581488934, |
| "grad_norm": 2.027019739151001, |
| "learning_rate": 7.062534832516288e-06, |
| "loss": 0.4701, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.2897384305835011, |
| "grad_norm": 2.15201473236084, |
| "learning_rate": 7.051858608883404e-06, |
| "loss": 0.4968, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.2917505030181087, |
| "grad_norm": 2.152102470397949, |
| "learning_rate": 7.041171123927347e-06, |
| "loss": 0.451, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.2937625754527162, |
| "grad_norm": 2.2539751529693604, |
| "learning_rate": 7.0304724363048025e-06, |
| "loss": 0.4791, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.295774647887324, |
| "grad_norm": 2.0571231842041016, |
| "learning_rate": 7.019762604733939e-06, |
| "loss": 0.4843, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.2977867203219315, |
| "grad_norm": 2.099419355392456, |
| "learning_rate": 7.009041687994085e-06, |
| "loss": 0.465, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.2997987927565393, |
| "grad_norm": 2.1120150089263916, |
| "learning_rate": 6.998309744925411e-06, |
| "loss": 0.4451, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.3018108651911469, |
| "grad_norm": 1.9553170204162598, |
| "learning_rate": 6.987566834428605e-06, |
| "loss": 0.4525, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.3038229376257546, |
| "grad_norm": 1.9628238677978516, |
| "learning_rate": 6.97681301546454e-06, |
| "loss": 0.4738, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.3058350100603622, |
| "grad_norm": 1.9138386249542236, |
| "learning_rate": 6.9660483470539704e-06, |
| "loss": 0.4732, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.3078470824949697, |
| "grad_norm": 2.1095831394195557, |
| "learning_rate": 6.955272888277188e-06, |
| "loss": 0.5139, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.3098591549295775, |
| "grad_norm": 2.0262861251831055, |
| "learning_rate": 6.944486698273704e-06, |
| "loss": 0.4743, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.311871227364185, |
| "grad_norm": 2.467956304550171, |
| "learning_rate": 6.933689836241939e-06, |
| "loss": 0.4598, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.3138832997987928, |
| "grad_norm": 2.182114601135254, |
| "learning_rate": 6.92288236143887e-06, |
| "loss": 0.4858, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.3158953722334004, |
| "grad_norm": 2.1629250049591064, |
| "learning_rate": 6.912064333179729e-06, |
| "loss": 0.4857, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.3179074446680081, |
| "grad_norm": 2.0808186531066895, |
| "learning_rate": 6.901235810837668e-06, |
| "loss": 0.4631, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.3199195171026157, |
| "grad_norm": 2.05938720703125, |
| "learning_rate": 6.890396853843436e-06, |
| "loss": 0.4958, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.3219315895372232, |
| "grad_norm": 2.0860953330993652, |
| "learning_rate": 6.879547521685046e-06, |
| "loss": 0.4936, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.323943661971831, |
| "grad_norm": 2.298236131668091, |
| "learning_rate": 6.868687873907458e-06, |
| "loss": 0.4549, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.3259557344064388, |
| "grad_norm": 2.3687760829925537, |
| "learning_rate": 6.857817970112246e-06, |
| "loss": 0.4538, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.3279678068410463, |
| "grad_norm": 2.0305449962615967, |
| "learning_rate": 6.846937869957272e-06, |
| "loss": 0.448, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.3299798792756539, |
| "grad_norm": 2.1987497806549072, |
| "learning_rate": 6.836047633156361e-06, |
| "loss": 0.5089, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.3319919517102616, |
| "grad_norm": 1.9728593826293945, |
| "learning_rate": 6.8251473194789695e-06, |
| "loss": 0.4767, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.3340040241448692, |
| "grad_norm": 2.0280892848968506, |
| "learning_rate": 6.814236988749863e-06, |
| "loss": 0.4816, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.3360160965794767, |
| "grad_norm": 2.024660587310791, |
| "learning_rate": 6.8033167008487784e-06, |
| "loss": 0.4562, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.3380281690140845, |
| "grad_norm": 2.0069921016693115, |
| "learning_rate": 6.792386515710106e-06, |
| "loss": 0.4399, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.3400402414486923, |
| "grad_norm": 2.357219696044922, |
| "learning_rate": 6.7814464933225535e-06, |
| "loss": 0.4681, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.3420523138832998, |
| "grad_norm": 2.1096150875091553, |
| "learning_rate": 6.77049669372882e-06, |
| "loss": 0.44, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.3440643863179074, |
| "grad_norm": 2.167057991027832, |
| "learning_rate": 6.759537177025263e-06, |
| "loss": 0.4421, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.3460764587525151, |
| "grad_norm": 2.035834789276123, |
| "learning_rate": 6.748568003361576e-06, |
| "loss": 0.448, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.3480885311871227, |
| "grad_norm": 2.0616490840911865, |
| "learning_rate": 6.737589232940445e-06, |
| "loss": 0.4103, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.3501006036217305, |
| "grad_norm": 1.953667163848877, |
| "learning_rate": 6.726600926017234e-06, |
| "loss": 0.456, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.352112676056338, |
| "grad_norm": 1.9617230892181396, |
| "learning_rate": 6.715603142899645e-06, |
| "loss": 0.4652, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.3541247484909458, |
| "grad_norm": 2.0986597537994385, |
| "learning_rate": 6.704595943947385e-06, |
| "loss": 0.4459, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.3561368209255533, |
| "grad_norm": 2.0416882038116455, |
| "learning_rate": 6.693579389571844e-06, |
| "loss": 0.4903, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.3581488933601609, |
| "grad_norm": 2.0102877616882324, |
| "learning_rate": 6.682553540235754e-06, |
| "loss": 0.4337, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.3601609657947686, |
| "grad_norm": 2.2172913551330566, |
| "learning_rate": 6.671518456452859e-06, |
| "loss": 0.4606, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.3621730382293762, |
| "grad_norm": 2.233868360519409, |
| "learning_rate": 6.6604741987875905e-06, |
| "loss": 0.448, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.364185110663984, |
| "grad_norm": 1.9892909526824951, |
| "learning_rate": 6.649420827854729e-06, |
| "loss": 0.4605, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.3661971830985915, |
| "grad_norm": 2.053473711013794, |
| "learning_rate": 6.638358404319064e-06, |
| "loss": 0.4642, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.3682092555331993, |
| "grad_norm": 1.8882447481155396, |
| "learning_rate": 6.62728698889508e-06, |
| "loss": 0.4396, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.3702213279678068, |
| "grad_norm": 2.1912496089935303, |
| "learning_rate": 6.616206642346603e-06, |
| "loss": 0.4522, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.3722334004024144, |
| "grad_norm": 2.0848143100738525, |
| "learning_rate": 6.605117425486483e-06, |
| "loss": 0.4698, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.3742454728370221, |
| "grad_norm": 2.0212464332580566, |
| "learning_rate": 6.594019399176246e-06, |
| "loss": 0.4888, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.37625754527163, |
| "grad_norm": 2.030343532562256, |
| "learning_rate": 6.582912624325777e-06, |
| "loss": 0.4524, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.3782696177062375, |
| "grad_norm": 2.1226394176483154, |
| "learning_rate": 6.571797161892965e-06, |
| "loss": 0.5117, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.380281690140845, |
| "grad_norm": 2.0747764110565186, |
| "learning_rate": 6.5606730728833904e-06, |
| "loss": 0.4656, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.3822937625754528, |
| "grad_norm": 2.0734431743621826, |
| "learning_rate": 6.549540418349969e-06, |
| "loss": 0.4867, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.3843058350100603, |
| "grad_norm": 2.2219855785369873, |
| "learning_rate": 6.538399259392637e-06, |
| "loss": 0.4542, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.3863179074446679, |
| "grad_norm": 2.252269983291626, |
| "learning_rate": 6.527249657157998e-06, |
| "loss": 0.4614, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.3883299798792756, |
| "grad_norm": 1.9988418817520142, |
| "learning_rate": 6.516091672839e-06, |
| "loss": 0.442, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.3903420523138834, |
| "grad_norm": 2.1525936126708984, |
| "learning_rate": 6.504925367674595e-06, |
| "loss": 0.5083, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.392354124748491, |
| "grad_norm": 2.121413230895996, |
| "learning_rate": 6.4937508029493965e-06, |
| "loss": 0.4407, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.3943661971830985, |
| "grad_norm": 1.963167667388916, |
| "learning_rate": 6.482568039993356e-06, |
| "loss": 0.4743, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.3963782696177063, |
| "grad_norm": 2.271878242492676, |
| "learning_rate": 6.471377140181419e-06, |
| "loss": 0.4536, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.3983903420523138, |
| "grad_norm": 2.22976016998291, |
| "learning_rate": 6.4601781649331885e-06, |
| "loss": 0.4729, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.4004024144869216, |
| "grad_norm": 1.9575120210647583, |
| "learning_rate": 6.4489711757125814e-06, |
| "loss": 0.4685, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.4024144869215291, |
| "grad_norm": 2.0964162349700928, |
| "learning_rate": 6.437756234027512e-06, |
| "loss": 0.5093, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.404426559356137, |
| "grad_norm": 2.1892035007476807, |
| "learning_rate": 6.4265334014295284e-06, |
| "loss": 0.5002, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.4064386317907445, |
| "grad_norm": 2.233177423477173, |
| "learning_rate": 6.415302739513492e-06, |
| "loss": 0.4899, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.408450704225352, |
| "grad_norm": 1.9493423700332642, |
| "learning_rate": 6.40406430991723e-06, |
| "loss": 0.4715, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.4104627766599598, |
| "grad_norm": 2.131361246109009, |
| "learning_rate": 6.392818174321213e-06, |
| "loss": 0.491, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.4124748490945673, |
| "grad_norm": 1.9579322338104248, |
| "learning_rate": 6.3815643944481866e-06, |
| "loss": 0.439, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.414486921529175, |
| "grad_norm": 1.9951404333114624, |
| "learning_rate": 6.370303032062869e-06, |
| "loss": 0.4235, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.4164989939637826, |
| "grad_norm": 2.19970965385437, |
| "learning_rate": 6.359034148971581e-06, |
| "loss": 0.4866, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.4185110663983904, |
| "grad_norm": 1.9917786121368408, |
| "learning_rate": 6.347757807021926e-06, |
| "loss": 0.4717, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.420523138832998, |
| "grad_norm": 1.9985647201538086, |
| "learning_rate": 6.336474068102444e-06, |
| "loss": 0.4991, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.4225352112676055, |
| "grad_norm": 2.0368287563323975, |
| "learning_rate": 6.325182994142267e-06, |
| "loss": 0.4417, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.4245472837022133, |
| "grad_norm": 1.9710001945495605, |
| "learning_rate": 6.3138846471107925e-06, |
| "loss": 0.46, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.426559356136821, |
| "grad_norm": 2.040895462036133, |
| "learning_rate": 6.302579089017328e-06, |
| "loss": 0.4903, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 2.1418488025665283, |
| "learning_rate": 6.291266381910761e-06, |
| "loss": 0.4745, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.4305835010060362, |
| "grad_norm": 1.9608333110809326, |
| "learning_rate": 6.279946587879216e-06, |
| "loss": 0.4524, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.432595573440644, |
| "grad_norm": 2.0412542819976807, |
| "learning_rate": 6.268619769049713e-06, |
| "loss": 0.4441, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.4346076458752515, |
| "grad_norm": 1.8654680252075195, |
| "learning_rate": 6.2572859875878225e-06, |
| "loss": 0.4107, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.436619718309859, |
| "grad_norm": 1.9783178567886353, |
| "learning_rate": 6.245945305697335e-06, |
| "loss": 0.4386, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.4386317907444668, |
| "grad_norm": 2.1955788135528564, |
| "learning_rate": 6.234597785619906e-06, |
| "loss": 0.4611, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.4406438631790746, |
| "grad_norm": 1.9734547138214111, |
| "learning_rate": 6.223243489634727e-06, |
| "loss": 0.4506, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.442655935613682, |
| "grad_norm": 2.019916534423828, |
| "learning_rate": 6.211882480058175e-06, |
| "loss": 0.4369, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.4446680080482897, |
| "grad_norm": 2.066774368286133, |
| "learning_rate": 6.200514819243476e-06, |
| "loss": 0.472, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.4466800804828974, |
| "grad_norm": 2.1636836528778076, |
| "learning_rate": 6.189140569580356e-06, |
| "loss": 0.4954, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.448692152917505, |
| "grad_norm": 1.956506371498108, |
| "learning_rate": 6.1777597934947084e-06, |
| "loss": 0.4858, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.4507042253521127, |
| "grad_norm": 2.098059892654419, |
| "learning_rate": 6.166372553448241e-06, |
| "loss": 0.4979, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.4527162977867203, |
| "grad_norm": 2.1118078231811523, |
| "learning_rate": 6.154978911938143e-06, |
| "loss": 0.4482, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.454728370221328, |
| "grad_norm": 2.1067488193511963, |
| "learning_rate": 6.143578931496732e-06, |
| "loss": 0.4546, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.4567404426559356, |
| "grad_norm": 2.073150396347046, |
| "learning_rate": 6.132172674691119e-06, |
| "loss": 0.4523, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.4587525150905432, |
| "grad_norm": 1.9528892040252686, |
| "learning_rate": 6.120760204122862e-06, |
| "loss": 0.4583, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.460764587525151, |
| "grad_norm": 2.218679189682007, |
| "learning_rate": 6.109341582427621e-06, |
| "loss": 0.4744, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.4627766599597585, |
| "grad_norm": 2.0021162033081055, |
| "learning_rate": 6.097916872274815e-06, |
| "loss": 0.4482, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.4647887323943662, |
| "grad_norm": 2.2345480918884277, |
| "learning_rate": 6.086486136367281e-06, |
| "loss": 0.4799, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.4668008048289738, |
| "grad_norm": 2.0748777389526367, |
| "learning_rate": 6.075049437440927e-06, |
| "loss": 0.4432, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.4688128772635816, |
| "grad_norm": 2.083968162536621, |
| "learning_rate": 6.063606838264384e-06, |
| "loss": 0.4438, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.470824949698189, |
| "grad_norm": 1.9462379217147827, |
| "learning_rate": 6.0521584016386735e-06, |
| "loss": 0.4895, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.4728370221327967, |
| "grad_norm": 2.2787818908691406, |
| "learning_rate": 6.040704190396847e-06, |
| "loss": 0.4758, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.4748490945674044, |
| "grad_norm": 2.0492944717407227, |
| "learning_rate": 6.029244267403652e-06, |
| "loss": 0.4756, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.4768611670020122, |
| "grad_norm": 1.994223952293396, |
| "learning_rate": 6.0177786955551874e-06, |
| "loss": 0.4213, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.4788732394366197, |
| "grad_norm": 2.0081369876861572, |
| "learning_rate": 6.006307537778552e-06, |
| "loss": 0.4307, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.4808853118712273, |
| "grad_norm": 1.9906424283981323, |
| "learning_rate": 5.9948308570315e-06, |
| "loss": 0.4815, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.482897384305835, |
| "grad_norm": 2.073981761932373, |
| "learning_rate": 5.983348716302101e-06, |
| "loss": 0.4892, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.4849094567404426, |
| "grad_norm": 2.121760606765747, |
| "learning_rate": 5.97186117860839e-06, |
| "loss": 0.4734, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.4869215291750504, |
| "grad_norm": 2.0857138633728027, |
| "learning_rate": 5.960368306998023e-06, |
| "loss": 0.4555, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.488933601609658, |
| "grad_norm": 2.2429444789886475, |
| "learning_rate": 5.948870164547932e-06, |
| "loss": 0.4985, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.4909456740442657, |
| "grad_norm": 2.1200976371765137, |
| "learning_rate": 5.9373668143639694e-06, |
| "loss": 0.4807, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.4929577464788732, |
| "grad_norm": 2.1874051094055176, |
| "learning_rate": 5.92585831958058e-06, |
| "loss": 0.4907, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.4949698189134808, |
| "grad_norm": 1.9364540576934814, |
| "learning_rate": 5.914344743360435e-06, |
| "loss": 0.4441, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.4969818913480886, |
| "grad_norm": 2.2439324855804443, |
| "learning_rate": 5.902826148894102e-06, |
| "loss": 0.4845, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.4989939637826961, |
| "grad_norm": 2.184908390045166, |
| "learning_rate": 5.891302599399686e-06, |
| "loss": 0.4523, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.5010060362173037, |
| "grad_norm": 2.3075244426727295, |
| "learning_rate": 5.8797741581224866e-06, |
| "loss": 0.4966, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.5030181086519114, |
| "grad_norm": 2.011004686355591, |
| "learning_rate": 5.8682408883346535e-06, |
| "loss": 0.4264, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.5050301810865192, |
| "grad_norm": 2.2199621200561523, |
| "learning_rate": 5.856702853334833e-06, |
| "loss": 0.4877, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.5070422535211268, |
| "grad_norm": 2.1460952758789062, |
| "learning_rate": 5.845160116447833e-06, |
| "loss": 0.4548, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.5090543259557343, |
| "grad_norm": 2.155158758163452, |
| "learning_rate": 5.833612741024256e-06, |
| "loss": 0.4729, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.511066398390342, |
| "grad_norm": 2.3106179237365723, |
| "learning_rate": 5.8220607904401725e-06, |
| "loss": 0.5055, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.5130784708249498, |
| "grad_norm": 2.1805429458618164, |
| "learning_rate": 5.810504328096756e-06, |
| "loss": 0.4714, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.5150905432595574, |
| "grad_norm": 1.9099804162979126, |
| "learning_rate": 5.798943417419944e-06, |
| "loss": 0.4072, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.517102615694165, |
| "grad_norm": 2.059183359146118, |
| "learning_rate": 5.78737812186009e-06, |
| "loss": 0.4982, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.5191146881287727, |
| "grad_norm": 2.180112600326538, |
| "learning_rate": 5.775808504891612e-06, |
| "loss": 0.4974, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.5211267605633803, |
| "grad_norm": 2.0823423862457275, |
| "learning_rate": 5.764234630012643e-06, |
| "loss": 0.4779, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.5231388329979878, |
| "grad_norm": 2.170238733291626, |
| "learning_rate": 5.752656560744692e-06, |
| "loss": 0.4495, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.5251509054325956, |
| "grad_norm": 2.0095527172088623, |
| "learning_rate": 5.741074360632278e-06, |
| "loss": 0.4173, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.5271629778672033, |
| "grad_norm": 2.175075054168701, |
| "learning_rate": 5.729488093242601e-06, |
| "loss": 0.4744, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.529175050301811, |
| "grad_norm": 2.083327293395996, |
| "learning_rate": 5.717897822165179e-06, |
| "loss": 0.4455, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.5311871227364184, |
| "grad_norm": 1.970665454864502, |
| "learning_rate": 5.706303611011502e-06, |
| "loss": 0.4756, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.5331991951710262, |
| "grad_norm": 2.1470654010772705, |
| "learning_rate": 5.694705523414691e-06, |
| "loss": 0.4607, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.5352112676056338, |
| "grad_norm": 2.2010574340820312, |
| "learning_rate": 5.6831036230291345e-06, |
| "loss": 0.4645, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.5372233400402413, |
| "grad_norm": 1.93354332447052, |
| "learning_rate": 5.671497973530152e-06, |
| "loss": 0.4472, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.539235412474849, |
| "grad_norm": 2.1269092559814453, |
| "learning_rate": 5.659888638613638e-06, |
| "loss": 0.4577, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.5412474849094568, |
| "grad_norm": 2.004490852355957, |
| "learning_rate": 5.648275681995716e-06, |
| "loss": 0.4566, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.5432595573440644, |
| "grad_norm": 1.9603842496871948, |
| "learning_rate": 5.636659167412381e-06, |
| "loss": 0.4608, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.545271629778672, |
| "grad_norm": 2.082428216934204, |
| "learning_rate": 5.625039158619161e-06, |
| "loss": 0.4735, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.5472837022132797, |
| "grad_norm": 2.371439218521118, |
| "learning_rate": 5.613415719390759e-06, |
| "loss": 0.4786, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.5492957746478875, |
| "grad_norm": 2.142385482788086, |
| "learning_rate": 5.601788913520706e-06, |
| "loss": 0.484, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.5513078470824948, |
| "grad_norm": 2.027139663696289, |
| "learning_rate": 5.590158804821011e-06, |
| "loss": 0.4389, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.5533199195171026, |
| "grad_norm": 2.114689588546753, |
| "learning_rate": 5.578525457121807e-06, |
| "loss": 0.4502, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.5553319919517103, |
| "grad_norm": 2.133058786392212, |
| "learning_rate": 5.566888934271007e-06, |
| "loss": 0.4906, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.557344064386318, |
| "grad_norm": 1.9754748344421387, |
| "learning_rate": 5.5552493001339535e-06, |
| "loss": 0.4646, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.5593561368209254, |
| "grad_norm": 2.156743288040161, |
| "learning_rate": 5.543606618593053e-06, |
| "loss": 0.4571, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.5613682092555332, |
| "grad_norm": 2.0975053310394287, |
| "learning_rate": 5.531960953547452e-06, |
| "loss": 0.47, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.563380281690141, |
| "grad_norm": 2.1429851055145264, |
| "learning_rate": 5.520312368912661e-06, |
| "loss": 0.4562, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.5653923541247485, |
| "grad_norm": 2.181736946105957, |
| "learning_rate": 5.508660928620216e-06, |
| "loss": 0.4377, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.567404426559356, |
| "grad_norm": 2.0732667446136475, |
| "learning_rate": 5.497006696617333e-06, |
| "loss": 0.4527, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.5694164989939638, |
| "grad_norm": 2.0271811485290527, |
| "learning_rate": 5.485349736866541e-06, |
| "loss": 0.4733, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.5714285714285714, |
| "grad_norm": 2.1071932315826416, |
| "learning_rate": 5.473690113345343e-06, |
| "loss": 0.4597, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.573440643863179, |
| "grad_norm": 2.325165033340454, |
| "learning_rate": 5.462027890045862e-06, |
| "loss": 0.4859, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.5754527162977867, |
| "grad_norm": 2.015083074569702, |
| "learning_rate": 5.450363130974492e-06, |
| "loss": 0.4725, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.5774647887323945, |
| "grad_norm": 2.0369396209716797, |
| "learning_rate": 5.438695900151537e-06, |
| "loss": 0.4506, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.579476861167002, |
| "grad_norm": 1.943955898284912, |
| "learning_rate": 5.427026261610877e-06, |
| "loss": 0.4499, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.5814889336016096, |
| "grad_norm": 2.0148682594299316, |
| "learning_rate": 5.4153542793995985e-06, |
| "loss": 0.4854, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.5835010060362174, |
| "grad_norm": 1.9239193201065063, |
| "learning_rate": 5.403680017577653e-06, |
| "loss": 0.429, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.585513078470825, |
| "grad_norm": 2.050187587738037, |
| "learning_rate": 5.392003540217505e-06, |
| "loss": 0.4537, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.5875251509054324, |
| "grad_norm": 1.947845697402954, |
| "learning_rate": 5.380324911403776e-06, |
| "loss": 0.4409, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.5895372233400402, |
| "grad_norm": 1.9879707098007202, |
| "learning_rate": 5.368644195232896e-06, |
| "loss": 0.4574, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.591549295774648, |
| "grad_norm": 2.045276165008545, |
| "learning_rate": 5.356961455812754e-06, |
| "loss": 0.462, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.5935613682092555, |
| "grad_norm": 1.8802539110183716, |
| "learning_rate": 5.34527675726234e-06, |
| "loss": 0.4382, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.595573440643863, |
| "grad_norm": 2.2450199127197266, |
| "learning_rate": 5.3335901637113985e-06, |
| "loss": 0.4532, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.5975855130784709, |
| "grad_norm": 1.9864826202392578, |
| "learning_rate": 5.321901739300074e-06, |
| "loss": 0.4517, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.5995975855130786, |
| "grad_norm": 2.0585172176361084, |
| "learning_rate": 5.310211548178556e-06, |
| "loss": 0.4497, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.6016096579476862, |
| "grad_norm": 2.0852487087249756, |
| "learning_rate": 5.298519654506736e-06, |
| "loss": 0.4839, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.6036217303822937, |
| "grad_norm": 1.9984678030014038, |
| "learning_rate": 5.286826122453847e-06, |
| "loss": 0.4271, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.6056338028169015, |
| "grad_norm": 2.0208373069763184, |
| "learning_rate": 5.275131016198112e-06, |
| "loss": 0.4755, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.607645875251509, |
| "grad_norm": 2.085428237915039, |
| "learning_rate": 5.2634343999263985e-06, |
| "loss": 0.4494, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.6096579476861166, |
| "grad_norm": 2.018244981765747, |
| "learning_rate": 5.251736337833857e-06, |
| "loss": 0.4527, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.6116700201207244, |
| "grad_norm": 2.217186450958252, |
| "learning_rate": 5.2400368941235745e-06, |
| "loss": 0.4691, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.6136820925553321, |
| "grad_norm": 2.0559592247009277, |
| "learning_rate": 5.228336133006223e-06, |
| "loss": 0.4481, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.6156941649899397, |
| "grad_norm": 1.9424960613250732, |
| "learning_rate": 5.216634118699701e-06, |
| "loss": 0.4291, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.6177062374245472, |
| "grad_norm": 2.2018039226531982, |
| "learning_rate": 5.20493091542879e-06, |
| "loss": 0.4898, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.619718309859155, |
| "grad_norm": 2.1593589782714844, |
| "learning_rate": 5.193226587424793e-06, |
| "loss": 0.4734, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.6217303822937625, |
| "grad_norm": 2.275895118713379, |
| "learning_rate": 5.181521198925183e-06, |
| "loss": 0.4686, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.62374245472837, |
| "grad_norm": 2.2770731449127197, |
| "learning_rate": 5.169814814173263e-06, |
| "loss": 0.5107, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.6257545271629779, |
| "grad_norm": 2.013920545578003, |
| "learning_rate": 5.158107497417795e-06, |
| "loss": 0.4326, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.6277665995975856, |
| "grad_norm": 2.1957645416259766, |
| "learning_rate": 5.14639931291266e-06, |
| "loss": 0.4449, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.6297786720321932, |
| "grad_norm": 2.270573854446411, |
| "learning_rate": 5.134690324916502e-06, |
| "loss": 0.4456, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.6317907444668007, |
| "grad_norm": 2.1294291019439697, |
| "learning_rate": 5.122980597692372e-06, |
| "loss": 0.461, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.6338028169014085, |
| "grad_norm": 2.094083309173584, |
| "learning_rate": 5.11127019550738e-06, |
| "loss": 0.4692, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.635814889336016, |
| "grad_norm": 2.167789936065674, |
| "learning_rate": 5.099559182632342e-06, |
| "loss": 0.4748, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.6378269617706236, |
| "grad_norm": 2.0614733695983887, |
| "learning_rate": 5.087847623341421e-06, |
| "loss": 0.4512, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.6398390342052314, |
| "grad_norm": 2.1112565994262695, |
| "learning_rate": 5.076135581911784e-06, |
| "loss": 0.4708, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.6418511066398391, |
| "grad_norm": 2.078747272491455, |
| "learning_rate": 5.0644231226232434e-06, |
| "loss": 0.4351, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.6438631790744467, |
| "grad_norm": 2.212445020675659, |
| "learning_rate": 5.052710309757899e-06, |
| "loss": 0.484, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.6458752515090542, |
| "grad_norm": 2.132495880126953, |
| "learning_rate": 5.040997207599798e-06, |
| "loss": 0.456, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.647887323943662, |
| "grad_norm": 2.0021562576293945, |
| "learning_rate": 5.029283880434575e-06, |
| "loss": 0.4747, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.6498993963782698, |
| "grad_norm": 2.115347146987915, |
| "learning_rate": 5.0175703925490936e-06, |
| "loss": 0.457, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.6519114688128773, |
| "grad_norm": 1.9082791805267334, |
| "learning_rate": 5.005856808231108e-06, |
| "loss": 0.4414, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.6539235412474849, |
| "grad_norm": 1.9503601789474487, |
| "learning_rate": 4.994143191768893e-06, |
| "loss": 0.4342, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.6559356136820926, |
| "grad_norm": 2.0434417724609375, |
| "learning_rate": 4.982429607450907e-06, |
| "loss": 0.4592, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.6579476861167002, |
| "grad_norm": 2.0412988662719727, |
| "learning_rate": 4.970716119565427e-06, |
| "loss": 0.4432, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.6599597585513077, |
| "grad_norm": 1.9840989112854004, |
| "learning_rate": 4.959002792400205e-06, |
| "loss": 0.4636, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.6619718309859155, |
| "grad_norm": 2.2154717445373535, |
| "learning_rate": 4.947289690242103e-06, |
| "loss": 0.5052, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.6639839034205233, |
| "grad_norm": 2.267805337905884, |
| "learning_rate": 4.935576877376759e-06, |
| "loss": 0.4632, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.6659959758551308, |
| "grad_norm": 2.170832872390747, |
| "learning_rate": 4.9238644180882175e-06, |
| "loss": 0.4847, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.6680080482897384, |
| "grad_norm": 2.076737403869629, |
| "learning_rate": 4.91215237665858e-06, |
| "loss": 0.4341, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.6700201207243461, |
| "grad_norm": 1.9692955017089844, |
| "learning_rate": 4.900440817367661e-06, |
| "loss": 0.456, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.6720321931589537, |
| "grad_norm": 1.9987684488296509, |
| "learning_rate": 4.88872980449262e-06, |
| "loss": 0.4358, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.6740442655935612, |
| "grad_norm": 2.1247968673706055, |
| "learning_rate": 4.877019402307629e-06, |
| "loss": 0.461, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.676056338028169, |
| "grad_norm": 2.2850656509399414, |
| "learning_rate": 4.8653096750835e-06, |
| "loss": 0.4332, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.6780684104627768, |
| "grad_norm": 2.0370311737060547, |
| "learning_rate": 4.853600687087342e-06, |
| "loss": 0.4627, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.6800804828973843, |
| "grad_norm": 2.027519702911377, |
| "learning_rate": 4.841892502582206e-06, |
| "loss": 0.4236, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.6820925553319919, |
| "grad_norm": 2.3669791221618652, |
| "learning_rate": 4.830185185826739e-06, |
| "loss": 0.5049, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.6841046277665996, |
| "grad_norm": 2.097409963607788, |
| "learning_rate": 4.818478801074818e-06, |
| "loss": 0.4571, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.6861167002012074, |
| "grad_norm": 1.9327471256256104, |
| "learning_rate": 4.806773412575211e-06, |
| "loss": 0.4383, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.6881287726358147, |
| "grad_norm": 1.9600250720977783, |
| "learning_rate": 4.795069084571211e-06, |
| "loss": 0.4242, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.6901408450704225, |
| "grad_norm": 2.109365224838257, |
| "learning_rate": 4.7833658813002995e-06, |
| "loss": 0.4403, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.6921529175050303, |
| "grad_norm": 2.030219793319702, |
| "learning_rate": 4.7716638669937784e-06, |
| "loss": 0.4917, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.6941649899396378, |
| "grad_norm": 1.9466344118118286, |
| "learning_rate": 4.759963105876428e-06, |
| "loss": 0.4385, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.6961770623742454, |
| "grad_norm": 2.284513473510742, |
| "learning_rate": 4.748263662166145e-06, |
| "loss": 0.4599, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.6981891348088531, |
| "grad_norm": 2.1511683464050293, |
| "learning_rate": 4.736565600073602e-06, |
| "loss": 0.4905, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.700201207243461, |
| "grad_norm": 2.027404308319092, |
| "learning_rate": 4.724868983801889e-06, |
| "loss": 0.464, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.7022132796780685, |
| "grad_norm": 2.0017733573913574, |
| "learning_rate": 4.713173877546155e-06, |
| "loss": 0.4683, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.704225352112676, |
| "grad_norm": 2.067063331604004, |
| "learning_rate": 4.701480345493266e-06, |
| "loss": 0.4882, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.7062374245472838, |
| "grad_norm": 2.250122547149658, |
| "learning_rate": 4.689788451821445e-06, |
| "loss": 0.4518, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.7082494969818913, |
| "grad_norm": 1.9627032279968262, |
| "learning_rate": 4.678098260699928e-06, |
| "loss": 0.4553, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.7102615694164989, |
| "grad_norm": 2.173635959625244, |
| "learning_rate": 4.666409836288603e-06, |
| "loss": 0.454, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.7122736418511066, |
| "grad_norm": 2.029207944869995, |
| "learning_rate": 4.654723242737661e-06, |
| "loss": 0.4599, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.7142857142857144, |
| "grad_norm": 2.130281925201416, |
| "learning_rate": 4.643038544187246e-06, |
| "loss": 0.4592, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.716297786720322, |
| "grad_norm": 1.9927594661712646, |
| "learning_rate": 4.631355804767106e-06, |
| "loss": 0.481, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.7183098591549295, |
| "grad_norm": 2.2229726314544678, |
| "learning_rate": 4.619675088596226e-06, |
| "loss": 0.4815, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.7203219315895373, |
| "grad_norm": 2.119466781616211, |
| "learning_rate": 4.607996459782498e-06, |
| "loss": 0.4562, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.7223340040241448, |
| "grad_norm": 2.228403329849243, |
| "learning_rate": 4.596319982422348e-06, |
| "loss": 0.4897, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.7243460764587524, |
| "grad_norm": 2.0544867515563965, |
| "learning_rate": 4.584645720600403e-06, |
| "loss": 0.4711, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.7263581488933601, |
| "grad_norm": 1.99813711643219, |
| "learning_rate": 4.572973738389124e-06, |
| "loss": 0.4732, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.728370221327968, |
| "grad_norm": 2.0984408855438232, |
| "learning_rate": 4.561304099848464e-06, |
| "loss": 0.4765, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.7303822937625755, |
| "grad_norm": 2.145559310913086, |
| "learning_rate": 4.549636869025511e-06, |
| "loss": 0.4586, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.732394366197183, |
| "grad_norm": 2.286984443664551, |
| "learning_rate": 4.5379721099541385e-06, |
| "loss": 0.4723, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.7344064386317908, |
| "grad_norm": 1.8818014860153198, |
| "learning_rate": 4.526309886654659e-06, |
| "loss": 0.4366, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.7364185110663986, |
| "grad_norm": 1.8989081382751465, |
| "learning_rate": 4.514650263133461e-06, |
| "loss": 0.4338, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.7384305835010059, |
| "grad_norm": 2.0375852584838867, |
| "learning_rate": 4.502993303382669e-06, |
| "loss": 0.4514, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.7404426559356136, |
| "grad_norm": 2.0181658267974854, |
| "learning_rate": 4.491339071379783e-06, |
| "loss": 0.4441, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.7424547283702214, |
| "grad_norm": 2.303046941757202, |
| "learning_rate": 4.47968763108734e-06, |
| "loss": 0.4722, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.744466800804829, |
| "grad_norm": 2.083085298538208, |
| "learning_rate": 4.46803904645255e-06, |
| "loss": 0.4504, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.7464788732394365, |
| "grad_norm": 2.0313520431518555, |
| "learning_rate": 4.4563933814069475e-06, |
| "loss": 0.4545, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.7484909456740443, |
| "grad_norm": 2.080291748046875, |
| "learning_rate": 4.444750699866047e-06, |
| "loss": 0.444, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.750503018108652, |
| "grad_norm": 2.0764756202697754, |
| "learning_rate": 4.433111065728992e-06, |
| "loss": 0.4334, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.7525150905432596, |
| "grad_norm": 1.9911096096038818, |
| "learning_rate": 4.4214745428781946e-06, |
| "loss": 0.4635, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.7545271629778671, |
| "grad_norm": 2.1145546436309814, |
| "learning_rate": 4.409841195178991e-06, |
| "loss": 0.4674, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.756539235412475, |
| "grad_norm": 2.2299070358276367, |
| "learning_rate": 4.3982110864792956e-06, |
| "loss": 0.4466, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.7585513078470825, |
| "grad_norm": 2.0294342041015625, |
| "learning_rate": 4.386584280609242e-06, |
| "loss": 0.4633, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.76056338028169, |
| "grad_norm": 1.9908781051635742, |
| "learning_rate": 4.37496084138084e-06, |
| "loss": 0.433, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.7625754527162978, |
| "grad_norm": 2.0967750549316406, |
| "learning_rate": 4.363340832587621e-06, |
| "loss": 0.4488, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.7645875251509056, |
| "grad_norm": 1.912702202796936, |
| "learning_rate": 4.351724318004286e-06, |
| "loss": 0.4405, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.766599597585513, |
| "grad_norm": 1.9199731349945068, |
| "learning_rate": 4.340111361386361e-06, |
| "loss": 0.4658, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.7686116700201207, |
| "grad_norm": 2.046421527862549, |
| "learning_rate": 4.328502026469849e-06, |
| "loss": 0.4595, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.7706237424547284, |
| "grad_norm": 2.1802546977996826, |
| "learning_rate": 4.316896376970866e-06, |
| "loss": 0.4886, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.772635814889336, |
| "grad_norm": 2.084728240966797, |
| "learning_rate": 4.305294476585312e-06, |
| "loss": 0.4269, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.7746478873239435, |
| "grad_norm": 2.1468799114227295, |
| "learning_rate": 4.293696388988498e-06, |
| "loss": 0.4661, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.7766599597585513, |
| "grad_norm": 1.940514087677002, |
| "learning_rate": 4.282102177834822e-06, |
| "loss": 0.4365, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.778672032193159, |
| "grad_norm": 2.1592133045196533, |
| "learning_rate": 4.2705119067574006e-06, |
| "loss": 0.4283, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.7806841046277666, |
| "grad_norm": 2.060865879058838, |
| "learning_rate": 4.258925639367723e-06, |
| "loss": 0.433, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.7826961770623742, |
| "grad_norm": 1.940653681755066, |
| "learning_rate": 4.2473434392553115e-06, |
| "loss": 0.4262, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.784708249496982, |
| "grad_norm": 2.010108709335327, |
| "learning_rate": 4.235765369987358e-06, |
| "loss": 0.4684, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.7867203219315897, |
| "grad_norm": 1.950357437133789, |
| "learning_rate": 4.224191495108391e-06, |
| "loss": 0.4145, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.788732394366197, |
| "grad_norm": 2.0455737113952637, |
| "learning_rate": 4.212621878139912e-06, |
| "loss": 0.4411, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.7907444668008048, |
| "grad_norm": 1.9487494230270386, |
| "learning_rate": 4.201056582580059e-06, |
| "loss": 0.4615, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.7927565392354126, |
| "grad_norm": 2.0646378993988037, |
| "learning_rate": 4.189495671903246e-06, |
| "loss": 0.4776, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.79476861167002, |
| "grad_norm": 1.9958351850509644, |
| "learning_rate": 4.177939209559828e-06, |
| "loss": 0.4467, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.7967806841046277, |
| "grad_norm": 2.160418748855591, |
| "learning_rate": 4.1663872589757445e-06, |
| "loss": 0.4512, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.7987927565392354, |
| "grad_norm": 2.015880584716797, |
| "learning_rate": 4.154839883552169e-06, |
| "loss": 0.4536, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.8008048289738432, |
| "grad_norm": 2.0357439517974854, |
| "learning_rate": 4.143297146665167e-06, |
| "loss": 0.4459, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.8028169014084507, |
| "grad_norm": 2.141810178756714, |
| "learning_rate": 4.131759111665349e-06, |
| "loss": 0.4381, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.8048289738430583, |
| "grad_norm": 1.9679553508758545, |
| "learning_rate": 4.120225841877515e-06, |
| "loss": 0.4573, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.806841046277666, |
| "grad_norm": 2.099289894104004, |
| "learning_rate": 4.108697400600316e-06, |
| "loss": 0.4425, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.8088531187122736, |
| "grad_norm": 1.9328632354736328, |
| "learning_rate": 4.0971738511059e-06, |
| "loss": 0.4328, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.8108651911468812, |
| "grad_norm": 1.991387128829956, |
| "learning_rate": 4.085655256639565e-06, |
| "loss": 0.4432, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.812877263581489, |
| "grad_norm": 1.9772350788116455, |
| "learning_rate": 4.074141680419422e-06, |
| "loss": 0.4606, |
| "step": 901 |
| }, |
| { |
| "epoch": 1.8148893360160967, |
| "grad_norm": 2.063642978668213, |
| "learning_rate": 4.062633185636031e-06, |
| "loss": 0.4283, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.8169014084507042, |
| "grad_norm": 2.158418655395508, |
| "learning_rate": 4.051129835452071e-06, |
| "loss": 0.4539, |
| "step": 903 |
| }, |
| { |
| "epoch": 1.8189134808853118, |
| "grad_norm": 2.029050827026367, |
| "learning_rate": 4.039631693001976e-06, |
| "loss": 0.4085, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.8209255533199196, |
| "grad_norm": 2.1008615493774414, |
| "learning_rate": 4.028138821391611e-06, |
| "loss": 0.4713, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.8229376257545271, |
| "grad_norm": 2.0045077800750732, |
| "learning_rate": 4.016651283697901e-06, |
| "loss": 0.4516, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.8249496981891347, |
| "grad_norm": 1.9948246479034424, |
| "learning_rate": 4.005169142968503e-06, |
| "loss": 0.4344, |
| "step": 907 |
| }, |
| { |
| "epoch": 1.8269617706237424, |
| "grad_norm": 2.154141664505005, |
| "learning_rate": 3.99369246222145e-06, |
| "loss": 0.4837, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.8289738430583502, |
| "grad_norm": 2.0279178619384766, |
| "learning_rate": 3.982221304444813e-06, |
| "loss": 0.4489, |
| "step": 909 |
| }, |
| { |
| "epoch": 1.8309859154929577, |
| "grad_norm": 2.0363872051239014, |
| "learning_rate": 3.970755732596349e-06, |
| "loss": 0.4596, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.8329979879275653, |
| "grad_norm": 2.062293767929077, |
| "learning_rate": 3.959295809603155e-06, |
| "loss": 0.4652, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.835010060362173, |
| "grad_norm": 1.976284384727478, |
| "learning_rate": 3.947841598361329e-06, |
| "loss": 0.4415, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.8370221327967808, |
| "grad_norm": 2.208650827407837, |
| "learning_rate": 3.936393161735616e-06, |
| "loss": 0.4848, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.8390342052313882, |
| "grad_norm": 2.186150550842285, |
| "learning_rate": 3.924950562559074e-06, |
| "loss": 0.4438, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.841046277665996, |
| "grad_norm": 2.255722999572754, |
| "learning_rate": 3.91351386363272e-06, |
| "loss": 0.452, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.8430583501006037, |
| "grad_norm": 2.2457187175750732, |
| "learning_rate": 3.902083127725186e-06, |
| "loss": 0.4987, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.8450704225352113, |
| "grad_norm": 2.1687653064727783, |
| "learning_rate": 3.890658417572379e-06, |
| "loss": 0.484, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.8470824949698188, |
| "grad_norm": 2.064213752746582, |
| "learning_rate": 3.879239795877139e-06, |
| "loss": 0.4827, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.8490945674044266, |
| "grad_norm": 2.2020626068115234, |
| "learning_rate": 3.867827325308882e-06, |
| "loss": 0.4244, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.8511066398390343, |
| "grad_norm": 2.0802299976348877, |
| "learning_rate": 3.8564210685032695e-06, |
| "loss": 0.4633, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.8531187122736419, |
| "grad_norm": 2.077509641647339, |
| "learning_rate": 3.845021088061858e-06, |
| "loss": 0.4269, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.8551307847082494, |
| "grad_norm": 2.064271926879883, |
| "learning_rate": 3.83362744655176e-06, |
| "loss": 0.4551, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.8571428571428572, |
| "grad_norm": 1.8204706907272339, |
| "learning_rate": 3.822240206505293e-06, |
| "loss": 0.4576, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.8591549295774648, |
| "grad_norm": 2.13497257232666, |
| "learning_rate": 3.810859430419646e-06, |
| "loss": 0.4544, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.8611670020120723, |
| "grad_norm": 2.3285865783691406, |
| "learning_rate": 3.799485180756526e-06, |
| "loss": 0.4876, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.86317907444668, |
| "grad_norm": 2.118795394897461, |
| "learning_rate": 3.788117519941825e-06, |
| "loss": 0.4532, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.8651911468812878, |
| "grad_norm": 2.0889439582824707, |
| "learning_rate": 3.776756510365275e-06, |
| "loss": 0.448, |
| "step": 927 |
| }, |
| { |
| "epoch": 1.8672032193158954, |
| "grad_norm": 2.229954957962036, |
| "learning_rate": 3.765402214380095e-06, |
| "loss": 0.4849, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.869215291750503, |
| "grad_norm": 2.2357869148254395, |
| "learning_rate": 3.7540546943026677e-06, |
| "loss": 0.431, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.8712273641851107, |
| "grad_norm": 2.2264294624328613, |
| "learning_rate": 3.7427140124121774e-06, |
| "loss": 0.4364, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.8732394366197183, |
| "grad_norm": 1.928287148475647, |
| "learning_rate": 3.731380230950288e-06, |
| "loss": 0.4132, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.8752515090543258, |
| "grad_norm": 2.287692070007324, |
| "learning_rate": 3.720053412120784e-06, |
| "loss": 0.4247, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.8772635814889336, |
| "grad_norm": 2.039332151412964, |
| "learning_rate": 3.7087336180892395e-06, |
| "loss": 0.4352, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.8792756539235413, |
| "grad_norm": 2.0014684200286865, |
| "learning_rate": 3.6974209109826724e-06, |
| "loss": 0.4743, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.881287726358149, |
| "grad_norm": 2.0897486209869385, |
| "learning_rate": 3.686115352889209e-06, |
| "loss": 0.4952, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.8832997987927564, |
| "grad_norm": 2.2058608531951904, |
| "learning_rate": 3.674817005857735e-06, |
| "loss": 0.4366, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.8853118712273642, |
| "grad_norm": 2.0209622383117676, |
| "learning_rate": 3.663525931897559e-06, |
| "loss": 0.4689, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.887323943661972, |
| "grad_norm": 2.1546790599823, |
| "learning_rate": 3.6522421929780746e-06, |
| "loss": 0.4743, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.8893360160965795, |
| "grad_norm": 2.0966379642486572, |
| "learning_rate": 3.6409658510284208e-06, |
| "loss": 0.4299, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.891348088531187, |
| "grad_norm": 1.9026216268539429, |
| "learning_rate": 3.6296969679371325e-06, |
| "loss": 0.4367, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.8933601609657948, |
| "grad_norm": 1.863969326019287, |
| "learning_rate": 3.6184356055518143e-06, |
| "loss": 0.423, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.8953722334004024, |
| "grad_norm": 2.201620101928711, |
| "learning_rate": 3.6071818256787906e-06, |
| "loss": 0.4244, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.89738430583501, |
| "grad_norm": 2.143632650375366, |
| "learning_rate": 3.595935690082769e-06, |
| "loss": 0.4744, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.8993963782696177, |
| "grad_norm": 2.0650856494903564, |
| "learning_rate": 3.5846972604865103e-06, |
| "loss": 0.4696, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.9014084507042255, |
| "grad_norm": 2.024775743484497, |
| "learning_rate": 3.5734665985704732e-06, |
| "loss": 0.4614, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.903420523138833, |
| "grad_norm": 2.026299476623535, |
| "learning_rate": 3.56224376597249e-06, |
| "loss": 0.4446, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.9054325955734406, |
| "grad_norm": 1.9642820358276367, |
| "learning_rate": 3.551028824287418e-06, |
| "loss": 0.4592, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.9074446680080483, |
| "grad_norm": 2.067823648452759, |
| "learning_rate": 3.5398218350668136e-06, |
| "loss": 0.4665, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.909456740442656, |
| "grad_norm": 1.9692931175231934, |
| "learning_rate": 3.528622859818582e-06, |
| "loss": 0.4287, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.9114688128772634, |
| "grad_norm": 2.2274701595306396, |
| "learning_rate": 3.517431960006645e-06, |
| "loss": 0.4307, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.9134808853118712, |
| "grad_norm": 1.988458514213562, |
| "learning_rate": 3.506249197050604e-06, |
| "loss": 0.434, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.915492957746479, |
| "grad_norm": 2.008466958999634, |
| "learning_rate": 3.495074632325407e-06, |
| "loss": 0.4262, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.9175050301810865, |
| "grad_norm": 2.0033493041992188, |
| "learning_rate": 3.4839083271610007e-06, |
| "loss": 0.4252, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.919517102615694, |
| "grad_norm": 1.943579077720642, |
| "learning_rate": 3.472750342842003e-06, |
| "loss": 0.4361, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.9215291750503019, |
| "grad_norm": 2.111849069595337, |
| "learning_rate": 3.461600740607366e-06, |
| "loss": 0.46, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.9235412474849096, |
| "grad_norm": 2.095845937728882, |
| "learning_rate": 3.4504595816500318e-06, |
| "loss": 0.4766, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.925553319919517, |
| "grad_norm": 1.9449495077133179, |
| "learning_rate": 3.4393269271166117e-06, |
| "loss": 0.4518, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.9275653923541247, |
| "grad_norm": 2.125091552734375, |
| "learning_rate": 3.4282028381070366e-06, |
| "loss": 0.4408, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.9295774647887325, |
| "grad_norm": 2.1160356998443604, |
| "learning_rate": 3.4170873756742263e-06, |
| "loss": 0.4429, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.93158953722334, |
| "grad_norm": 1.942185640335083, |
| "learning_rate": 3.405980600823754e-06, |
| "loss": 0.4197, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.9336016096579476, |
| "grad_norm": 2.013988733291626, |
| "learning_rate": 3.3948825745135196e-06, |
| "loss": 0.4262, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.9356136820925554, |
| "grad_norm": 2.161773443222046, |
| "learning_rate": 3.383793357653398e-06, |
| "loss": 0.48, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.9376257545271631, |
| "grad_norm": 2.071415662765503, |
| "learning_rate": 3.372713011104922e-06, |
| "loss": 0.4106, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.9396378269617707, |
| "grad_norm": 2.229872465133667, |
| "learning_rate": 3.361641595680937e-06, |
| "loss": 0.4894, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.9416498993963782, |
| "grad_norm": 1.8888545036315918, |
| "learning_rate": 3.350579172145273e-06, |
| "loss": 0.4299, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.943661971830986, |
| "grad_norm": 1.9907033443450928, |
| "learning_rate": 3.3395258012124103e-06, |
| "loss": 0.4731, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.9456740442655935, |
| "grad_norm": 2.101872682571411, |
| "learning_rate": 3.3284815435471423e-06, |
| "loss": 0.4627, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.947686116700201, |
| "grad_norm": 1.9270325899124146, |
| "learning_rate": 3.3174464597642497e-06, |
| "loss": 0.4535, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.9496981891348089, |
| "grad_norm": 2.015502691268921, |
| "learning_rate": 3.306420610428157e-06, |
| "loss": 0.4303, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.9517102615694166, |
| "grad_norm": 2.1776444911956787, |
| "learning_rate": 3.295404056052616e-06, |
| "loss": 0.4968, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.9537223340040242, |
| "grad_norm": 2.200979232788086, |
| "learning_rate": 3.284396857100357e-06, |
| "loss": 0.4782, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.9557344064386317, |
| "grad_norm": 2.23836612701416, |
| "learning_rate": 3.273399073982768e-06, |
| "loss": 0.4682, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.9577464788732395, |
| "grad_norm": 2.088153839111328, |
| "learning_rate": 3.2624107670595567e-06, |
| "loss": 0.4557, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.959758551307847, |
| "grad_norm": 2.098024606704712, |
| "learning_rate": 3.251431996638427e-06, |
| "loss": 0.4578, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.9617706237424546, |
| "grad_norm": 1.946381688117981, |
| "learning_rate": 3.2404628229747386e-06, |
| "loss": 0.4263, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.9637826961770624, |
| "grad_norm": 2.1083738803863525, |
| "learning_rate": 3.2295033062711823e-06, |
| "loss": 0.4683, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.9657947686116701, |
| "grad_norm": 1.9927233457565308, |
| "learning_rate": 3.2185535066774477e-06, |
| "loss": 0.4478, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.9678068410462777, |
| "grad_norm": 2.067403793334961, |
| "learning_rate": 3.2076134842898955e-06, |
| "loss": 0.4404, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.9698189134808852, |
| "grad_norm": 2.1482181549072266, |
| "learning_rate": 3.1966832991512232e-06, |
| "loss": 0.4626, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.971830985915493, |
| "grad_norm": 2.094623327255249, |
| "learning_rate": 3.1857630112501397e-06, |
| "loss": 0.4116, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.9738430583501008, |
| "grad_norm": 2.0925683975219727, |
| "learning_rate": 3.174852680521032e-06, |
| "loss": 0.4319, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.975855130784708, |
| "grad_norm": 2.095231056213379, |
| "learning_rate": 3.16395236684364e-06, |
| "loss": 0.447, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.9778672032193159, |
| "grad_norm": 1.9157521724700928, |
| "learning_rate": 3.1530621300427294e-06, |
| "loss": 0.4593, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.9798792756539236, |
| "grad_norm": 2.173527956008911, |
| "learning_rate": 3.1421820298877554e-06, |
| "loss": 0.4772, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.9818913480885312, |
| "grad_norm": 2.125617742538452, |
| "learning_rate": 3.131312126092544e-06, |
| "loss": 0.439, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.9839034205231387, |
| "grad_norm": 2.0524990558624268, |
| "learning_rate": 3.1204524783149546e-06, |
| "loss": 0.4433, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.9859154929577465, |
| "grad_norm": 2.243478298187256, |
| "learning_rate": 3.1096031461565656e-06, |
| "loss": 0.4445, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.9879275653923543, |
| "grad_norm": 2.210712194442749, |
| "learning_rate": 3.098764189162332e-06, |
| "loss": 0.4589, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.9899396378269618, |
| "grad_norm": 2.2478127479553223, |
| "learning_rate": 3.087935666820273e-06, |
| "loss": 0.4519, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.9919517102615694, |
| "grad_norm": 2.1954751014709473, |
| "learning_rate": 3.0771176385611318e-06, |
| "loss": 0.4132, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.9939637826961771, |
| "grad_norm": 2.0039350986480713, |
| "learning_rate": 3.0663101637580626e-06, |
| "loss": 0.4582, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.9959758551307847, |
| "grad_norm": 1.9488987922668457, |
| "learning_rate": 3.055513301726296e-06, |
| "loss": 0.4492, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.9979879275653922, |
| "grad_norm": 1.9810996055603027, |
| "learning_rate": 3.044727111722815e-06, |
| "loss": 0.4541, |
| "step": 993 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 2.0250370502471924, |
| "learning_rate": 3.03395165294603e-06, |
| "loss": 0.4507, |
| "step": 994 |
| }, |
| { |
| "epoch": 2.0020120724346078, |
| "grad_norm": 1.8620500564575195, |
| "learning_rate": 3.02318698453546e-06, |
| "loss": 0.351, |
| "step": 995 |
| }, |
| { |
| "epoch": 2.004024144869215, |
| "grad_norm": 1.8618104457855225, |
| "learning_rate": 3.0124331655713966e-06, |
| "loss": 0.3471, |
| "step": 996 |
| }, |
| { |
| "epoch": 2.006036217303823, |
| "grad_norm": 1.7663768529891968, |
| "learning_rate": 3.0016902550745896e-06, |
| "loss": 0.3199, |
| "step": 997 |
| }, |
| { |
| "epoch": 2.0080482897384306, |
| "grad_norm": 1.7676547765731812, |
| "learning_rate": 2.990958312005916e-06, |
| "loss": 0.3247, |
| "step": 998 |
| }, |
| { |
| "epoch": 2.0100603621730384, |
| "grad_norm": 1.8144701719284058, |
| "learning_rate": 2.980237395266061e-06, |
| "loss": 0.3384, |
| "step": 999 |
| }, |
| { |
| "epoch": 2.0120724346076457, |
| "grad_norm": 1.9220832586288452, |
| "learning_rate": 2.9695275636951983e-06, |
| "loss": 0.3346, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.0140845070422535, |
| "grad_norm": 1.9769055843353271, |
| "learning_rate": 2.958828876072654e-06, |
| "loss": 0.3497, |
| "step": 1001 |
| }, |
| { |
| "epoch": 2.0160965794768613, |
| "grad_norm": 1.8415639400482178, |
| "learning_rate": 2.9481413911165984e-06, |
| "loss": 0.3224, |
| "step": 1002 |
| }, |
| { |
| "epoch": 2.0181086519114686, |
| "grad_norm": 1.7870392799377441, |
| "learning_rate": 2.9374651674837128e-06, |
| "loss": 0.2993, |
| "step": 1003 |
| }, |
| { |
| "epoch": 2.0201207243460764, |
| "grad_norm": 1.8392082452774048, |
| "learning_rate": 2.9268002637688788e-06, |
| "loss": 0.2891, |
| "step": 1004 |
| }, |
| { |
| "epoch": 2.022132796780684, |
| "grad_norm": 2.053039073944092, |
| "learning_rate": 2.9161467385048425e-06, |
| "loss": 0.3221, |
| "step": 1005 |
| }, |
| { |
| "epoch": 2.024144869215292, |
| "grad_norm": 2.1062474250793457, |
| "learning_rate": 2.9055046501619088e-06, |
| "loss": 0.3138, |
| "step": 1006 |
| }, |
| { |
| "epoch": 2.0261569416498992, |
| "grad_norm": 2.1327919960021973, |
| "learning_rate": 2.894874057147606e-06, |
| "loss": 0.3319, |
| "step": 1007 |
| }, |
| { |
| "epoch": 2.028169014084507, |
| "grad_norm": 2.227560520172119, |
| "learning_rate": 2.8842550178063777e-06, |
| "loss": 0.3259, |
| "step": 1008 |
| }, |
| { |
| "epoch": 2.0301810865191148, |
| "grad_norm": 2.0540666580200195, |
| "learning_rate": 2.8736475904192516e-06, |
| "loss": 0.3203, |
| "step": 1009 |
| }, |
| { |
| "epoch": 2.0321931589537225, |
| "grad_norm": 2.228684663772583, |
| "learning_rate": 2.863051833203531e-06, |
| "loss": 0.3381, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.03420523138833, |
| "grad_norm": 2.0458016395568848, |
| "learning_rate": 2.852467804312463e-06, |
| "loss": 0.3369, |
| "step": 1011 |
| }, |
| { |
| "epoch": 2.0362173038229376, |
| "grad_norm": 2.0923287868499756, |
| "learning_rate": 2.841895561834927e-06, |
| "loss": 0.314, |
| "step": 1012 |
| }, |
| { |
| "epoch": 2.0382293762575454, |
| "grad_norm": 1.8492389917373657, |
| "learning_rate": 2.8313351637951196e-06, |
| "loss": 0.3044, |
| "step": 1013 |
| }, |
| { |
| "epoch": 2.0402414486921527, |
| "grad_norm": 1.9469666481018066, |
| "learning_rate": 2.8207866681522233e-06, |
| "loss": 0.3365, |
| "step": 1014 |
| }, |
| { |
| "epoch": 2.0422535211267605, |
| "grad_norm": 1.9745899438858032, |
| "learning_rate": 2.810250132800103e-06, |
| "loss": 0.3094, |
| "step": 1015 |
| }, |
| { |
| "epoch": 2.0442655935613683, |
| "grad_norm": 2.1182546615600586, |
| "learning_rate": 2.7997256155669737e-06, |
| "loss": 0.3388, |
| "step": 1016 |
| }, |
| { |
| "epoch": 2.046277665995976, |
| "grad_norm": 1.8506799936294556, |
| "learning_rate": 2.7892131742151007e-06, |
| "loss": 0.3286, |
| "step": 1017 |
| }, |
| { |
| "epoch": 2.0482897384305834, |
| "grad_norm": 1.8766345977783203, |
| "learning_rate": 2.778712866440464e-06, |
| "loss": 0.3427, |
| "step": 1018 |
| }, |
| { |
| "epoch": 2.050301810865191, |
| "grad_norm": 2.0145444869995117, |
| "learning_rate": 2.7682247498724536e-06, |
| "loss": 0.362, |
| "step": 1019 |
| }, |
| { |
| "epoch": 2.052313883299799, |
| "grad_norm": 1.8069449663162231, |
| "learning_rate": 2.7577488820735465e-06, |
| "loss": 0.3251, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.0543259557344062, |
| "grad_norm": 1.952471375465393, |
| "learning_rate": 2.7472853205389997e-06, |
| "loss": 0.3323, |
| "step": 1021 |
| }, |
| { |
| "epoch": 2.056338028169014, |
| "grad_norm": 1.8935775756835938, |
| "learning_rate": 2.736834122696529e-06, |
| "loss": 0.3165, |
| "step": 1022 |
| }, |
| { |
| "epoch": 2.058350100603622, |
| "grad_norm": 2.1522717475891113, |
| "learning_rate": 2.7263953459059888e-06, |
| "loss": 0.3245, |
| "step": 1023 |
| }, |
| { |
| "epoch": 2.0603621730382295, |
| "grad_norm": 1.9437185525894165, |
| "learning_rate": 2.715969047459066e-06, |
| "loss": 0.3144, |
| "step": 1024 |
| }, |
| { |
| "epoch": 2.062374245472837, |
| "grad_norm": 2.134711980819702, |
| "learning_rate": 2.705555284578958e-06, |
| "loss": 0.3343, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.0643863179074446, |
| "grad_norm": 1.7659924030303955, |
| "learning_rate": 2.6951541144200676e-06, |
| "loss": 0.3019, |
| "step": 1026 |
| }, |
| { |
| "epoch": 2.0663983903420524, |
| "grad_norm": 2.057668685913086, |
| "learning_rate": 2.6847655940676843e-06, |
| "loss": 0.3173, |
| "step": 1027 |
| }, |
| { |
| "epoch": 2.0684104627766597, |
| "grad_norm": 1.9060618877410889, |
| "learning_rate": 2.6743897805376672e-06, |
| "loss": 0.3124, |
| "step": 1028 |
| }, |
| { |
| "epoch": 2.0704225352112675, |
| "grad_norm": 1.781306505203247, |
| "learning_rate": 2.664026730776136e-06, |
| "loss": 0.2955, |
| "step": 1029 |
| }, |
| { |
| "epoch": 2.0724346076458753, |
| "grad_norm": 1.9370532035827637, |
| "learning_rate": 2.6536765016591626e-06, |
| "loss": 0.3154, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.074446680080483, |
| "grad_norm": 1.8034547567367554, |
| "learning_rate": 2.64333914999245e-06, |
| "loss": 0.313, |
| "step": 1031 |
| }, |
| { |
| "epoch": 2.0764587525150904, |
| "grad_norm": 1.9127081632614136, |
| "learning_rate": 2.63301473251103e-06, |
| "loss": 0.325, |
| "step": 1032 |
| }, |
| { |
| "epoch": 2.078470824949698, |
| "grad_norm": 1.9056042432785034, |
| "learning_rate": 2.622703305878941e-06, |
| "loss": 0.312, |
| "step": 1033 |
| }, |
| { |
| "epoch": 2.080482897384306, |
| "grad_norm": 1.922743797302246, |
| "learning_rate": 2.6124049266889296e-06, |
| "loss": 0.3124, |
| "step": 1034 |
| }, |
| { |
| "epoch": 2.0824949698189137, |
| "grad_norm": 1.9337576627731323, |
| "learning_rate": 2.6021196514621283e-06, |
| "loss": 0.3069, |
| "step": 1035 |
| }, |
| { |
| "epoch": 2.084507042253521, |
| "grad_norm": 2.0789332389831543, |
| "learning_rate": 2.5918475366477536e-06, |
| "loss": 0.3276, |
| "step": 1036 |
| }, |
| { |
| "epoch": 2.086519114688129, |
| "grad_norm": 1.9439971446990967, |
| "learning_rate": 2.5815886386227882e-06, |
| "loss": 0.3067, |
| "step": 1037 |
| }, |
| { |
| "epoch": 2.0885311871227366, |
| "grad_norm": 1.9567536115646362, |
| "learning_rate": 2.5713430136916828e-06, |
| "loss": 0.3264, |
| "step": 1038 |
| }, |
| { |
| "epoch": 2.090543259557344, |
| "grad_norm": 1.9910478591918945, |
| "learning_rate": 2.5611107180860395e-06, |
| "loss": 0.3422, |
| "step": 1039 |
| }, |
| { |
| "epoch": 2.0925553319919517, |
| "grad_norm": 1.9096462726593018, |
| "learning_rate": 2.5508918079643e-06, |
| "loss": 0.3087, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.0945674044265594, |
| "grad_norm": 1.9407165050506592, |
| "learning_rate": 2.540686339411446e-06, |
| "loss": 0.3083, |
| "step": 1041 |
| }, |
| { |
| "epoch": 2.096579476861167, |
| "grad_norm": 2.0691964626312256, |
| "learning_rate": 2.530494368438683e-06, |
| "loss": 0.32, |
| "step": 1042 |
| }, |
| { |
| "epoch": 2.0985915492957745, |
| "grad_norm": 1.9105881452560425, |
| "learning_rate": 2.520315950983141e-06, |
| "loss": 0.3293, |
| "step": 1043 |
| }, |
| { |
| "epoch": 2.1006036217303823, |
| "grad_norm": 2.024200916290283, |
| "learning_rate": 2.5101511429075654e-06, |
| "loss": 0.3245, |
| "step": 1044 |
| }, |
| { |
| "epoch": 2.10261569416499, |
| "grad_norm": 1.9861524105072021, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 0.3235, |
| "step": 1045 |
| }, |
| { |
| "epoch": 2.1046277665995974, |
| "grad_norm": 2.02134370803833, |
| "learning_rate": 2.489862577973498e-06, |
| "loss": 0.3244, |
| "step": 1046 |
| }, |
| { |
| "epoch": 2.106639839034205, |
| "grad_norm": 1.9421387910842896, |
| "learning_rate": 2.4797389324658037e-06, |
| "loss": 0.3215, |
| "step": 1047 |
| }, |
| { |
| "epoch": 2.108651911468813, |
| "grad_norm": 2.0010151863098145, |
| "learning_rate": 2.4696291190390494e-06, |
| "loss": 0.3157, |
| "step": 1048 |
| }, |
| { |
| "epoch": 2.1106639839034207, |
| "grad_norm": 1.8834630250930786, |
| "learning_rate": 2.459533193179457e-06, |
| "loss": 0.31, |
| "step": 1049 |
| }, |
| { |
| "epoch": 2.112676056338028, |
| "grad_norm": 1.9237369298934937, |
| "learning_rate": 2.4494512102970247e-06, |
| "loss": 0.3048, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.114688128772636, |
| "grad_norm": 1.9493533372879028, |
| "learning_rate": 2.4393832257252253e-06, |
| "loss": 0.3194, |
| "step": 1051 |
| }, |
| { |
| "epoch": 2.1167002012072436, |
| "grad_norm": 1.905882716178894, |
| "learning_rate": 2.42932929472071e-06, |
| "loss": 0.3211, |
| "step": 1052 |
| }, |
| { |
| "epoch": 2.118712273641851, |
| "grad_norm": 2.0549941062927246, |
| "learning_rate": 2.4192894724629943e-06, |
| "loss": 0.3362, |
| "step": 1053 |
| }, |
| { |
| "epoch": 2.1207243460764587, |
| "grad_norm": 2.161760091781616, |
| "learning_rate": 2.4092638140541586e-06, |
| "loss": 0.3293, |
| "step": 1054 |
| }, |
| { |
| "epoch": 2.1227364185110664, |
| "grad_norm": 1.8797554969787598, |
| "learning_rate": 2.399252374518551e-06, |
| "loss": 0.2915, |
| "step": 1055 |
| }, |
| { |
| "epoch": 2.124748490945674, |
| "grad_norm": 1.9617598056793213, |
| "learning_rate": 2.3892552088024827e-06, |
| "loss": 0.3111, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.1267605633802815, |
| "grad_norm": 1.9987127780914307, |
| "learning_rate": 2.3792723717739197e-06, |
| "loss": 0.319, |
| "step": 1057 |
| }, |
| { |
| "epoch": 2.1287726358148893, |
| "grad_norm": 1.9436331987380981, |
| "learning_rate": 2.3693039182221907e-06, |
| "loss": 0.3167, |
| "step": 1058 |
| }, |
| { |
| "epoch": 2.130784708249497, |
| "grad_norm": 1.9609178304672241, |
| "learning_rate": 2.3593499028576793e-06, |
| "loss": 0.3181, |
| "step": 1059 |
| }, |
| { |
| "epoch": 2.132796780684105, |
| "grad_norm": 1.8295328617095947, |
| "learning_rate": 2.349410380311532e-06, |
| "loss": 0.3098, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.134808853118712, |
| "grad_norm": 1.9270719289779663, |
| "learning_rate": 2.3394854051353534e-06, |
| "loss": 0.3213, |
| "step": 1061 |
| }, |
| { |
| "epoch": 2.13682092555332, |
| "grad_norm": 1.8924223184585571, |
| "learning_rate": 2.329575031800903e-06, |
| "loss": 0.3164, |
| "step": 1062 |
| }, |
| { |
| "epoch": 2.1388329979879277, |
| "grad_norm": 2.010369062423706, |
| "learning_rate": 2.319679314699801e-06, |
| "loss": 0.3217, |
| "step": 1063 |
| }, |
| { |
| "epoch": 2.140845070422535, |
| "grad_norm": 1.8954033851623535, |
| "learning_rate": 2.3097983081432334e-06, |
| "loss": 0.3241, |
| "step": 1064 |
| }, |
| { |
| "epoch": 2.142857142857143, |
| "grad_norm": 2.061217784881592, |
| "learning_rate": 2.299932066361643e-06, |
| "loss": 0.3274, |
| "step": 1065 |
| }, |
| { |
| "epoch": 2.1448692152917506, |
| "grad_norm": 2.0788443088531494, |
| "learning_rate": 2.290080643504446e-06, |
| "loss": 0.3092, |
| "step": 1066 |
| }, |
| { |
| "epoch": 2.1468812877263583, |
| "grad_norm": 1.9684137105941772, |
| "learning_rate": 2.2802440936397203e-06, |
| "loss": 0.3227, |
| "step": 1067 |
| }, |
| { |
| "epoch": 2.1488933601609657, |
| "grad_norm": 1.9991480112075806, |
| "learning_rate": 2.2704224707539164e-06, |
| "loss": 0.3041, |
| "step": 1068 |
| }, |
| { |
| "epoch": 2.1509054325955734, |
| "grad_norm": 1.926109790802002, |
| "learning_rate": 2.2606158287515662e-06, |
| "loss": 0.3132, |
| "step": 1069 |
| }, |
| { |
| "epoch": 2.152917505030181, |
| "grad_norm": 1.8808412551879883, |
| "learning_rate": 2.250824221454976e-06, |
| "loss": 0.302, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.1549295774647885, |
| "grad_norm": 2.0052027702331543, |
| "learning_rate": 2.2410477026039335e-06, |
| "loss": 0.3344, |
| "step": 1071 |
| }, |
| { |
| "epoch": 2.1569416498993963, |
| "grad_norm": 1.9723577499389648, |
| "learning_rate": 2.2312863258554236e-06, |
| "loss": 0.3377, |
| "step": 1072 |
| }, |
| { |
| "epoch": 2.158953722334004, |
| "grad_norm": 1.9339603185653687, |
| "learning_rate": 2.221540144783323e-06, |
| "loss": 0.3195, |
| "step": 1073 |
| }, |
| { |
| "epoch": 2.160965794768612, |
| "grad_norm": 1.9883408546447754, |
| "learning_rate": 2.211809212878106e-06, |
| "loss": 0.3129, |
| "step": 1074 |
| }, |
| { |
| "epoch": 2.162977867203219, |
| "grad_norm": 1.8902078866958618, |
| "learning_rate": 2.2020935835465567e-06, |
| "loss": 0.31, |
| "step": 1075 |
| }, |
| { |
| "epoch": 2.164989939637827, |
| "grad_norm": 2.026864528656006, |
| "learning_rate": 2.1923933101114713e-06, |
| "loss": 0.3211, |
| "step": 1076 |
| }, |
| { |
| "epoch": 2.1670020120724347, |
| "grad_norm": 1.993359923362732, |
| "learning_rate": 2.182708445811371e-06, |
| "loss": 0.3295, |
| "step": 1077 |
| }, |
| { |
| "epoch": 2.169014084507042, |
| "grad_norm": 1.8539053201675415, |
| "learning_rate": 2.1730390438002056e-06, |
| "loss": 0.2943, |
| "step": 1078 |
| }, |
| { |
| "epoch": 2.17102615694165, |
| "grad_norm": 1.9406895637512207, |
| "learning_rate": 2.1633851571470595e-06, |
| "loss": 0.3138, |
| "step": 1079 |
| }, |
| { |
| "epoch": 2.1730382293762576, |
| "grad_norm": 1.890519142150879, |
| "learning_rate": 2.1537468388358645e-06, |
| "loss": 0.3266, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.1750503018108653, |
| "grad_norm": 1.8422409296035767, |
| "learning_rate": 2.1441241417651072e-06, |
| "loss": 0.3042, |
| "step": 1081 |
| }, |
| { |
| "epoch": 2.1770623742454727, |
| "grad_norm": 2.003852128982544, |
| "learning_rate": 2.134517118747541e-06, |
| "loss": 0.3173, |
| "step": 1082 |
| }, |
| { |
| "epoch": 2.1790744466800804, |
| "grad_norm": 1.9138869047164917, |
| "learning_rate": 2.1249258225098974e-06, |
| "loss": 0.3044, |
| "step": 1083 |
| }, |
| { |
| "epoch": 2.181086519114688, |
| "grad_norm": 2.0287420749664307, |
| "learning_rate": 2.1153503056925872e-06, |
| "loss": 0.3354, |
| "step": 1084 |
| }, |
| { |
| "epoch": 2.183098591549296, |
| "grad_norm": 1.9738901853561401, |
| "learning_rate": 2.1057906208494204e-06, |
| "loss": 0.3211, |
| "step": 1085 |
| }, |
| { |
| "epoch": 2.1851106639839033, |
| "grad_norm": 1.9624329805374146, |
| "learning_rate": 2.09624682044732e-06, |
| "loss": 0.3349, |
| "step": 1086 |
| }, |
| { |
| "epoch": 2.187122736418511, |
| "grad_norm": 2.0174593925476074, |
| "learning_rate": 2.086718956866024e-06, |
| "loss": 0.313, |
| "step": 1087 |
| }, |
| { |
| "epoch": 2.189134808853119, |
| "grad_norm": 2.0925774574279785, |
| "learning_rate": 2.0772070823978034e-06, |
| "loss": 0.3098, |
| "step": 1088 |
| }, |
| { |
| "epoch": 2.191146881287726, |
| "grad_norm": 1.9644815921783447, |
| "learning_rate": 2.06771124924718e-06, |
| "loss": 0.3177, |
| "step": 1089 |
| }, |
| { |
| "epoch": 2.193158953722334, |
| "grad_norm": 1.8980356454849243, |
| "learning_rate": 2.0582315095306343e-06, |
| "loss": 0.3178, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.1951710261569417, |
| "grad_norm": 1.9412862062454224, |
| "learning_rate": 2.0487679152763173e-06, |
| "loss": 0.3026, |
| "step": 1091 |
| }, |
| { |
| "epoch": 2.1971830985915495, |
| "grad_norm": 1.9344528913497925, |
| "learning_rate": 2.0393205184237687e-06, |
| "loss": 0.339, |
| "step": 1092 |
| }, |
| { |
| "epoch": 2.199195171026157, |
| "grad_norm": 1.8947501182556152, |
| "learning_rate": 2.0298893708236307e-06, |
| "loss": 0.3275, |
| "step": 1093 |
| }, |
| { |
| "epoch": 2.2012072434607646, |
| "grad_norm": 1.942131757736206, |
| "learning_rate": 2.0204745242373665e-06, |
| "loss": 0.3351, |
| "step": 1094 |
| }, |
| { |
| "epoch": 2.2032193158953723, |
| "grad_norm": 1.9295439720153809, |
| "learning_rate": 2.011076030336974e-06, |
| "loss": 0.3167, |
| "step": 1095 |
| }, |
| { |
| "epoch": 2.20523138832998, |
| "grad_norm": 2.072382926940918, |
| "learning_rate": 2.0016939407046987e-06, |
| "loss": 0.3532, |
| "step": 1096 |
| }, |
| { |
| "epoch": 2.2072434607645874, |
| "grad_norm": 2.0070250034332275, |
| "learning_rate": 1.992328306832755e-06, |
| "loss": 0.3542, |
| "step": 1097 |
| }, |
| { |
| "epoch": 2.209255533199195, |
| "grad_norm": 1.749671220779419, |
| "learning_rate": 1.9829791801230398e-06, |
| "loss": 0.2876, |
| "step": 1098 |
| }, |
| { |
| "epoch": 2.211267605633803, |
| "grad_norm": 1.9479471445083618, |
| "learning_rate": 1.9736466118868573e-06, |
| "loss": 0.342, |
| "step": 1099 |
| }, |
| { |
| "epoch": 2.2132796780684103, |
| "grad_norm": 1.8488900661468506, |
| "learning_rate": 1.9643306533446332e-06, |
| "loss": 0.3147, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.215291750503018, |
| "grad_norm": 2.0866432189941406, |
| "learning_rate": 1.9550313556256294e-06, |
| "loss": 0.3121, |
| "step": 1101 |
| }, |
| { |
| "epoch": 2.217303822937626, |
| "grad_norm": 1.9821211099624634, |
| "learning_rate": 1.945748769767667e-06, |
| "loss": 0.3254, |
| "step": 1102 |
| }, |
| { |
| "epoch": 2.219315895372233, |
| "grad_norm": 1.9831209182739258, |
| "learning_rate": 1.9364829467168522e-06, |
| "loss": 0.3243, |
| "step": 1103 |
| }, |
| { |
| "epoch": 2.221327967806841, |
| "grad_norm": 1.8965582847595215, |
| "learning_rate": 1.927233937327285e-06, |
| "loss": 0.319, |
| "step": 1104 |
| }, |
| { |
| "epoch": 2.2233400402414487, |
| "grad_norm": 1.8991901874542236, |
| "learning_rate": 1.9180017923607884e-06, |
| "loss": 0.3179, |
| "step": 1105 |
| }, |
| { |
| "epoch": 2.2253521126760565, |
| "grad_norm": 1.933734655380249, |
| "learning_rate": 1.9087865624866297e-06, |
| "loss": 0.3194, |
| "step": 1106 |
| }, |
| { |
| "epoch": 2.227364185110664, |
| "grad_norm": 1.9359025955200195, |
| "learning_rate": 1.8995882982812352e-06, |
| "loss": 0.3141, |
| "step": 1107 |
| }, |
| { |
| "epoch": 2.2293762575452716, |
| "grad_norm": 2.0395710468292236, |
| "learning_rate": 1.8904070502279242e-06, |
| "loss": 0.329, |
| "step": 1108 |
| }, |
| { |
| "epoch": 2.2313883299798793, |
| "grad_norm": 1.9852657318115234, |
| "learning_rate": 1.8812428687166195e-06, |
| "loss": 0.3519, |
| "step": 1109 |
| }, |
| { |
| "epoch": 2.233400402414487, |
| "grad_norm": 2.0795342922210693, |
| "learning_rate": 1.8720958040435772e-06, |
| "loss": 0.3267, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.2354124748490944, |
| "grad_norm": 2.028374433517456, |
| "learning_rate": 1.8629659064111138e-06, |
| "loss": 0.3324, |
| "step": 1111 |
| }, |
| { |
| "epoch": 2.237424547283702, |
| "grad_norm": 1.9261505603790283, |
| "learning_rate": 1.8538532259273272e-06, |
| "loss": 0.3209, |
| "step": 1112 |
| }, |
| { |
| "epoch": 2.23943661971831, |
| "grad_norm": 2.0454418659210205, |
| "learning_rate": 1.844757812605817e-06, |
| "loss": 0.2846, |
| "step": 1113 |
| }, |
| { |
| "epoch": 2.2414486921529173, |
| "grad_norm": 1.9863117933273315, |
| "learning_rate": 1.8356797163654172e-06, |
| "loss": 0.3121, |
| "step": 1114 |
| }, |
| { |
| "epoch": 2.243460764587525, |
| "grad_norm": 2.1988511085510254, |
| "learning_rate": 1.8266189870299184e-06, |
| "loss": 0.3031, |
| "step": 1115 |
| }, |
| { |
| "epoch": 2.245472837022133, |
| "grad_norm": 2.0179038047790527, |
| "learning_rate": 1.8175756743277967e-06, |
| "loss": 0.3246, |
| "step": 1116 |
| }, |
| { |
| "epoch": 2.2474849094567406, |
| "grad_norm": 1.9999439716339111, |
| "learning_rate": 1.8085498278919421e-06, |
| "loss": 0.3066, |
| "step": 1117 |
| }, |
| { |
| "epoch": 2.249496981891348, |
| "grad_norm": 1.915867567062378, |
| "learning_rate": 1.7995414972593784e-06, |
| "loss": 0.3303, |
| "step": 1118 |
| }, |
| { |
| "epoch": 2.2515090543259557, |
| "grad_norm": 2.091055154800415, |
| "learning_rate": 1.7905507318709997e-06, |
| "loss": 0.3478, |
| "step": 1119 |
| }, |
| { |
| "epoch": 2.2535211267605635, |
| "grad_norm": 1.8792387247085571, |
| "learning_rate": 1.7815775810712921e-06, |
| "loss": 0.3155, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.2555331991951713, |
| "grad_norm": 2.0227322578430176, |
| "learning_rate": 1.772622094108074e-06, |
| "loss": 0.3273, |
| "step": 1121 |
| }, |
| { |
| "epoch": 2.2575452716297786, |
| "grad_norm": 1.9454227685928345, |
| "learning_rate": 1.7636843201322106e-06, |
| "loss": 0.3376, |
| "step": 1122 |
| }, |
| { |
| "epoch": 2.2595573440643864, |
| "grad_norm": 1.826862096786499, |
| "learning_rate": 1.754764308197358e-06, |
| "loss": 0.3242, |
| "step": 1123 |
| }, |
| { |
| "epoch": 2.261569416498994, |
| "grad_norm": 1.9834349155426025, |
| "learning_rate": 1.7458621072596827e-06, |
| "loss": 0.318, |
| "step": 1124 |
| }, |
| { |
| "epoch": 2.2635814889336014, |
| "grad_norm": 1.9824222326278687, |
| "learning_rate": 1.7369777661776032e-06, |
| "loss": 0.3243, |
| "step": 1125 |
| }, |
| { |
| "epoch": 2.265593561368209, |
| "grad_norm": 1.8404629230499268, |
| "learning_rate": 1.728111333711514e-06, |
| "loss": 0.3094, |
| "step": 1126 |
| }, |
| { |
| "epoch": 2.267605633802817, |
| "grad_norm": 1.9138398170471191, |
| "learning_rate": 1.7192628585235188e-06, |
| "loss": 0.3125, |
| "step": 1127 |
| }, |
| { |
| "epoch": 2.2696177062374243, |
| "grad_norm": 1.917009949684143, |
| "learning_rate": 1.7104323891771697e-06, |
| "loss": 0.3205, |
| "step": 1128 |
| }, |
| { |
| "epoch": 2.271629778672032, |
| "grad_norm": 1.910467267036438, |
| "learning_rate": 1.7016199741371958e-06, |
| "loss": 0.3063, |
| "step": 1129 |
| }, |
| { |
| "epoch": 2.27364185110664, |
| "grad_norm": 2.0068464279174805, |
| "learning_rate": 1.6928256617692357e-06, |
| "loss": 0.3219, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.2756539235412476, |
| "grad_norm": 1.9971808195114136, |
| "learning_rate": 1.6840495003395741e-06, |
| "loss": 0.3223, |
| "step": 1131 |
| }, |
| { |
| "epoch": 2.277665995975855, |
| "grad_norm": 1.9968491792678833, |
| "learning_rate": 1.6752915380148772e-06, |
| "loss": 0.3146, |
| "step": 1132 |
| }, |
| { |
| "epoch": 2.2796780684104627, |
| "grad_norm": 1.8720755577087402, |
| "learning_rate": 1.6665518228619316e-06, |
| "loss": 0.3043, |
| "step": 1133 |
| }, |
| { |
| "epoch": 2.2816901408450705, |
| "grad_norm": 2.0129830837249756, |
| "learning_rate": 1.6578304028473703e-06, |
| "loss": 0.3109, |
| "step": 1134 |
| }, |
| { |
| "epoch": 2.2837022132796783, |
| "grad_norm": 1.853187084197998, |
| "learning_rate": 1.6491273258374241e-06, |
| "loss": 0.3086, |
| "step": 1135 |
| }, |
| { |
| "epoch": 2.2857142857142856, |
| "grad_norm": 1.9343267679214478, |
| "learning_rate": 1.6404426395976446e-06, |
| "loss": 0.3179, |
| "step": 1136 |
| }, |
| { |
| "epoch": 2.2877263581488934, |
| "grad_norm": 1.9186265468597412, |
| "learning_rate": 1.6317763917926494e-06, |
| "loss": 0.3252, |
| "step": 1137 |
| }, |
| { |
| "epoch": 2.289738430583501, |
| "grad_norm": 1.889265775680542, |
| "learning_rate": 1.6231286299858635e-06, |
| "loss": 0.3049, |
| "step": 1138 |
| }, |
| { |
| "epoch": 2.2917505030181085, |
| "grad_norm": 2.0032553672790527, |
| "learning_rate": 1.6144994016392484e-06, |
| "loss": 0.3246, |
| "step": 1139 |
| }, |
| { |
| "epoch": 2.2937625754527162, |
| "grad_norm": 1.9866799116134644, |
| "learning_rate": 1.6058887541130541e-06, |
| "loss": 0.3323, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.295774647887324, |
| "grad_norm": 2.0110700130462646, |
| "learning_rate": 1.5972967346655449e-06, |
| "loss": 0.3232, |
| "step": 1141 |
| }, |
| { |
| "epoch": 2.2977867203219318, |
| "grad_norm": 1.9062737226486206, |
| "learning_rate": 1.5887233904527548e-06, |
| "loss": 0.3115, |
| "step": 1142 |
| }, |
| { |
| "epoch": 2.299798792756539, |
| "grad_norm": 2.0070598125457764, |
| "learning_rate": 1.5801687685282169e-06, |
| "loss": 0.3148, |
| "step": 1143 |
| }, |
| { |
| "epoch": 2.301810865191147, |
| "grad_norm": 1.9191631078720093, |
| "learning_rate": 1.5716329158427097e-06, |
| "loss": 0.3244, |
| "step": 1144 |
| }, |
| { |
| "epoch": 2.3038229376257546, |
| "grad_norm": 1.8915290832519531, |
| "learning_rate": 1.5631158792440027e-06, |
| "loss": 0.3026, |
| "step": 1145 |
| }, |
| { |
| "epoch": 2.3058350100603624, |
| "grad_norm": 1.9323855638504028, |
| "learning_rate": 1.5546177054765954e-06, |
| "loss": 0.2986, |
| "step": 1146 |
| }, |
| { |
| "epoch": 2.3078470824949697, |
| "grad_norm": 1.9170725345611572, |
| "learning_rate": 1.546138441181459e-06, |
| "loss": 0.3278, |
| "step": 1147 |
| }, |
| { |
| "epoch": 2.3098591549295775, |
| "grad_norm": 1.9867016077041626, |
| "learning_rate": 1.5376781328957846e-06, |
| "loss": 0.3282, |
| "step": 1148 |
| }, |
| { |
| "epoch": 2.3118712273641853, |
| "grad_norm": 1.8710124492645264, |
| "learning_rate": 1.5292368270527259e-06, |
| "loss": 0.3168, |
| "step": 1149 |
| }, |
| { |
| "epoch": 2.3138832997987926, |
| "grad_norm": 1.8784232139587402, |
| "learning_rate": 1.5208145699811417e-06, |
| "loss": 0.3163, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.3158953722334004, |
| "grad_norm": 2.079653024673462, |
| "learning_rate": 1.5124114079053492e-06, |
| "loss": 0.3268, |
| "step": 1151 |
| }, |
| { |
| "epoch": 2.317907444668008, |
| "grad_norm": 1.9615051746368408, |
| "learning_rate": 1.5040273869448652e-06, |
| "loss": 0.3163, |
| "step": 1152 |
| }, |
| { |
| "epoch": 2.3199195171026155, |
| "grad_norm": 2.029083251953125, |
| "learning_rate": 1.4956625531141495e-06, |
| "loss": 0.3118, |
| "step": 1153 |
| }, |
| { |
| "epoch": 2.3219315895372232, |
| "grad_norm": 1.926914930343628, |
| "learning_rate": 1.4873169523223568e-06, |
| "loss": 0.3132, |
| "step": 1154 |
| }, |
| { |
| "epoch": 2.323943661971831, |
| "grad_norm": 2.0502724647521973, |
| "learning_rate": 1.4789906303730888e-06, |
| "loss": 0.338, |
| "step": 1155 |
| }, |
| { |
| "epoch": 2.3259557344064388, |
| "grad_norm": 2.0026066303253174, |
| "learning_rate": 1.470683632964131e-06, |
| "loss": 0.3235, |
| "step": 1156 |
| }, |
| { |
| "epoch": 2.327967806841046, |
| "grad_norm": 1.9444767236709595, |
| "learning_rate": 1.462396005687216e-06, |
| "loss": 0.3239, |
| "step": 1157 |
| }, |
| { |
| "epoch": 2.329979879275654, |
| "grad_norm": 1.9651098251342773, |
| "learning_rate": 1.4541277940277604e-06, |
| "loss": 0.3336, |
| "step": 1158 |
| }, |
| { |
| "epoch": 2.3319919517102616, |
| "grad_norm": 2.0174999237060547, |
| "learning_rate": 1.4458790433646264e-06, |
| "loss": 0.3276, |
| "step": 1159 |
| }, |
| { |
| "epoch": 2.3340040241448694, |
| "grad_norm": 1.976993203163147, |
| "learning_rate": 1.4376497989698635e-06, |
| "loss": 0.321, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.3360160965794767, |
| "grad_norm": 1.8561229705810547, |
| "learning_rate": 1.4294401060084634e-06, |
| "loss": 0.297, |
| "step": 1161 |
| }, |
| { |
| "epoch": 2.3380281690140845, |
| "grad_norm": 1.98836350440979, |
| "learning_rate": 1.4212500095381176e-06, |
| "loss": 0.3096, |
| "step": 1162 |
| }, |
| { |
| "epoch": 2.3400402414486923, |
| "grad_norm": 2.002023935317993, |
| "learning_rate": 1.4130795545089588e-06, |
| "loss": 0.3252, |
| "step": 1163 |
| }, |
| { |
| "epoch": 2.3420523138832996, |
| "grad_norm": 1.9276113510131836, |
| "learning_rate": 1.4049287857633264e-06, |
| "loss": 0.3416, |
| "step": 1164 |
| }, |
| { |
| "epoch": 2.3440643863179074, |
| "grad_norm": 1.998106598854065, |
| "learning_rate": 1.3967977480355106e-06, |
| "loss": 0.2949, |
| "step": 1165 |
| }, |
| { |
| "epoch": 2.346076458752515, |
| "grad_norm": 1.9151923656463623, |
| "learning_rate": 1.388686485951512e-06, |
| "loss": 0.3093, |
| "step": 1166 |
| }, |
| { |
| "epoch": 2.348088531187123, |
| "grad_norm": 1.9476468563079834, |
| "learning_rate": 1.3805950440287936e-06, |
| "loss": 0.3013, |
| "step": 1167 |
| }, |
| { |
| "epoch": 2.3501006036217302, |
| "grad_norm": 1.9365510940551758, |
| "learning_rate": 1.3725234666760428e-06, |
| "loss": 0.3257, |
| "step": 1168 |
| }, |
| { |
| "epoch": 2.352112676056338, |
| "grad_norm": 1.9782971143722534, |
| "learning_rate": 1.3644717981929213e-06, |
| "loss": 0.3212, |
| "step": 1169 |
| }, |
| { |
| "epoch": 2.3541247484909458, |
| "grad_norm": 1.8265795707702637, |
| "learning_rate": 1.356440082769822e-06, |
| "loss": 0.2976, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.3561368209255535, |
| "grad_norm": 1.9859201908111572, |
| "learning_rate": 1.3484283644876289e-06, |
| "loss": 0.3246, |
| "step": 1171 |
| }, |
| { |
| "epoch": 2.358148893360161, |
| "grad_norm": 1.8772772550582886, |
| "learning_rate": 1.3404366873174778e-06, |
| "loss": 0.3079, |
| "step": 1172 |
| }, |
| { |
| "epoch": 2.3601609657947686, |
| "grad_norm": 1.969179391860962, |
| "learning_rate": 1.3324650951205064e-06, |
| "loss": 0.3113, |
| "step": 1173 |
| }, |
| { |
| "epoch": 2.3621730382293764, |
| "grad_norm": 2.0032851696014404, |
| "learning_rate": 1.3245136316476253e-06, |
| "loss": 0.3253, |
| "step": 1174 |
| }, |
| { |
| "epoch": 2.3641851106639837, |
| "grad_norm": 1.8898330926895142, |
| "learning_rate": 1.3165823405392668e-06, |
| "loss": 0.3076, |
| "step": 1175 |
| }, |
| { |
| "epoch": 2.3661971830985915, |
| "grad_norm": 1.8925584554672241, |
| "learning_rate": 1.3086712653251504e-06, |
| "loss": 0.3034, |
| "step": 1176 |
| }, |
| { |
| "epoch": 2.3682092555331993, |
| "grad_norm": 1.8813642263412476, |
| "learning_rate": 1.3007804494240478e-06, |
| "loss": 0.3083, |
| "step": 1177 |
| }, |
| { |
| "epoch": 2.3702213279678066, |
| "grad_norm": 2.067413568496704, |
| "learning_rate": 1.2929099361435348e-06, |
| "loss": 0.3329, |
| "step": 1178 |
| }, |
| { |
| "epoch": 2.3722334004024144, |
| "grad_norm": 1.9133418798446655, |
| "learning_rate": 1.2850597686797644e-06, |
| "loss": 0.306, |
| "step": 1179 |
| }, |
| { |
| "epoch": 2.374245472837022, |
| "grad_norm": 2.065636157989502, |
| "learning_rate": 1.2772299901172198e-06, |
| "loss": 0.3425, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.37625754527163, |
| "grad_norm": 1.886622428894043, |
| "learning_rate": 1.2694206434284878e-06, |
| "loss": 0.3182, |
| "step": 1181 |
| }, |
| { |
| "epoch": 2.3782696177062372, |
| "grad_norm": 1.9201165437698364, |
| "learning_rate": 1.261631771474014e-06, |
| "loss": 0.317, |
| "step": 1182 |
| }, |
| { |
| "epoch": 2.380281690140845, |
| "grad_norm": 1.94356369972229, |
| "learning_rate": 1.2538634170018727e-06, |
| "loss": 0.3114, |
| "step": 1183 |
| }, |
| { |
| "epoch": 2.3822937625754528, |
| "grad_norm": 2.046518564224243, |
| "learning_rate": 1.246115622647529e-06, |
| "loss": 0.3231, |
| "step": 1184 |
| }, |
| { |
| "epoch": 2.3843058350100605, |
| "grad_norm": 1.8998048305511475, |
| "learning_rate": 1.2383884309336114e-06, |
| "loss": 0.3334, |
| "step": 1185 |
| }, |
| { |
| "epoch": 2.386317907444668, |
| "grad_norm": 2.0530076026916504, |
| "learning_rate": 1.2306818842696716e-06, |
| "loss": 0.3336, |
| "step": 1186 |
| }, |
| { |
| "epoch": 2.3883299798792756, |
| "grad_norm": 1.8843131065368652, |
| "learning_rate": 1.222996024951953e-06, |
| "loss": 0.324, |
| "step": 1187 |
| }, |
| { |
| "epoch": 2.3903420523138834, |
| "grad_norm": 2.0479204654693604, |
| "learning_rate": 1.21533089516316e-06, |
| "loss": 0.3317, |
| "step": 1188 |
| }, |
| { |
| "epoch": 2.3923541247484907, |
| "grad_norm": 1.8047560453414917, |
| "learning_rate": 1.2076865369722246e-06, |
| "loss": 0.2889, |
| "step": 1189 |
| }, |
| { |
| "epoch": 2.3943661971830985, |
| "grad_norm": 1.9316056966781616, |
| "learning_rate": 1.2000629923340801e-06, |
| "loss": 0.3171, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.3963782696177063, |
| "grad_norm": 1.7541041374206543, |
| "learning_rate": 1.1924603030894277e-06, |
| "loss": 0.2852, |
| "step": 1191 |
| }, |
| { |
| "epoch": 2.398390342052314, |
| "grad_norm": 2.0493853092193604, |
| "learning_rate": 1.184878510964504e-06, |
| "loss": 0.3269, |
| "step": 1192 |
| }, |
| { |
| "epoch": 2.4004024144869214, |
| "grad_norm": 1.8844995498657227, |
| "learning_rate": 1.1773176575708544e-06, |
| "loss": 0.335, |
| "step": 1193 |
| }, |
| { |
| "epoch": 2.402414486921529, |
| "grad_norm": 2.019207715988159, |
| "learning_rate": 1.1697777844051105e-06, |
| "loss": 0.3085, |
| "step": 1194 |
| }, |
| { |
| "epoch": 2.404426559356137, |
| "grad_norm": 1.9953314065933228, |
| "learning_rate": 1.1622589328487505e-06, |
| "loss": 0.3153, |
| "step": 1195 |
| }, |
| { |
| "epoch": 2.4064386317907447, |
| "grad_norm": 1.9246023893356323, |
| "learning_rate": 1.1547611441678836e-06, |
| "loss": 0.3251, |
| "step": 1196 |
| }, |
| { |
| "epoch": 2.408450704225352, |
| "grad_norm": 1.9564388990402222, |
| "learning_rate": 1.1472844595130145e-06, |
| "loss": 0.3349, |
| "step": 1197 |
| }, |
| { |
| "epoch": 2.41046277665996, |
| "grad_norm": 1.972939372062683, |
| "learning_rate": 1.1398289199188262e-06, |
| "loss": 0.298, |
| "step": 1198 |
| }, |
| { |
| "epoch": 2.4124748490945676, |
| "grad_norm": 1.9610313177108765, |
| "learning_rate": 1.132394566303946e-06, |
| "loss": 0.3106, |
| "step": 1199 |
| }, |
| { |
| "epoch": 2.414486921529175, |
| "grad_norm": 1.8735629320144653, |
| "learning_rate": 1.124981439470726e-06, |
| "loss": 0.3173, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.4164989939637826, |
| "grad_norm": 2.084070920944214, |
| "learning_rate": 1.1175895801050185e-06, |
| "loss": 0.3334, |
| "step": 1201 |
| }, |
| { |
| "epoch": 2.4185110663983904, |
| "grad_norm": 1.9063400030136108, |
| "learning_rate": 1.110219028775954e-06, |
| "loss": 0.3069, |
| "step": 1202 |
| }, |
| { |
| "epoch": 2.4205231388329977, |
| "grad_norm": 2.0351479053497314, |
| "learning_rate": 1.1028698259357162e-06, |
| "loss": 0.3225, |
| "step": 1203 |
| }, |
| { |
| "epoch": 2.4225352112676055, |
| "grad_norm": 2.048466682434082, |
| "learning_rate": 1.09554201191932e-06, |
| "loss": 0.3249, |
| "step": 1204 |
| }, |
| { |
| "epoch": 2.4245472837022133, |
| "grad_norm": 1.9268667697906494, |
| "learning_rate": 1.0882356269443912e-06, |
| "loss": 0.3009, |
| "step": 1205 |
| }, |
| { |
| "epoch": 2.426559356136821, |
| "grad_norm": 1.935354232788086, |
| "learning_rate": 1.080950711110943e-06, |
| "loss": 0.312, |
| "step": 1206 |
| }, |
| { |
| "epoch": 2.4285714285714284, |
| "grad_norm": 2.054149627685547, |
| "learning_rate": 1.0736873044011632e-06, |
| "loss": 0.3133, |
| "step": 1207 |
| }, |
| { |
| "epoch": 2.430583501006036, |
| "grad_norm": 1.9512964487075806, |
| "learning_rate": 1.066445446679189e-06, |
| "loss": 0.3184, |
| "step": 1208 |
| }, |
| { |
| "epoch": 2.432595573440644, |
| "grad_norm": 1.9467830657958984, |
| "learning_rate": 1.0592251776908857e-06, |
| "loss": 0.3043, |
| "step": 1209 |
| }, |
| { |
| "epoch": 2.4346076458752517, |
| "grad_norm": 1.9755560159683228, |
| "learning_rate": 1.052026537063634e-06, |
| "loss": 0.3321, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.436619718309859, |
| "grad_norm": 2.049520492553711, |
| "learning_rate": 1.044849564306112e-06, |
| "loss": 0.337, |
| "step": 1211 |
| }, |
| { |
| "epoch": 2.438631790744467, |
| "grad_norm": 2.0248889923095703, |
| "learning_rate": 1.037694298808074e-06, |
| "loss": 0.3206, |
| "step": 1212 |
| }, |
| { |
| "epoch": 2.4406438631790746, |
| "grad_norm": 1.9970359802246094, |
| "learning_rate": 1.03056077984014e-06, |
| "loss": 0.3217, |
| "step": 1213 |
| }, |
| { |
| "epoch": 2.442655935613682, |
| "grad_norm": 1.858817219734192, |
| "learning_rate": 1.023449046553575e-06, |
| "loss": 0.3107, |
| "step": 1214 |
| }, |
| { |
| "epoch": 2.4446680080482897, |
| "grad_norm": 1.7835618257522583, |
| "learning_rate": 1.0163591379800796e-06, |
| "loss": 0.3038, |
| "step": 1215 |
| }, |
| { |
| "epoch": 2.4466800804828974, |
| "grad_norm": 2.1031856536865234, |
| "learning_rate": 1.0092910930315698e-06, |
| "loss": 0.3268, |
| "step": 1216 |
| }, |
| { |
| "epoch": 2.448692152917505, |
| "grad_norm": 1.9893072843551636, |
| "learning_rate": 1.002244950499966e-06, |
| "loss": 0.3081, |
| "step": 1217 |
| }, |
| { |
| "epoch": 2.4507042253521125, |
| "grad_norm": 1.9361686706542969, |
| "learning_rate": 9.952207490569816e-07, |
| "loss": 0.3101, |
| "step": 1218 |
| }, |
| { |
| "epoch": 2.4527162977867203, |
| "grad_norm": 1.8221616744995117, |
| "learning_rate": 9.882185272539107e-07, |
| "loss": 0.3039, |
| "step": 1219 |
| }, |
| { |
| "epoch": 2.454728370221328, |
| "grad_norm": 1.901294469833374, |
| "learning_rate": 9.81238323521415e-07, |
| "loss": 0.3056, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.456740442655936, |
| "grad_norm": 1.874755620956421, |
| "learning_rate": 9.742801761693122e-07, |
| "loss": 0.2944, |
| "step": 1221 |
| }, |
| { |
| "epoch": 2.458752515090543, |
| "grad_norm": 1.930405855178833, |
| "learning_rate": 9.673441233863661e-07, |
| "loss": 0.3138, |
| "step": 1222 |
| }, |
| { |
| "epoch": 2.460764587525151, |
| "grad_norm": 1.8764375448226929, |
| "learning_rate": 9.604302032400787e-07, |
| "loss": 0.316, |
| "step": 1223 |
| }, |
| { |
| "epoch": 2.4627766599597587, |
| "grad_norm": 1.952162265777588, |
| "learning_rate": 9.535384536764807e-07, |
| "loss": 0.3254, |
| "step": 1224 |
| }, |
| { |
| "epoch": 2.464788732394366, |
| "grad_norm": 1.8980810642242432, |
| "learning_rate": 9.466689125199247e-07, |
| "loss": 0.3177, |
| "step": 1225 |
| }, |
| { |
| "epoch": 2.466800804828974, |
| "grad_norm": 1.9399605989456177, |
| "learning_rate": 9.39821617472872e-07, |
| "loss": 0.3204, |
| "step": 1226 |
| }, |
| { |
| "epoch": 2.4688128772635816, |
| "grad_norm": 1.874392032623291, |
| "learning_rate": 9.329966061156887e-07, |
| "loss": 0.3042, |
| "step": 1227 |
| }, |
| { |
| "epoch": 2.470824949698189, |
| "grad_norm": 2.0169479846954346, |
| "learning_rate": 9.261939159064465e-07, |
| "loss": 0.3132, |
| "step": 1228 |
| }, |
| { |
| "epoch": 2.4728370221327967, |
| "grad_norm": 1.8879144191741943, |
| "learning_rate": 9.194135841807028e-07, |
| "loss": 0.3095, |
| "step": 1229 |
| }, |
| { |
| "epoch": 2.4748490945674044, |
| "grad_norm": 1.959427833557129, |
| "learning_rate": 9.12655648151311e-07, |
| "loss": 0.3175, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.476861167002012, |
| "grad_norm": 1.9600000381469727, |
| "learning_rate": 9.059201449082045e-07, |
| "loss": 0.315, |
| "step": 1231 |
| }, |
| { |
| "epoch": 2.4788732394366195, |
| "grad_norm": 1.9241917133331299, |
| "learning_rate": 8.992071114181977e-07, |
| "loss": 0.3196, |
| "step": 1232 |
| }, |
| { |
| "epoch": 2.4808853118712273, |
| "grad_norm": 2.018326997756958, |
| "learning_rate": 8.925165845247858e-07, |
| "loss": 0.3501, |
| "step": 1233 |
| }, |
| { |
| "epoch": 2.482897384305835, |
| "grad_norm": 1.9944074153900146, |
| "learning_rate": 8.858486009479384e-07, |
| "loss": 0.3331, |
| "step": 1234 |
| }, |
| { |
| "epoch": 2.484909456740443, |
| "grad_norm": 1.9525293111801147, |
| "learning_rate": 8.792031972838966e-07, |
| "loss": 0.3029, |
| "step": 1235 |
| }, |
| { |
| "epoch": 2.48692152917505, |
| "grad_norm": 1.9734416007995605, |
| "learning_rate": 8.7258041000498e-07, |
| "loss": 0.3137, |
| "step": 1236 |
| }, |
| { |
| "epoch": 2.488933601609658, |
| "grad_norm": 2.0141916275024414, |
| "learning_rate": 8.659802754593805e-07, |
| "loss": 0.3307, |
| "step": 1237 |
| }, |
| { |
| "epoch": 2.4909456740442657, |
| "grad_norm": 1.8858426809310913, |
| "learning_rate": 8.594028298709605e-07, |
| "loss": 0.3147, |
| "step": 1238 |
| }, |
| { |
| "epoch": 2.492957746478873, |
| "grad_norm": 1.9975107908248901, |
| "learning_rate": 8.528481093390606e-07, |
| "loss": 0.3344, |
| "step": 1239 |
| }, |
| { |
| "epoch": 2.494969818913481, |
| "grad_norm": 1.8693078756332397, |
| "learning_rate": 8.463161498382949e-07, |
| "loss": 0.3246, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.4969818913480886, |
| "grad_norm": 1.9392619132995605, |
| "learning_rate": 8.398069872183607e-07, |
| "loss": 0.2963, |
| "step": 1241 |
| }, |
| { |
| "epoch": 2.4989939637826963, |
| "grad_norm": 2.0875697135925293, |
| "learning_rate": 8.333206572038377e-07, |
| "loss": 0.3388, |
| "step": 1242 |
| }, |
| { |
| "epoch": 2.5010060362173037, |
| "grad_norm": 1.840063214302063, |
| "learning_rate": 8.268571953939897e-07, |
| "loss": 0.3095, |
| "step": 1243 |
| }, |
| { |
| "epoch": 2.5030181086519114, |
| "grad_norm": 2.0050125122070312, |
| "learning_rate": 8.204166372625727e-07, |
| "loss": 0.3182, |
| "step": 1244 |
| }, |
| { |
| "epoch": 2.505030181086519, |
| "grad_norm": 1.988363265991211, |
| "learning_rate": 8.139990181576391e-07, |
| "loss": 0.3323, |
| "step": 1245 |
| }, |
| { |
| "epoch": 2.507042253521127, |
| "grad_norm": 1.9448785781860352, |
| "learning_rate": 8.07604373301345e-07, |
| "loss": 0.3304, |
| "step": 1246 |
| }, |
| { |
| "epoch": 2.5090543259557343, |
| "grad_norm": 1.9935352802276611, |
| "learning_rate": 8.012327377897561e-07, |
| "loss": 0.3036, |
| "step": 1247 |
| }, |
| { |
| "epoch": 2.511066398390342, |
| "grad_norm": 1.865787386894226, |
| "learning_rate": 7.948841465926533e-07, |
| "loss": 0.3221, |
| "step": 1248 |
| }, |
| { |
| "epoch": 2.51307847082495, |
| "grad_norm": 1.9329603910446167, |
| "learning_rate": 7.885586345533397e-07, |
| "loss": 0.2934, |
| "step": 1249 |
| }, |
| { |
| "epoch": 2.515090543259557, |
| "grad_norm": 1.9562501907348633, |
| "learning_rate": 7.822562363884584e-07, |
| "loss": 0.333, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.517102615694165, |
| "grad_norm": 2.2125632762908936, |
| "learning_rate": 7.759769866877892e-07, |
| "loss": 0.3409, |
| "step": 1251 |
| }, |
| { |
| "epoch": 2.5191146881287727, |
| "grad_norm": 1.965734601020813, |
| "learning_rate": 7.697209199140676e-07, |
| "loss": 0.2914, |
| "step": 1252 |
| }, |
| { |
| "epoch": 2.52112676056338, |
| "grad_norm": 1.9423799514770508, |
| "learning_rate": 7.634880704027936e-07, |
| "loss": 0.3089, |
| "step": 1253 |
| }, |
| { |
| "epoch": 2.523138832997988, |
| "grad_norm": 1.93148672580719, |
| "learning_rate": 7.572784723620424e-07, |
| "loss": 0.297, |
| "step": 1254 |
| }, |
| { |
| "epoch": 2.5251509054325956, |
| "grad_norm": 1.876718282699585, |
| "learning_rate": 7.510921598722765e-07, |
| "loss": 0.3018, |
| "step": 1255 |
| }, |
| { |
| "epoch": 2.5271629778672033, |
| "grad_norm": 1.8500628471374512, |
| "learning_rate": 7.449291668861575e-07, |
| "loss": 0.3041, |
| "step": 1256 |
| }, |
| { |
| "epoch": 2.529175050301811, |
| "grad_norm": 1.909126877784729, |
| "learning_rate": 7.387895272283635e-07, |
| "loss": 0.3115, |
| "step": 1257 |
| }, |
| { |
| "epoch": 2.5311871227364184, |
| "grad_norm": 1.8048720359802246, |
| "learning_rate": 7.326732745954001e-07, |
| "loss": 0.2716, |
| "step": 1258 |
| }, |
| { |
| "epoch": 2.533199195171026, |
| "grad_norm": 2.0273842811584473, |
| "learning_rate": 7.265804425554202e-07, |
| "loss": 0.3362, |
| "step": 1259 |
| }, |
| { |
| "epoch": 2.535211267605634, |
| "grad_norm": 1.8919765949249268, |
| "learning_rate": 7.205110645480307e-07, |
| "loss": 0.3133, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.5372233400402413, |
| "grad_norm": 1.9173896312713623, |
| "learning_rate": 7.144651738841174e-07, |
| "loss": 0.2987, |
| "step": 1261 |
| }, |
| { |
| "epoch": 2.539235412474849, |
| "grad_norm": 1.8675278425216675, |
| "learning_rate": 7.084428037456587e-07, |
| "loss": 0.3081, |
| "step": 1262 |
| }, |
| { |
| "epoch": 2.541247484909457, |
| "grad_norm": 2.023757219314575, |
| "learning_rate": 7.024439871855448e-07, |
| "loss": 0.3106, |
| "step": 1263 |
| }, |
| { |
| "epoch": 2.543259557344064, |
| "grad_norm": 1.903003215789795, |
| "learning_rate": 6.96468757127396e-07, |
| "loss": 0.3097, |
| "step": 1264 |
| }, |
| { |
| "epoch": 2.545271629778672, |
| "grad_norm": 2.010345458984375, |
| "learning_rate": 6.905171463653798e-07, |
| "loss": 0.3248, |
| "step": 1265 |
| }, |
| { |
| "epoch": 2.5472837022132797, |
| "grad_norm": 1.883948802947998, |
| "learning_rate": 6.845891875640331e-07, |
| "loss": 0.3153, |
| "step": 1266 |
| }, |
| { |
| "epoch": 2.5492957746478875, |
| "grad_norm": 1.9149082899093628, |
| "learning_rate": 6.786849132580841e-07, |
| "loss": 0.3049, |
| "step": 1267 |
| }, |
| { |
| "epoch": 2.551307847082495, |
| "grad_norm": 2.012042284011841, |
| "learning_rate": 6.728043558522706e-07, |
| "loss": 0.3291, |
| "step": 1268 |
| }, |
| { |
| "epoch": 2.5533199195171026, |
| "grad_norm": 1.913818359375, |
| "learning_rate": 6.669475476211628e-07, |
| "loss": 0.3248, |
| "step": 1269 |
| }, |
| { |
| "epoch": 2.5553319919517103, |
| "grad_norm": 1.8596763610839844, |
| "learning_rate": 6.611145207089897e-07, |
| "loss": 0.2996, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.557344064386318, |
| "grad_norm": 1.9084898233413696, |
| "learning_rate": 6.55305307129459e-07, |
| "loss": 0.2944, |
| "step": 1271 |
| }, |
| { |
| "epoch": 2.5593561368209254, |
| "grad_norm": 1.9268581867218018, |
| "learning_rate": 6.49519938765582e-07, |
| "loss": 0.3169, |
| "step": 1272 |
| }, |
| { |
| "epoch": 2.561368209255533, |
| "grad_norm": 2.001847505569458, |
| "learning_rate": 6.437584473694991e-07, |
| "loss": 0.3339, |
| "step": 1273 |
| }, |
| { |
| "epoch": 2.563380281690141, |
| "grad_norm": 2.047466993331909, |
| "learning_rate": 6.380208645623037e-07, |
| "loss": 0.3148, |
| "step": 1274 |
| }, |
| { |
| "epoch": 2.5653923541247483, |
| "grad_norm": 2.0780773162841797, |
| "learning_rate": 6.323072218338739e-07, |
| "loss": 0.3154, |
| "step": 1275 |
| }, |
| { |
| "epoch": 2.567404426559356, |
| "grad_norm": 1.877504825592041, |
| "learning_rate": 6.266175505426958e-07, |
| "loss": 0.306, |
| "step": 1276 |
| }, |
| { |
| "epoch": 2.569416498993964, |
| "grad_norm": 1.825515627861023, |
| "learning_rate": 6.209518819156895e-07, |
| "loss": 0.3002, |
| "step": 1277 |
| }, |
| { |
| "epoch": 2.571428571428571, |
| "grad_norm": 1.8743566274642944, |
| "learning_rate": 6.15310247048041e-07, |
| "loss": 0.3089, |
| "step": 1278 |
| }, |
| { |
| "epoch": 2.573440643863179, |
| "grad_norm": 1.9637446403503418, |
| "learning_rate": 6.096926769030298e-07, |
| "loss": 0.3118, |
| "step": 1279 |
| }, |
| { |
| "epoch": 2.5754527162977867, |
| "grad_norm": 2.0054385662078857, |
| "learning_rate": 6.040992023118624e-07, |
| "loss": 0.302, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.5774647887323945, |
| "grad_norm": 1.9758704900741577, |
| "learning_rate": 5.985298539734973e-07, |
| "loss": 0.3302, |
| "step": 1281 |
| }, |
| { |
| "epoch": 2.5794768611670023, |
| "grad_norm": 1.9158987998962402, |
| "learning_rate": 5.929846624544821e-07, |
| "loss": 0.3166, |
| "step": 1282 |
| }, |
| { |
| "epoch": 2.5814889336016096, |
| "grad_norm": 1.9502754211425781, |
| "learning_rate": 5.874636581887804e-07, |
| "loss": 0.3229, |
| "step": 1283 |
| }, |
| { |
| "epoch": 2.5835010060362174, |
| "grad_norm": 1.9610239267349243, |
| "learning_rate": 5.819668714776122e-07, |
| "loss": 0.3149, |
| "step": 1284 |
| }, |
| { |
| "epoch": 2.585513078470825, |
| "grad_norm": 1.9487340450286865, |
| "learning_rate": 5.76494332489278e-07, |
| "loss": 0.3048, |
| "step": 1285 |
| }, |
| { |
| "epoch": 2.5875251509054324, |
| "grad_norm": 1.97187340259552, |
| "learning_rate": 5.710460712589993e-07, |
| "loss": 0.3304, |
| "step": 1286 |
| }, |
| { |
| "epoch": 2.58953722334004, |
| "grad_norm": 1.9745339155197144, |
| "learning_rate": 5.656221176887572e-07, |
| "loss": 0.3121, |
| "step": 1287 |
| }, |
| { |
| "epoch": 2.591549295774648, |
| "grad_norm": 1.8480572700500488, |
| "learning_rate": 5.602225015471175e-07, |
| "loss": 0.3001, |
| "step": 1288 |
| }, |
| { |
| "epoch": 2.5935613682092553, |
| "grad_norm": 2.0272209644317627, |
| "learning_rate": 5.548472524690784e-07, |
| "loss": 0.3463, |
| "step": 1289 |
| }, |
| { |
| "epoch": 2.595573440643863, |
| "grad_norm": 2.1198689937591553, |
| "learning_rate": 5.494963999559011e-07, |
| "loss": 0.328, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.597585513078471, |
| "grad_norm": 1.9094234704971313, |
| "learning_rate": 5.441699733749479e-07, |
| "loss": 0.3346, |
| "step": 1291 |
| }, |
| { |
| "epoch": 2.5995975855130786, |
| "grad_norm": 1.8595271110534668, |
| "learning_rate": 5.388680019595266e-07, |
| "loss": 0.3198, |
| "step": 1292 |
| }, |
| { |
| "epoch": 2.6016096579476864, |
| "grad_norm": 1.8942431211471558, |
| "learning_rate": 5.335905148087256e-07, |
| "loss": 0.3185, |
| "step": 1293 |
| }, |
| { |
| "epoch": 2.6036217303822937, |
| "grad_norm": 1.82722008228302, |
| "learning_rate": 5.283375408872538e-07, |
| "loss": 0.2988, |
| "step": 1294 |
| }, |
| { |
| "epoch": 2.6056338028169015, |
| "grad_norm": 1.8183109760284424, |
| "learning_rate": 5.231091090252832e-07, |
| "loss": 0.3016, |
| "step": 1295 |
| }, |
| { |
| "epoch": 2.6076458752515093, |
| "grad_norm": 1.949315071105957, |
| "learning_rate": 5.179052479182889e-07, |
| "loss": 0.2915, |
| "step": 1296 |
| }, |
| { |
| "epoch": 2.6096579476861166, |
| "grad_norm": 1.9181135892868042, |
| "learning_rate": 5.127259861268974e-07, |
| "loss": 0.2865, |
| "step": 1297 |
| }, |
| { |
| "epoch": 2.6116700201207244, |
| "grad_norm": 1.8018304109573364, |
| "learning_rate": 5.075713520767201e-07, |
| "loss": 0.2975, |
| "step": 1298 |
| }, |
| { |
| "epoch": 2.613682092555332, |
| "grad_norm": 1.929329514503479, |
| "learning_rate": 5.024413740582074e-07, |
| "loss": 0.3304, |
| "step": 1299 |
| }, |
| { |
| "epoch": 2.6156941649899395, |
| "grad_norm": 1.8946964740753174, |
| "learning_rate": 4.973360802264859e-07, |
| "loss": 0.2947, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.6177062374245472, |
| "grad_norm": 1.8121057748794556, |
| "learning_rate": 4.922554986012068e-07, |
| "loss": 0.3002, |
| "step": 1301 |
| }, |
| { |
| "epoch": 2.619718309859155, |
| "grad_norm": 1.8261134624481201, |
| "learning_rate": 4.871996570663934e-07, |
| "loss": 0.274, |
| "step": 1302 |
| }, |
| { |
| "epoch": 2.6217303822937623, |
| "grad_norm": 1.8316187858581543, |
| "learning_rate": 4.82168583370285e-07, |
| "loss": 0.2878, |
| "step": 1303 |
| }, |
| { |
| "epoch": 2.62374245472837, |
| "grad_norm": 1.8134701251983643, |
| "learning_rate": 4.771623051251878e-07, |
| "loss": 0.2951, |
| "step": 1304 |
| }, |
| { |
| "epoch": 2.625754527162978, |
| "grad_norm": 1.8709946870803833, |
| "learning_rate": 4.721808498073205e-07, |
| "loss": 0.3039, |
| "step": 1305 |
| }, |
| { |
| "epoch": 2.6277665995975856, |
| "grad_norm": 2.038022518157959, |
| "learning_rate": 4.6722424475666715e-07, |
| "loss": 0.3066, |
| "step": 1306 |
| }, |
| { |
| "epoch": 2.6297786720321934, |
| "grad_norm": 1.8829350471496582, |
| "learning_rate": 4.622925171768211e-07, |
| "loss": 0.3043, |
| "step": 1307 |
| }, |
| { |
| "epoch": 2.6317907444668007, |
| "grad_norm": 1.8870759010314941, |
| "learning_rate": 4.57385694134842e-07, |
| "loss": 0.3039, |
| "step": 1308 |
| }, |
| { |
| "epoch": 2.6338028169014085, |
| "grad_norm": 2.1094441413879395, |
| "learning_rate": 4.5250380256110335e-07, |
| "loss": 0.3452, |
| "step": 1309 |
| }, |
| { |
| "epoch": 2.6358148893360163, |
| "grad_norm": 1.867954134941101, |
| "learning_rate": 4.476468692491487e-07, |
| "loss": 0.2878, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.6378269617706236, |
| "grad_norm": 1.9955129623413086, |
| "learning_rate": 4.428149208555388e-07, |
| "loss": 0.2993, |
| "step": 1311 |
| }, |
| { |
| "epoch": 2.6398390342052314, |
| "grad_norm": 1.785326361656189, |
| "learning_rate": 4.380079838997087e-07, |
| "loss": 0.3052, |
| "step": 1312 |
| }, |
| { |
| "epoch": 2.641851106639839, |
| "grad_norm": 1.8974698781967163, |
| "learning_rate": 4.3322608476382255e-07, |
| "loss": 0.2985, |
| "step": 1313 |
| }, |
| { |
| "epoch": 2.6438631790744465, |
| "grad_norm": 1.9238228797912598, |
| "learning_rate": 4.2846924969262736e-07, |
| "loss": 0.3157, |
| "step": 1314 |
| }, |
| { |
| "epoch": 2.6458752515090542, |
| "grad_norm": 2.059622287750244, |
| "learning_rate": 4.237375047933118e-07, |
| "loss": 0.3192, |
| "step": 1315 |
| }, |
| { |
| "epoch": 2.647887323943662, |
| "grad_norm": 1.8804223537445068, |
| "learning_rate": 4.190308760353595e-07, |
| "loss": 0.3101, |
| "step": 1316 |
| }, |
| { |
| "epoch": 2.6498993963782698, |
| "grad_norm": 1.9766371250152588, |
| "learning_rate": 4.1434938925040804e-07, |
| "loss": 0.3165, |
| "step": 1317 |
| }, |
| { |
| "epoch": 2.6519114688128775, |
| "grad_norm": 1.8625285625457764, |
| "learning_rate": 4.0969307013210445e-07, |
| "loss": 0.3115, |
| "step": 1318 |
| }, |
| { |
| "epoch": 2.653923541247485, |
| "grad_norm": 1.8935160636901855, |
| "learning_rate": 4.050619442359721e-07, |
| "loss": 0.2961, |
| "step": 1319 |
| }, |
| { |
| "epoch": 2.6559356136820926, |
| "grad_norm": 2.0412039756774902, |
| "learning_rate": 4.004560369792593e-07, |
| "loss": 0.3169, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.6579476861167004, |
| "grad_norm": 1.8851536512374878, |
| "learning_rate": 3.958753736408105e-07, |
| "loss": 0.3136, |
| "step": 1321 |
| }, |
| { |
| "epoch": 2.6599597585513077, |
| "grad_norm": 1.9968008995056152, |
| "learning_rate": 3.91319979360919e-07, |
| "loss": 0.302, |
| "step": 1322 |
| }, |
| { |
| "epoch": 2.6619718309859155, |
| "grad_norm": 1.869197964668274, |
| "learning_rate": 3.867898791411956e-07, |
| "loss": 0.3141, |
| "step": 1323 |
| }, |
| { |
| "epoch": 2.6639839034205233, |
| "grad_norm": 1.8277980089187622, |
| "learning_rate": 3.822850978444254e-07, |
| "loss": 0.3149, |
| "step": 1324 |
| }, |
| { |
| "epoch": 2.6659959758551306, |
| "grad_norm": 1.792640209197998, |
| "learning_rate": 3.778056601944358e-07, |
| "loss": 0.3035, |
| "step": 1325 |
| }, |
| { |
| "epoch": 2.6680080482897384, |
| "grad_norm": 1.92755126953125, |
| "learning_rate": 3.733515907759594e-07, |
| "loss": 0.3227, |
| "step": 1326 |
| }, |
| { |
| "epoch": 2.670020120724346, |
| "grad_norm": 1.9215903282165527, |
| "learning_rate": 3.6892291403449963e-07, |
| "loss": 0.3126, |
| "step": 1327 |
| }, |
| { |
| "epoch": 2.6720321931589535, |
| "grad_norm": 2.0002219676971436, |
| "learning_rate": 3.645196542761953e-07, |
| "loss": 0.3399, |
| "step": 1328 |
| }, |
| { |
| "epoch": 2.6740442655935612, |
| "grad_norm": 1.9246290922164917, |
| "learning_rate": 3.6014183566768725e-07, |
| "loss": 0.3389, |
| "step": 1329 |
| }, |
| { |
| "epoch": 2.676056338028169, |
| "grad_norm": 1.8840545415878296, |
| "learning_rate": 3.557894822359864e-07, |
| "loss": 0.3063, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.6780684104627768, |
| "grad_norm": 1.9435675144195557, |
| "learning_rate": 3.5146261786834225e-07, |
| "loss": 0.3238, |
| "step": 1331 |
| }, |
| { |
| "epoch": 2.6800804828973845, |
| "grad_norm": 1.9354828596115112, |
| "learning_rate": 3.471612663121121e-07, |
| "loss": 0.3235, |
| "step": 1332 |
| }, |
| { |
| "epoch": 2.682092555331992, |
| "grad_norm": 1.8880016803741455, |
| "learning_rate": 3.428854511746293e-07, |
| "loss": 0.3195, |
| "step": 1333 |
| }, |
| { |
| "epoch": 2.6841046277665996, |
| "grad_norm": 1.8282817602157593, |
| "learning_rate": 3.386351959230738e-07, |
| "loss": 0.3048, |
| "step": 1334 |
| }, |
| { |
| "epoch": 2.6861167002012074, |
| "grad_norm": 1.8626350164413452, |
| "learning_rate": 3.344105238843437e-07, |
| "loss": 0.3023, |
| "step": 1335 |
| }, |
| { |
| "epoch": 2.6881287726358147, |
| "grad_norm": 1.9268531799316406, |
| "learning_rate": 3.302114582449295e-07, |
| "loss": 0.3143, |
| "step": 1336 |
| }, |
| { |
| "epoch": 2.6901408450704225, |
| "grad_norm": 1.9087598323822021, |
| "learning_rate": 3.2603802205078195e-07, |
| "loss": 0.3051, |
| "step": 1337 |
| }, |
| { |
| "epoch": 2.6921529175050303, |
| "grad_norm": 1.9388471841812134, |
| "learning_rate": 3.2189023820719034e-07, |
| "loss": 0.294, |
| "step": 1338 |
| }, |
| { |
| "epoch": 2.6941649899396376, |
| "grad_norm": 1.8099417686462402, |
| "learning_rate": 3.177681294786539e-07, |
| "loss": 0.2976, |
| "step": 1339 |
| }, |
| { |
| "epoch": 2.6961770623742454, |
| "grad_norm": 2.025315761566162, |
| "learning_rate": 3.136717184887589e-07, |
| "loss": 0.301, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.698189134808853, |
| "grad_norm": 1.9770852327346802, |
| "learning_rate": 3.0960102772005174e-07, |
| "loss": 0.3199, |
| "step": 1341 |
| }, |
| { |
| "epoch": 2.700201207243461, |
| "grad_norm": 1.8802844285964966, |
| "learning_rate": 3.055560795139173e-07, |
| "loss": 0.3248, |
| "step": 1342 |
| }, |
| { |
| "epoch": 2.7022132796780687, |
| "grad_norm": 1.8507022857666016, |
| "learning_rate": 3.015368960704584e-07, |
| "loss": 0.2858, |
| "step": 1343 |
| }, |
| { |
| "epoch": 2.704225352112676, |
| "grad_norm": 2.052767753601074, |
| "learning_rate": 2.975434994483689e-07, |
| "loss": 0.3219, |
| "step": 1344 |
| }, |
| { |
| "epoch": 2.7062374245472838, |
| "grad_norm": 1.999880313873291, |
| "learning_rate": 2.9357591156481793e-07, |
| "loss": 0.3258, |
| "step": 1345 |
| }, |
| { |
| "epoch": 2.7082494969818915, |
| "grad_norm": 1.8796782493591309, |
| "learning_rate": 2.896341541953257e-07, |
| "loss": 0.3131, |
| "step": 1346 |
| }, |
| { |
| "epoch": 2.710261569416499, |
| "grad_norm": 1.8701180219650269, |
| "learning_rate": 2.85718248973646e-07, |
| "loss": 0.285, |
| "step": 1347 |
| }, |
| { |
| "epoch": 2.7122736418511066, |
| "grad_norm": 1.8917667865753174, |
| "learning_rate": 2.8182821739164534e-07, |
| "loss": 0.3163, |
| "step": 1348 |
| }, |
| { |
| "epoch": 2.7142857142857144, |
| "grad_norm": 1.933946132659912, |
| "learning_rate": 2.779640807991896e-07, |
| "loss": 0.3079, |
| "step": 1349 |
| }, |
| { |
| "epoch": 2.7162977867203217, |
| "grad_norm": 2.010690689086914, |
| "learning_rate": 2.74125860404022e-07, |
| "loss": 0.3163, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.7183098591549295, |
| "grad_norm": 1.8380995988845825, |
| "learning_rate": 2.7031357727164865e-07, |
| "loss": 0.3043, |
| "step": 1351 |
| }, |
| { |
| "epoch": 2.7203219315895373, |
| "grad_norm": 2.11596417427063, |
| "learning_rate": 2.665272523252216e-07, |
| "loss": 0.3343, |
| "step": 1352 |
| }, |
| { |
| "epoch": 2.7223340040241446, |
| "grad_norm": 2.0510504245758057, |
| "learning_rate": 2.627669063454291e-07, |
| "loss": 0.3328, |
| "step": 1353 |
| }, |
| { |
| "epoch": 2.7243460764587524, |
| "grad_norm": 1.8645933866500854, |
| "learning_rate": 2.5903255997037246e-07, |
| "loss": 0.3116, |
| "step": 1354 |
| }, |
| { |
| "epoch": 2.72635814889336, |
| "grad_norm": 1.8328444957733154, |
| "learning_rate": 2.553242336954631e-07, |
| "loss": 0.3054, |
| "step": 1355 |
| }, |
| { |
| "epoch": 2.728370221327968, |
| "grad_norm": 1.905352234840393, |
| "learning_rate": 2.516419478733012e-07, |
| "loss": 0.3156, |
| "step": 1356 |
| }, |
| { |
| "epoch": 2.7303822937625757, |
| "grad_norm": 1.8690829277038574, |
| "learning_rate": 2.479857227135685e-07, |
| "loss": 0.3138, |
| "step": 1357 |
| }, |
| { |
| "epoch": 2.732394366197183, |
| "grad_norm": 1.8393657207489014, |
| "learning_rate": 2.443555782829188e-07, |
| "loss": 0.3068, |
| "step": 1358 |
| }, |
| { |
| "epoch": 2.734406438631791, |
| "grad_norm": 1.9217677116394043, |
| "learning_rate": 2.407515345048622e-07, |
| "loss": 0.3097, |
| "step": 1359 |
| }, |
| { |
| "epoch": 2.7364185110663986, |
| "grad_norm": 2.0007879734039307, |
| "learning_rate": 2.3717361115966343e-07, |
| "loss": 0.2843, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.738430583501006, |
| "grad_norm": 1.9894534349441528, |
| "learning_rate": 2.3362182788422395e-07, |
| "loss": 0.3149, |
| "step": 1361 |
| }, |
| { |
| "epoch": 2.7404426559356136, |
| "grad_norm": 1.8293792009353638, |
| "learning_rate": 2.300962041719851e-07, |
| "loss": 0.2966, |
| "step": 1362 |
| }, |
| { |
| "epoch": 2.7424547283702214, |
| "grad_norm": 1.9846625328063965, |
| "learning_rate": 2.2659675937281078e-07, |
| "loss": 0.299, |
| "step": 1363 |
| }, |
| { |
| "epoch": 2.7444668008048287, |
| "grad_norm": 2.0236048698425293, |
| "learning_rate": 2.2312351269288712e-07, |
| "loss": 0.3109, |
| "step": 1364 |
| }, |
| { |
| "epoch": 2.7464788732394365, |
| "grad_norm": 1.8248317241668701, |
| "learning_rate": 2.1967648319461577e-07, |
| "loss": 0.3068, |
| "step": 1365 |
| }, |
| { |
| "epoch": 2.7484909456740443, |
| "grad_norm": 1.9960497617721558, |
| "learning_rate": 2.1625568979651012e-07, |
| "loss": 0.3365, |
| "step": 1366 |
| }, |
| { |
| "epoch": 2.750503018108652, |
| "grad_norm": 1.8922979831695557, |
| "learning_rate": 2.1286115127308992e-07, |
| "loss": 0.3029, |
| "step": 1367 |
| }, |
| { |
| "epoch": 2.75251509054326, |
| "grad_norm": 1.9048439264297485, |
| "learning_rate": 2.0949288625477903e-07, |
| "loss": 0.3167, |
| "step": 1368 |
| }, |
| { |
| "epoch": 2.754527162977867, |
| "grad_norm": 1.855001449584961, |
| "learning_rate": 2.061509132278028e-07, |
| "loss": 0.3012, |
| "step": 1369 |
| }, |
| { |
| "epoch": 2.756539235412475, |
| "grad_norm": 1.9068505764007568, |
| "learning_rate": 2.028352505340858e-07, |
| "loss": 0.328, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.7585513078470827, |
| "grad_norm": 2.014235734939575, |
| "learning_rate": 1.9954591637115495e-07, |
| "loss": 0.3387, |
| "step": 1371 |
| }, |
| { |
| "epoch": 2.76056338028169, |
| "grad_norm": 1.9344955682754517, |
| "learning_rate": 1.9628292879203482e-07, |
| "loss": 0.3176, |
| "step": 1372 |
| }, |
| { |
| "epoch": 2.762575452716298, |
| "grad_norm": 1.9466530084609985, |
| "learning_rate": 1.9304630570515182e-07, |
| "loss": 0.3062, |
| "step": 1373 |
| }, |
| { |
| "epoch": 2.7645875251509056, |
| "grad_norm": 1.8726989030838013, |
| "learning_rate": 1.8983606487423255e-07, |
| "loss": 0.3215, |
| "step": 1374 |
| }, |
| { |
| "epoch": 2.766599597585513, |
| "grad_norm": 2.013495445251465, |
| "learning_rate": 1.866522239182117e-07, |
| "loss": 0.3394, |
| "step": 1375 |
| }, |
| { |
| "epoch": 2.7686116700201207, |
| "grad_norm": 1.9069247245788574, |
| "learning_rate": 1.8349480031112977e-07, |
| "loss": 0.3259, |
| "step": 1376 |
| }, |
| { |
| "epoch": 2.7706237424547284, |
| "grad_norm": 1.7492789030075073, |
| "learning_rate": 1.8036381138204051e-07, |
| "loss": 0.2815, |
| "step": 1377 |
| }, |
| { |
| "epoch": 2.7726358148893357, |
| "grad_norm": 2.041621685028076, |
| "learning_rate": 1.7725927431491375e-07, |
| "loss": 0.3234, |
| "step": 1378 |
| }, |
| { |
| "epoch": 2.7746478873239435, |
| "grad_norm": 2.031970977783203, |
| "learning_rate": 1.7418120614854427e-07, |
| "loss": 0.3093, |
| "step": 1379 |
| }, |
| { |
| "epoch": 2.7766599597585513, |
| "grad_norm": 1.9473472833633423, |
| "learning_rate": 1.711296237764548e-07, |
| "loss": 0.3081, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.778672032193159, |
| "grad_norm": 2.1944499015808105, |
| "learning_rate": 1.6810454394680431e-07, |
| "loss": 0.3473, |
| "step": 1381 |
| }, |
| { |
| "epoch": 2.780684104627767, |
| "grad_norm": 1.859397292137146, |
| "learning_rate": 1.6510598326229645e-07, |
| "loss": 0.2997, |
| "step": 1382 |
| }, |
| { |
| "epoch": 2.782696177062374, |
| "grad_norm": 1.9514036178588867, |
| "learning_rate": 1.6213395818009016e-07, |
| "loss": 0.3157, |
| "step": 1383 |
| }, |
| { |
| "epoch": 2.784708249496982, |
| "grad_norm": 2.0373504161834717, |
| "learning_rate": 1.5918848501170647e-07, |
| "loss": 0.326, |
| "step": 1384 |
| }, |
| { |
| "epoch": 2.7867203219315897, |
| "grad_norm": 1.9372187852859497, |
| "learning_rate": 1.5626957992293966e-07, |
| "loss": 0.317, |
| "step": 1385 |
| }, |
| { |
| "epoch": 2.788732394366197, |
| "grad_norm": 1.8588268756866455, |
| "learning_rate": 1.5337725893376954e-07, |
| "loss": 0.2764, |
| "step": 1386 |
| }, |
| { |
| "epoch": 2.790744466800805, |
| "grad_norm": 1.865725040435791, |
| "learning_rate": 1.505115379182731e-07, |
| "loss": 0.3216, |
| "step": 1387 |
| }, |
| { |
| "epoch": 2.7927565392354126, |
| "grad_norm": 1.9945579767227173, |
| "learning_rate": 1.47672432604537e-07, |
| "loss": 0.3149, |
| "step": 1388 |
| }, |
| { |
| "epoch": 2.79476861167002, |
| "grad_norm": 1.7939214706420898, |
| "learning_rate": 1.4485995857457246e-07, |
| "loss": 0.2956, |
| "step": 1389 |
| }, |
| { |
| "epoch": 2.7967806841046277, |
| "grad_norm": 1.8982107639312744, |
| "learning_rate": 1.420741312642282e-07, |
| "loss": 0.3121, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.7987927565392354, |
| "grad_norm": 1.9184291362762451, |
| "learning_rate": 1.3931496596310545e-07, |
| "loss": 0.3006, |
| "step": 1391 |
| }, |
| { |
| "epoch": 2.800804828973843, |
| "grad_norm": 1.929681658744812, |
| "learning_rate": 1.3658247781447642e-07, |
| "loss": 0.308, |
| "step": 1392 |
| }, |
| { |
| "epoch": 2.802816901408451, |
| "grad_norm": 2.064938545227051, |
| "learning_rate": 1.338766818151982e-07, |
| "loss": 0.3255, |
| "step": 1393 |
| }, |
| { |
| "epoch": 2.8048289738430583, |
| "grad_norm": 1.8805738687515259, |
| "learning_rate": 1.3119759281563392e-07, |
| "loss": 0.2883, |
| "step": 1394 |
| }, |
| { |
| "epoch": 2.806841046277666, |
| "grad_norm": 2.030398368835449, |
| "learning_rate": 1.2854522551956738e-07, |
| "loss": 0.3209, |
| "step": 1395 |
| }, |
| { |
| "epoch": 2.808853118712274, |
| "grad_norm": 1.8628559112548828, |
| "learning_rate": 1.2591959448412628e-07, |
| "loss": 0.2927, |
| "step": 1396 |
| }, |
| { |
| "epoch": 2.810865191146881, |
| "grad_norm": 1.9574239253997803, |
| "learning_rate": 1.2332071411969792e-07, |
| "loss": 0.3308, |
| "step": 1397 |
| }, |
| { |
| "epoch": 2.812877263581489, |
| "grad_norm": 2.0328168869018555, |
| "learning_rate": 1.2074859868985377e-07, |
| "loss": 0.3143, |
| "step": 1398 |
| }, |
| { |
| "epoch": 2.8148893360160967, |
| "grad_norm": 1.9412139654159546, |
| "learning_rate": 1.1820326231126944e-07, |
| "loss": 0.3297, |
| "step": 1399 |
| }, |
| { |
| "epoch": 2.816901408450704, |
| "grad_norm": 1.9176777601242065, |
| "learning_rate": 1.1568471895364863e-07, |
| "loss": 0.3229, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.818913480885312, |
| "grad_norm": 1.95015549659729, |
| "learning_rate": 1.1319298243964549e-07, |
| "loss": 0.3056, |
| "step": 1401 |
| }, |
| { |
| "epoch": 2.8209255533199196, |
| "grad_norm": 1.9641227722167969, |
| "learning_rate": 1.107280664447874e-07, |
| "loss": 0.3415, |
| "step": 1402 |
| }, |
| { |
| "epoch": 2.822937625754527, |
| "grad_norm": 1.8761005401611328, |
| "learning_rate": 1.082899844974017e-07, |
| "loss": 0.3046, |
| "step": 1403 |
| }, |
| { |
| "epoch": 2.8249496981891347, |
| "grad_norm": 1.9387608766555786, |
| "learning_rate": 1.0587874997854186e-07, |
| "loss": 0.2972, |
| "step": 1404 |
| }, |
| { |
| "epoch": 2.8269617706237424, |
| "grad_norm": 2.00655460357666, |
| "learning_rate": 1.0349437612191259e-07, |
| "loss": 0.3216, |
| "step": 1405 |
| }, |
| { |
| "epoch": 2.82897384305835, |
| "grad_norm": 1.9893320798873901, |
| "learning_rate": 1.0113687601379818e-07, |
| "loss": 0.3208, |
| "step": 1406 |
| }, |
| { |
| "epoch": 2.830985915492958, |
| "grad_norm": 1.9536393880844116, |
| "learning_rate": 9.880626259298976e-08, |
| "loss": 0.2988, |
| "step": 1407 |
| }, |
| { |
| "epoch": 2.8329979879275653, |
| "grad_norm": 1.9395415782928467, |
| "learning_rate": 9.650254865071428e-08, |
| "loss": 0.3084, |
| "step": 1408 |
| }, |
| { |
| "epoch": 2.835010060362173, |
| "grad_norm": 1.9450585842132568, |
| "learning_rate": 9.422574683056795e-08, |
| "loss": 0.3429, |
| "step": 1409 |
| }, |
| { |
| "epoch": 2.837022132796781, |
| "grad_norm": 1.9292974472045898, |
| "learning_rate": 9.197586962843952e-08, |
| "loss": 0.3018, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.839034205231388, |
| "grad_norm": 1.8370075225830078, |
| "learning_rate": 8.975292939244928e-08, |
| "loss": 0.308, |
| "step": 1411 |
| }, |
| { |
| "epoch": 2.841046277665996, |
| "grad_norm": 2.0159215927124023, |
| "learning_rate": 8.755693832287581e-08, |
| "loss": 0.3139, |
| "step": 1412 |
| }, |
| { |
| "epoch": 2.8430583501006037, |
| "grad_norm": 1.8302010297775269, |
| "learning_rate": 8.538790847209211e-08, |
| "loss": 0.3057, |
| "step": 1413 |
| }, |
| { |
| "epoch": 2.845070422535211, |
| "grad_norm": 1.9254567623138428, |
| "learning_rate": 8.324585174449895e-08, |
| "loss": 0.3305, |
| "step": 1414 |
| }, |
| { |
| "epoch": 2.847082494969819, |
| "grad_norm": 1.7892004251480103, |
| "learning_rate": 8.11307798964589e-08, |
| "loss": 0.2847, |
| "step": 1415 |
| }, |
| { |
| "epoch": 2.8490945674044266, |
| "grad_norm": 1.957893967628479, |
| "learning_rate": 7.90427045362302e-08, |
| "loss": 0.308, |
| "step": 1416 |
| }, |
| { |
| "epoch": 2.8511066398390343, |
| "grad_norm": 1.836108684539795, |
| "learning_rate": 7.698163712390683e-08, |
| "loss": 0.3072, |
| "step": 1417 |
| }, |
| { |
| "epoch": 2.853118712273642, |
| "grad_norm": 1.8814616203308105, |
| "learning_rate": 7.494758897135412e-08, |
| "loss": 0.2938, |
| "step": 1418 |
| }, |
| { |
| "epoch": 2.8551307847082494, |
| "grad_norm": 2.026705265045166, |
| "learning_rate": 7.294057124214438e-08, |
| "loss": 0.3311, |
| "step": 1419 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 1.9151105880737305, |
| "learning_rate": 7.096059495149855e-08, |
| "loss": 0.3127, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.859154929577465, |
| "grad_norm": 1.9340089559555054, |
| "learning_rate": 6.900767096622352e-08, |
| "loss": 0.3123, |
| "step": 1421 |
| }, |
| { |
| "epoch": 2.8611670020120723, |
| "grad_norm": 1.9273947477340698, |
| "learning_rate": 6.708181000465552e-08, |
| "loss": 0.3069, |
| "step": 1422 |
| }, |
| { |
| "epoch": 2.86317907444668, |
| "grad_norm": 1.8585259914398193, |
| "learning_rate": 6.518302263659737e-08, |
| "loss": 0.3073, |
| "step": 1423 |
| }, |
| { |
| "epoch": 2.865191146881288, |
| "grad_norm": 1.9229656457901, |
| "learning_rate": 6.331131928326407e-08, |
| "loss": 0.3171, |
| "step": 1424 |
| }, |
| { |
| "epoch": 2.867203219315895, |
| "grad_norm": 1.8940155506134033, |
| "learning_rate": 6.146671021722284e-08, |
| "loss": 0.3228, |
| "step": 1425 |
| }, |
| { |
| "epoch": 2.869215291750503, |
| "grad_norm": 1.772291660308838, |
| "learning_rate": 5.964920556233767e-08, |
| "loss": 0.2821, |
| "step": 1426 |
| }, |
| { |
| "epoch": 2.8712273641851107, |
| "grad_norm": 1.874535083770752, |
| "learning_rate": 5.785881529371595e-08, |
| "loss": 0.2926, |
| "step": 1427 |
| }, |
| { |
| "epoch": 2.873239436619718, |
| "grad_norm": 2.0020828247070312, |
| "learning_rate": 5.609554923764915e-08, |
| "loss": 0.3234, |
| "step": 1428 |
| }, |
| { |
| "epoch": 2.875251509054326, |
| "grad_norm": 2.036130666732788, |
| "learning_rate": 5.435941707156389e-08, |
| "loss": 0.3074, |
| "step": 1429 |
| }, |
| { |
| "epoch": 2.8772635814889336, |
| "grad_norm": 1.8147860765457153, |
| "learning_rate": 5.265042832396428e-08, |
| "loss": 0.2758, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.8792756539235413, |
| "grad_norm": 2.0033209323883057, |
| "learning_rate": 5.0968592374384116e-08, |
| "loss": 0.3158, |
| "step": 1431 |
| }, |
| { |
| "epoch": 2.881287726358149, |
| "grad_norm": 2.0302250385284424, |
| "learning_rate": 4.931391845333089e-08, |
| "loss": 0.3167, |
| "step": 1432 |
| }, |
| { |
| "epoch": 2.8832997987927564, |
| "grad_norm": 1.8428484201431274, |
| "learning_rate": 4.768641564223852e-08, |
| "loss": 0.3046, |
| "step": 1433 |
| }, |
| { |
| "epoch": 2.885311871227364, |
| "grad_norm": 2.1346139907836914, |
| "learning_rate": 4.608609287341581e-08, |
| "loss": 0.3182, |
| "step": 1434 |
| }, |
| { |
| "epoch": 2.887323943661972, |
| "grad_norm": 1.9685754776000977, |
| "learning_rate": 4.451295892999863e-08, |
| "loss": 0.3058, |
| "step": 1435 |
| }, |
| { |
| "epoch": 2.8893360160965793, |
| "grad_norm": 1.896419882774353, |
| "learning_rate": 4.296702244590056e-08, |
| "loss": 0.2972, |
| "step": 1436 |
| }, |
| { |
| "epoch": 2.891348088531187, |
| "grad_norm": 1.8687481880187988, |
| "learning_rate": 4.144829190576516e-08, |
| "loss": 0.3066, |
| "step": 1437 |
| }, |
| { |
| "epoch": 2.893360160965795, |
| "grad_norm": 1.9098145961761475, |
| "learning_rate": 3.99567756449204e-08, |
| "loss": 0.3276, |
| "step": 1438 |
| }, |
| { |
| "epoch": 2.895372233400402, |
| "grad_norm": 1.8980722427368164, |
| "learning_rate": 3.84924818493343e-08, |
| "loss": 0.2977, |
| "step": 1439 |
| }, |
| { |
| "epoch": 2.89738430583501, |
| "grad_norm": 1.9447804689407349, |
| "learning_rate": 3.705541855556716e-08, |
| "loss": 0.31, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.8993963782696177, |
| "grad_norm": 1.932647943496704, |
| "learning_rate": 3.5645593650728284e-08, |
| "loss": 0.3065, |
| "step": 1441 |
| }, |
| { |
| "epoch": 2.9014084507042255, |
| "grad_norm": 1.9486619234085083, |
| "learning_rate": 3.426301487243433e-08, |
| "loss": 0.3083, |
| "step": 1442 |
| }, |
| { |
| "epoch": 2.9034205231388333, |
| "grad_norm": 2.015017032623291, |
| "learning_rate": 3.290768980876324e-08, |
| "loss": 0.3284, |
| "step": 1443 |
| }, |
| { |
| "epoch": 2.9054325955734406, |
| "grad_norm": 1.9345711469650269, |
| "learning_rate": 3.157962589821872e-08, |
| "loss": 0.3237, |
| "step": 1444 |
| }, |
| { |
| "epoch": 2.9074446680080483, |
| "grad_norm": 2.0234079360961914, |
| "learning_rate": 3.027883042968249e-08, |
| "loss": 0.3112, |
| "step": 1445 |
| }, |
| { |
| "epoch": 2.909456740442656, |
| "grad_norm": 1.8842613697052002, |
| "learning_rate": 2.9005310542378205e-08, |
| "loss": 0.3024, |
| "step": 1446 |
| }, |
| { |
| "epoch": 2.9114688128772634, |
| "grad_norm": 1.8669836521148682, |
| "learning_rate": 2.77590732258326e-08, |
| "loss": 0.3031, |
| "step": 1447 |
| }, |
| { |
| "epoch": 2.913480885311871, |
| "grad_norm": 2.0102365016937256, |
| "learning_rate": 2.6540125319834964e-08, |
| "loss": 0.3102, |
| "step": 1448 |
| }, |
| { |
| "epoch": 2.915492957746479, |
| "grad_norm": 1.9279073476791382, |
| "learning_rate": 2.5348473514400507e-08, |
| "loss": 0.2971, |
| "step": 1449 |
| }, |
| { |
| "epoch": 2.9175050301810863, |
| "grad_norm": 1.921225666999817, |
| "learning_rate": 2.4184124349734828e-08, |
| "loss": 0.3094, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.919517102615694, |
| "grad_norm": 1.9262641668319702, |
| "learning_rate": 2.3047084216196724e-08, |
| "loss": 0.3069, |
| "step": 1451 |
| }, |
| { |
| "epoch": 2.921529175050302, |
| "grad_norm": 1.8994249105453491, |
| "learning_rate": 2.1937359354262665e-08, |
| "loss": 0.3096, |
| "step": 1452 |
| }, |
| { |
| "epoch": 2.9235412474849096, |
| "grad_norm": 2.079052686691284, |
| "learning_rate": 2.085495585449404e-08, |
| "loss": 0.3253, |
| "step": 1453 |
| }, |
| { |
| "epoch": 2.925553319919517, |
| "grad_norm": 1.9570258855819702, |
| "learning_rate": 1.979987965750274e-08, |
| "loss": 0.3455, |
| "step": 1454 |
| }, |
| { |
| "epoch": 2.9275653923541247, |
| "grad_norm": 2.058948040008545, |
| "learning_rate": 1.8772136553918408e-08, |
| "loss": 0.331, |
| "step": 1455 |
| }, |
| { |
| "epoch": 2.9295774647887325, |
| "grad_norm": 2.0447700023651123, |
| "learning_rate": 1.7771732184357905e-08, |
| "loss": 0.3452, |
| "step": 1456 |
| }, |
| { |
| "epoch": 2.9315895372233403, |
| "grad_norm": 2.081425428390503, |
| "learning_rate": 1.679867203939256e-08, |
| "loss": 0.3349, |
| "step": 1457 |
| }, |
| { |
| "epoch": 2.9336016096579476, |
| "grad_norm": 1.8499003648757935, |
| "learning_rate": 1.5852961459519868e-08, |
| "loss": 0.2985, |
| "step": 1458 |
| }, |
| { |
| "epoch": 2.9356136820925554, |
| "grad_norm": 1.9242502450942993, |
| "learning_rate": 1.4934605635132383e-08, |
| "loss": 0.3154, |
| "step": 1459 |
| }, |
| { |
| "epoch": 2.937625754527163, |
| "grad_norm": 1.93385648727417, |
| "learning_rate": 1.4043609606489983e-08, |
| "loss": 0.299, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.9396378269617705, |
| "grad_norm": 1.9221806526184082, |
| "learning_rate": 1.3179978263694326e-08, |
| "loss": 0.3123, |
| "step": 1461 |
| }, |
| { |
| "epoch": 2.941649899396378, |
| "grad_norm": 2.0381548404693604, |
| "learning_rate": 1.2343716346657209e-08, |
| "loss": 0.3252, |
| "step": 1462 |
| }, |
| { |
| "epoch": 2.943661971830986, |
| "grad_norm": 1.926993489265442, |
| "learning_rate": 1.1534828445080027e-08, |
| "loss": 0.3015, |
| "step": 1463 |
| }, |
| { |
| "epoch": 2.9456740442655933, |
| "grad_norm": 1.840437412261963, |
| "learning_rate": 1.0753318998423246e-08, |
| "loss": 0.3145, |
| "step": 1464 |
| }, |
| { |
| "epoch": 2.947686116700201, |
| "grad_norm": 1.8412384986877441, |
| "learning_rate": 9.999192295886973e-09, |
| "loss": 0.3011, |
| "step": 1465 |
| }, |
| { |
| "epoch": 2.949698189134809, |
| "grad_norm": 1.8052462339401245, |
| "learning_rate": 9.272452476384308e-09, |
| "loss": 0.282, |
| "step": 1466 |
| }, |
| { |
| "epoch": 2.9517102615694166, |
| "grad_norm": 1.972144603729248, |
| "learning_rate": 8.5731035285197e-09, |
| "loss": 0.3058, |
| "step": 1467 |
| }, |
| { |
| "epoch": 2.9537223340040244, |
| "grad_norm": 1.8558961153030396, |
| "learning_rate": 7.90114929056618e-09, |
| "loss": 0.3157, |
| "step": 1468 |
| }, |
| { |
| "epoch": 2.9557344064386317, |
| "grad_norm": 2.027374029159546, |
| "learning_rate": 7.256593450444827e-09, |
| "loss": 0.3317, |
| "step": 1469 |
| }, |
| { |
| "epoch": 2.9577464788732395, |
| "grad_norm": 1.951428771018982, |
| "learning_rate": 6.639439545707005e-09, |
| "loss": 0.3011, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.9597585513078473, |
| "grad_norm": 1.7829604148864746, |
| "learning_rate": 6.04969096350938e-09, |
| "loss": 0.2914, |
| "step": 1471 |
| }, |
| { |
| "epoch": 2.9617706237424546, |
| "grad_norm": 1.9716224670410156, |
| "learning_rate": 5.487350940600044e-09, |
| "loss": 0.3362, |
| "step": 1472 |
| }, |
| { |
| "epoch": 2.9637826961770624, |
| "grad_norm": 2.0185909271240234, |
| "learning_rate": 4.952422563300197e-09, |
| "loss": 0.2978, |
| "step": 1473 |
| }, |
| { |
| "epoch": 2.96579476861167, |
| "grad_norm": 1.8780219554901123, |
| "learning_rate": 4.444908767484712e-09, |
| "loss": 0.2806, |
| "step": 1474 |
| }, |
| { |
| "epoch": 2.9678068410462775, |
| "grad_norm": 1.9690665006637573, |
| "learning_rate": 3.964812338567714e-09, |
| "loss": 0.3135, |
| "step": 1475 |
| }, |
| { |
| "epoch": 2.9698189134808852, |
| "grad_norm": 1.9470399618148804, |
| "learning_rate": 3.5121359114886898e-09, |
| "loss": 0.318, |
| "step": 1476 |
| }, |
| { |
| "epoch": 2.971830985915493, |
| "grad_norm": 1.9868518114089966, |
| "learning_rate": 3.0868819706947327e-09, |
| "loss": 0.3146, |
| "step": 1477 |
| }, |
| { |
| "epoch": 2.9738430583501008, |
| "grad_norm": 1.8380684852600098, |
| "learning_rate": 2.6890528501288814e-09, |
| "loss": 0.2889, |
| "step": 1478 |
| }, |
| { |
| "epoch": 2.975855130784708, |
| "grad_norm": 1.9578129053115845, |
| "learning_rate": 2.3186507332184636e-09, |
| "loss": 0.3003, |
| "step": 1479 |
| }, |
| { |
| "epoch": 2.977867203219316, |
| "grad_norm": 1.9266585111618042, |
| "learning_rate": 1.9756776528601085e-09, |
| "loss": 0.324, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.9798792756539236, |
| "grad_norm": 1.8439252376556396, |
| "learning_rate": 1.660135491411974e-09, |
| "loss": 0.3137, |
| "step": 1481 |
| }, |
| { |
| "epoch": 2.9818913480885314, |
| "grad_norm": 1.9398472309112549, |
| "learning_rate": 1.3720259806793146e-09, |
| "loss": 0.2902, |
| "step": 1482 |
| }, |
| { |
| "epoch": 2.9839034205231387, |
| "grad_norm": 1.9513535499572754, |
| "learning_rate": 1.111350701909486e-09, |
| "loss": 0.3255, |
| "step": 1483 |
| }, |
| { |
| "epoch": 2.9859154929577465, |
| "grad_norm": 1.8612955808639526, |
| "learning_rate": 8.781110857802866e-10, |
| "loss": 0.3181, |
| "step": 1484 |
| }, |
| { |
| "epoch": 2.9879275653923543, |
| "grad_norm": 1.9021211862564087, |
| "learning_rate": 6.723084123921864e-10, |
| "loss": 0.297, |
| "step": 1485 |
| }, |
| { |
| "epoch": 2.9899396378269616, |
| "grad_norm": 1.824216604232788, |
| "learning_rate": 4.939438112638861e-10, |
| "loss": 0.3023, |
| "step": 1486 |
| }, |
| { |
| "epoch": 2.9919517102615694, |
| "grad_norm": 1.8582955598831177, |
| "learning_rate": 3.430182613223254e-10, |
| "loss": 0.3129, |
| "step": 1487 |
| }, |
| { |
| "epoch": 2.993963782696177, |
| "grad_norm": 1.8397884368896484, |
| "learning_rate": 2.1953259090101708e-10, |
| "loss": 0.3229, |
| "step": 1488 |
| }, |
| { |
| "epoch": 2.9959758551307845, |
| "grad_norm": 1.9026719331741333, |
| "learning_rate": 1.2348747773172075e-10, |
| "loss": 0.2996, |
| "step": 1489 |
| }, |
| { |
| "epoch": 2.9979879275653922, |
| "grad_norm": 1.9118090867996216, |
| "learning_rate": 5.488344894444275e-11, |
| "loss": 0.3106, |
| "step": 1490 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.7826815843582153, |
| "learning_rate": 1.3720881062440073e-11, |
| "loss": 0.2744, |
| "step": 1491 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1491, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5071875383559193e+19, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |