| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.25588536335721596, | |
| "eval_steps": 250, | |
| "global_step": 250, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0010235414534288639, | |
| "grad_norm": 2.0027730464935303, | |
| "learning_rate": 0.0, | |
| "loss": 0.7807, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0020470829068577278, | |
| "grad_norm": 2.1167027950286865, | |
| "learning_rate": 2e-09, | |
| "loss": 0.7864, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0030706243602865915, | |
| "grad_norm": 2.759631395339966, | |
| "learning_rate": 4e-09, | |
| "loss": 0.7935, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0040941658137154556, | |
| "grad_norm": 1.614827036857605, | |
| "learning_rate": 6e-09, | |
| "loss": 0.7812, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.00511770726714432, | |
| "grad_norm": 2.2924065589904785, | |
| "learning_rate": 8e-09, | |
| "loss": 0.7841, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.006141248720573183, | |
| "grad_norm": 2.6293888092041016, | |
| "learning_rate": 1e-08, | |
| "loss": 0.7944, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.007164790174002047, | |
| "grad_norm": 1.7844728231430054, | |
| "learning_rate": 1.2e-08, | |
| "loss": 0.7793, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.008188331627430911, | |
| "grad_norm": 1.4052098989486694, | |
| "learning_rate": 1.4000000000000001e-08, | |
| "loss": 0.7671, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.009211873080859774, | |
| "grad_norm": 2.682335376739502, | |
| "learning_rate": 1.6e-08, | |
| "loss": 0.795, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.01023541453428864, | |
| "grad_norm": 2.45478892326355, | |
| "learning_rate": 1.8000000000000002e-08, | |
| "loss": 0.7918, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.011258955987717503, | |
| "grad_norm": 2.523688316345215, | |
| "learning_rate": 2e-08, | |
| "loss": 0.799, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.012282497441146366, | |
| "grad_norm": 2.3234951496124268, | |
| "learning_rate": 2.2000000000000002e-08, | |
| "loss": 0.7835, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.01330603889457523, | |
| "grad_norm": 2.2566919326782227, | |
| "learning_rate": 2.4e-08, | |
| "loss": 0.7926, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.014329580348004094, | |
| "grad_norm": 2.3038065433502197, | |
| "learning_rate": 2.6e-08, | |
| "loss": 0.7921, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.015353121801432957, | |
| "grad_norm": 2.107079267501831, | |
| "learning_rate": 2.8000000000000003e-08, | |
| "loss": 0.7864, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.016376663254861822, | |
| "grad_norm": 2.129422426223755, | |
| "learning_rate": 3.0000000000000004e-08, | |
| "loss": 0.7756, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.017400204708290685, | |
| "grad_norm": 1.5430221557617188, | |
| "learning_rate": 3.2e-08, | |
| "loss": 0.7695, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.01842374616171955, | |
| "grad_norm": 2.061033248901367, | |
| "learning_rate": 3.4e-08, | |
| "loss": 0.7812, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.019447287615148412, | |
| "grad_norm": 2.001169443130493, | |
| "learning_rate": 3.6000000000000005e-08, | |
| "loss": 0.7859, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.02047082906857728, | |
| "grad_norm": 1.981490135192871, | |
| "learning_rate": 3.8e-08, | |
| "loss": 0.788, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.021494370522006142, | |
| "grad_norm": 1.7497801780700684, | |
| "learning_rate": 4e-08, | |
| "loss": 0.7762, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.022517911975435005, | |
| "grad_norm": 1.8900872468948364, | |
| "learning_rate": 4.2e-08, | |
| "loss": 0.7835, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.02354145342886387, | |
| "grad_norm": 2.0690395832061768, | |
| "learning_rate": 4.4000000000000004e-08, | |
| "loss": 0.7795, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.02456499488229273, | |
| "grad_norm": 1.251330852508545, | |
| "learning_rate": 4.6e-08, | |
| "loss": 0.7675, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0255885363357216, | |
| "grad_norm": 1.5707719326019287, | |
| "learning_rate": 4.8e-08, | |
| "loss": 0.7677, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.02661207778915046, | |
| "grad_norm": 1.831811785697937, | |
| "learning_rate": 5.0000000000000004e-08, | |
| "loss": 0.7677, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.027635619242579325, | |
| "grad_norm": 1.8244602680206299, | |
| "learning_rate": 5.2e-08, | |
| "loss": 0.7714, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.028659160696008188, | |
| "grad_norm": 2.0211703777313232, | |
| "learning_rate": 5.400000000000001e-08, | |
| "loss": 0.7848, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.02968270214943705, | |
| "grad_norm": 1.8793987035751343, | |
| "learning_rate": 5.6000000000000005e-08, | |
| "loss": 0.7846, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.030706243602865915, | |
| "grad_norm": 1.2707512378692627, | |
| "learning_rate": 5.8e-08, | |
| "loss": 0.7723, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03172978505629478, | |
| "grad_norm": 1.710810899734497, | |
| "learning_rate": 6.000000000000001e-08, | |
| "loss": 0.7788, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.032753326509723645, | |
| "grad_norm": 1.5903525352478027, | |
| "learning_rate": 6.2e-08, | |
| "loss": 0.7775, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.033776867963152504, | |
| "grad_norm": 2.2208263874053955, | |
| "learning_rate": 6.4e-08, | |
| "loss": 0.78, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.03480040941658137, | |
| "grad_norm": 2.3763325214385986, | |
| "learning_rate": 6.600000000000001e-08, | |
| "loss": 0.7939, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.03582395087001024, | |
| "grad_norm": 1.6977214813232422, | |
| "learning_rate": 6.8e-08, | |
| "loss": 0.7745, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0368474923234391, | |
| "grad_norm": 2.1862456798553467, | |
| "learning_rate": 7e-08, | |
| "loss": 0.7844, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.037871033776867964, | |
| "grad_norm": 1.8891886472702026, | |
| "learning_rate": 7.200000000000001e-08, | |
| "loss": 0.775, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.038894575230296824, | |
| "grad_norm": 2.0394537448883057, | |
| "learning_rate": 7.400000000000001e-08, | |
| "loss": 0.7771, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.03991811668372569, | |
| "grad_norm": 2.105576992034912, | |
| "learning_rate": 7.6e-08, | |
| "loss": 0.7862, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.04094165813715456, | |
| "grad_norm": 2.123842477798462, | |
| "learning_rate": 7.8e-08, | |
| "loss": 0.7804, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04196519959058342, | |
| "grad_norm": 1.3442330360412598, | |
| "learning_rate": 8e-08, | |
| "loss": 0.7676, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.042988741044012284, | |
| "grad_norm": 2.431581735610962, | |
| "learning_rate": 8.200000000000002e-08, | |
| "loss": 0.7785, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.044012282497441144, | |
| "grad_norm": 2.3671112060546875, | |
| "learning_rate": 8.4e-08, | |
| "loss": 0.7996, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.04503582395087001, | |
| "grad_norm": 2.2737319469451904, | |
| "learning_rate": 8.6e-08, | |
| "loss": 0.7794, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.04605936540429888, | |
| "grad_norm": 1.5326226949691772, | |
| "learning_rate": 8.800000000000001e-08, | |
| "loss": 0.774, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.04708290685772774, | |
| "grad_norm": 1.845044493675232, | |
| "learning_rate": 9e-08, | |
| "loss": 0.7875, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.048106448311156604, | |
| "grad_norm": 2.3159587383270264, | |
| "learning_rate": 9.2e-08, | |
| "loss": 0.7916, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.04912998976458546, | |
| "grad_norm": 1.6539689302444458, | |
| "learning_rate": 9.400000000000001e-08, | |
| "loss": 0.775, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.05015353121801433, | |
| "grad_norm": 1.5031073093414307, | |
| "learning_rate": 9.6e-08, | |
| "loss": 0.777, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.0511770726714432, | |
| "grad_norm": 1.9059022665023804, | |
| "learning_rate": 9.8e-08, | |
| "loss": 0.7836, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.052200614124872056, | |
| "grad_norm": 2.2032649517059326, | |
| "learning_rate": 1.0000000000000001e-07, | |
| "loss": 0.7862, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.05322415557830092, | |
| "grad_norm": 1.2774498462677002, | |
| "learning_rate": 1.0200000000000001e-07, | |
| "loss": 0.7691, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.05424769703172978, | |
| "grad_norm": 1.7091227769851685, | |
| "learning_rate": 1.04e-07, | |
| "loss": 0.7843, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.05527123848515865, | |
| "grad_norm": 1.3407061100006104, | |
| "learning_rate": 1.0600000000000001e-07, | |
| "loss": 0.7704, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.05629477993858751, | |
| "grad_norm": 1.7370580434799194, | |
| "learning_rate": 1.0800000000000001e-07, | |
| "loss": 0.7803, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.057318321392016376, | |
| "grad_norm": 2.144951105117798, | |
| "learning_rate": 1.1e-07, | |
| "loss": 0.7816, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.05834186284544524, | |
| "grad_norm": 1.5752339363098145, | |
| "learning_rate": 1.1200000000000001e-07, | |
| "loss": 0.7723, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.0593654042988741, | |
| "grad_norm": 1.595261573791504, | |
| "learning_rate": 1.1400000000000001e-07, | |
| "loss": 0.7785, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.06038894575230297, | |
| "grad_norm": 2.0418097972869873, | |
| "learning_rate": 1.16e-07, | |
| "loss": 0.7864, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.06141248720573183, | |
| "grad_norm": 1.6113048791885376, | |
| "learning_rate": 1.1800000000000001e-07, | |
| "loss": 0.7719, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.062436028659160696, | |
| "grad_norm": 1.533006191253662, | |
| "learning_rate": 1.2000000000000002e-07, | |
| "loss": 0.7743, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.06345957011258956, | |
| "grad_norm": 0.9601923823356628, | |
| "learning_rate": 1.22e-07, | |
| "loss": 0.7737, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.06448311156601842, | |
| "grad_norm": 1.3146189451217651, | |
| "learning_rate": 1.24e-07, | |
| "loss": 0.7683, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.06550665301944729, | |
| "grad_norm": 1.5222556591033936, | |
| "learning_rate": 1.2600000000000002e-07, | |
| "loss": 0.7739, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.06653019447287616, | |
| "grad_norm": 1.4367096424102783, | |
| "learning_rate": 1.28e-07, | |
| "loss": 0.7726, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.06755373592630501, | |
| "grad_norm": 1.03805673122406, | |
| "learning_rate": 1.3e-07, | |
| "loss": 0.7638, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.06857727737973388, | |
| "grad_norm": 1.9511936902999878, | |
| "learning_rate": 1.3200000000000002e-07, | |
| "loss": 0.7815, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.06960081883316274, | |
| "grad_norm": 1.5974836349487305, | |
| "learning_rate": 1.34e-07, | |
| "loss": 0.7713, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.07062436028659161, | |
| "grad_norm": 1.4479100704193115, | |
| "learning_rate": 1.36e-07, | |
| "loss": 0.7761, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.07164790174002048, | |
| "grad_norm": 1.4680284261703491, | |
| "learning_rate": 1.3800000000000002e-07, | |
| "loss": 0.766, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07267144319344933, | |
| "grad_norm": 1.4755054712295532, | |
| "learning_rate": 1.4e-07, | |
| "loss": 0.7713, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.0736949846468782, | |
| "grad_norm": 1.3230453729629517, | |
| "learning_rate": 1.4200000000000003e-07, | |
| "loss": 0.7755, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.07471852610030706, | |
| "grad_norm": 1.8009718656539917, | |
| "learning_rate": 1.4400000000000002e-07, | |
| "loss": 0.7833, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.07574206755373593, | |
| "grad_norm": 1.5481526851654053, | |
| "learning_rate": 1.46e-07, | |
| "loss": 0.7806, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.0767656090071648, | |
| "grad_norm": 0.9285205602645874, | |
| "learning_rate": 1.4800000000000003e-07, | |
| "loss": 0.767, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.07778915046059365, | |
| "grad_norm": 1.6149866580963135, | |
| "learning_rate": 1.5000000000000002e-07, | |
| "loss": 0.7802, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.07881269191402251, | |
| "grad_norm": 1.667980670928955, | |
| "learning_rate": 1.52e-07, | |
| "loss": 0.7757, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.07983623336745138, | |
| "grad_norm": 1.862239956855774, | |
| "learning_rate": 1.5400000000000003e-07, | |
| "loss": 0.771, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.08085977482088025, | |
| "grad_norm": 1.3772761821746826, | |
| "learning_rate": 1.56e-07, | |
| "loss": 0.782, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.08188331627430911, | |
| "grad_norm": 1.9442654848098755, | |
| "learning_rate": 1.5800000000000004e-07, | |
| "loss": 0.7746, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08290685772773797, | |
| "grad_norm": 1.420366644859314, | |
| "learning_rate": 1.6e-07, | |
| "loss": 0.7749, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.08393039918116683, | |
| "grad_norm": 1.7965872287750244, | |
| "learning_rate": 1.62e-07, | |
| "loss": 0.776, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.0849539406345957, | |
| "grad_norm": 1.811287522315979, | |
| "learning_rate": 1.6400000000000004e-07, | |
| "loss": 0.7794, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.08597748208802457, | |
| "grad_norm": 1.489158034324646, | |
| "learning_rate": 1.66e-07, | |
| "loss": 0.7785, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.08700102354145343, | |
| "grad_norm": 1.527341604232788, | |
| "learning_rate": 1.68e-07, | |
| "loss": 0.7804, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.08802456499488229, | |
| "grad_norm": 1.0682123899459839, | |
| "learning_rate": 1.7000000000000001e-07, | |
| "loss": 0.7717, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.08904810644831115, | |
| "grad_norm": 0.7309737205505371, | |
| "learning_rate": 1.72e-07, | |
| "loss": 0.7644, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.09007164790174002, | |
| "grad_norm": 1.156342625617981, | |
| "learning_rate": 1.74e-07, | |
| "loss": 0.7703, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.09109518935516889, | |
| "grad_norm": 1.4098986387252808, | |
| "learning_rate": 1.7600000000000001e-07, | |
| "loss": 0.774, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.09211873080859775, | |
| "grad_norm": 1.740477204322815, | |
| "learning_rate": 1.78e-07, | |
| "loss": 0.7737, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0931422722620266, | |
| "grad_norm": 1.291332721710205, | |
| "learning_rate": 1.8e-07, | |
| "loss": 0.769, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.09416581371545547, | |
| "grad_norm": 1.403385877609253, | |
| "learning_rate": 1.8200000000000002e-07, | |
| "loss": 0.777, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.09518935516888434, | |
| "grad_norm": 1.0460313558578491, | |
| "learning_rate": 1.84e-07, | |
| "loss": 0.7654, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.09621289662231321, | |
| "grad_norm": 0.8738810420036316, | |
| "learning_rate": 1.86e-07, | |
| "loss": 0.7645, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.09723643807574207, | |
| "grad_norm": 1.2067152261734009, | |
| "learning_rate": 1.8800000000000002e-07, | |
| "loss": 0.771, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.09825997952917093, | |
| "grad_norm": 0.7921656370162964, | |
| "learning_rate": 1.9e-07, | |
| "loss": 0.7673, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.0992835209825998, | |
| "grad_norm": 1.2949451208114624, | |
| "learning_rate": 1.92e-07, | |
| "loss": 0.771, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.10030706243602866, | |
| "grad_norm": 1.0598716735839844, | |
| "learning_rate": 1.9400000000000002e-07, | |
| "loss": 0.7661, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.10133060388945753, | |
| "grad_norm": 0.5756526589393616, | |
| "learning_rate": 1.96e-07, | |
| "loss": 0.7629, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.1023541453428864, | |
| "grad_norm": 0.9098894596099854, | |
| "learning_rate": 1.9800000000000003e-07, | |
| "loss": 0.7613, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.10337768679631525, | |
| "grad_norm": 1.3523019552230835, | |
| "learning_rate": 2.0000000000000002e-07, | |
| "loss": 0.7753, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.10440122824974411, | |
| "grad_norm": 0.9900273680686951, | |
| "learning_rate": 2.02e-07, | |
| "loss": 0.7761, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.10542476970317298, | |
| "grad_norm": 0.7855979204177856, | |
| "learning_rate": 2.0400000000000003e-07, | |
| "loss": 0.7651, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.10644831115660185, | |
| "grad_norm": 1.0023835897445679, | |
| "learning_rate": 2.0600000000000002e-07, | |
| "loss": 0.7677, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.10747185261003071, | |
| "grad_norm": 0.6019173860549927, | |
| "learning_rate": 2.08e-07, | |
| "loss": 0.7606, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.10849539406345957, | |
| "grad_norm": 0.9002220630645752, | |
| "learning_rate": 2.1000000000000003e-07, | |
| "loss": 0.7701, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.10951893551688843, | |
| "grad_norm": 0.8429995775222778, | |
| "learning_rate": 2.1200000000000002e-07, | |
| "loss": 0.7656, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.1105424769703173, | |
| "grad_norm": 0.5726915001869202, | |
| "learning_rate": 2.14e-07, | |
| "loss": 0.7667, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.11156601842374617, | |
| "grad_norm": 0.49694034457206726, | |
| "learning_rate": 2.1600000000000003e-07, | |
| "loss": 0.7614, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.11258955987717502, | |
| "grad_norm": 0.5831499099731445, | |
| "learning_rate": 2.1800000000000002e-07, | |
| "loss": 0.7646, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.11361310133060389, | |
| "grad_norm": 0.49775540828704834, | |
| "learning_rate": 2.2e-07, | |
| "loss": 0.7611, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.11463664278403275, | |
| "grad_norm": 1.5634126663208008, | |
| "learning_rate": 2.2200000000000003e-07, | |
| "loss": 0.7824, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.11566018423746162, | |
| "grad_norm": 0.9528007507324219, | |
| "learning_rate": 2.2400000000000002e-07, | |
| "loss": 0.7595, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.11668372569089049, | |
| "grad_norm": 0.5957873463630676, | |
| "learning_rate": 2.26e-07, | |
| "loss": 0.761, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.11770726714431934, | |
| "grad_norm": 0.6020492911338806, | |
| "learning_rate": 2.2800000000000003e-07, | |
| "loss": 0.7605, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.1187308085977482, | |
| "grad_norm": 0.578925371170044, | |
| "learning_rate": 2.3000000000000002e-07, | |
| "loss": 0.768, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.11975435005117707, | |
| "grad_norm": 0.5749679207801819, | |
| "learning_rate": 2.32e-07, | |
| "loss": 0.7636, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.12077789150460594, | |
| "grad_norm": 1.0057039260864258, | |
| "learning_rate": 2.3400000000000003e-07, | |
| "loss": 0.7591, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.1218014329580348, | |
| "grad_norm": 0.5447669625282288, | |
| "learning_rate": 2.3600000000000002e-07, | |
| "loss": 0.7671, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.12282497441146366, | |
| "grad_norm": 0.5045008063316345, | |
| "learning_rate": 2.3800000000000004e-07, | |
| "loss": 0.7614, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.12384851586489252, | |
| "grad_norm": 1.2467900514602661, | |
| "learning_rate": 2.4000000000000003e-07, | |
| "loss": 0.7683, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.12487205731832139, | |
| "grad_norm": 0.49536195397377014, | |
| "learning_rate": 2.42e-07, | |
| "loss": 0.7626, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.12589559877175024, | |
| "grad_norm": 1.1078747510910034, | |
| "learning_rate": 2.44e-07, | |
| "loss": 0.7679, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.1269191402251791, | |
| "grad_norm": 0.5230876207351685, | |
| "learning_rate": 2.46e-07, | |
| "loss": 0.7579, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.12794268167860798, | |
| "grad_norm": 0.7761886119842529, | |
| "learning_rate": 2.48e-07, | |
| "loss": 0.768, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.12896622313203684, | |
| "grad_norm": 0.4926793575286865, | |
| "learning_rate": 2.5000000000000004e-07, | |
| "loss": 0.7639, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.1299897645854657, | |
| "grad_norm": 0.7263102531433105, | |
| "learning_rate": 2.5200000000000003e-07, | |
| "loss": 0.7619, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.13101330603889458, | |
| "grad_norm": 0.846560537815094, | |
| "learning_rate": 2.54e-07, | |
| "loss": 0.7656, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.13203684749232344, | |
| "grad_norm": 1.296027421951294, | |
| "learning_rate": 2.56e-07, | |
| "loss": 0.7713, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.1330603889457523, | |
| "grad_norm": 1.0898621082305908, | |
| "learning_rate": 2.58e-07, | |
| "loss": 0.7644, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.13408393039918118, | |
| "grad_norm": 0.9062381982803345, | |
| "learning_rate": 2.6e-07, | |
| "loss": 0.7606, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.13510747185261002, | |
| "grad_norm": 0.9521744847297668, | |
| "learning_rate": 2.6200000000000004e-07, | |
| "loss": 0.7563, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.13613101330603888, | |
| "grad_norm": 0.5530220866203308, | |
| "learning_rate": 2.6400000000000003e-07, | |
| "loss": 0.7647, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.13715455475946775, | |
| "grad_norm": 0.6682366132736206, | |
| "learning_rate": 2.66e-07, | |
| "loss": 0.7605, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.13817809621289662, | |
| "grad_norm": 0.647662341594696, | |
| "learning_rate": 2.68e-07, | |
| "loss": 0.7725, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.13920163766632548, | |
| "grad_norm": 0.4923568665981293, | |
| "learning_rate": 2.7e-07, | |
| "loss": 0.7535, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.14022517911975435, | |
| "grad_norm": 0.661147952079773, | |
| "learning_rate": 2.72e-07, | |
| "loss": 0.7634, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.14124872057318322, | |
| "grad_norm": 0.5441955924034119, | |
| "learning_rate": 2.7400000000000004e-07, | |
| "loss": 0.7635, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.14227226202661208, | |
| "grad_norm": 0.4902069568634033, | |
| "learning_rate": 2.7600000000000004e-07, | |
| "loss": 0.7602, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.14329580348004095, | |
| "grad_norm": 0.9088981747627258, | |
| "learning_rate": 2.7800000000000003e-07, | |
| "loss": 0.7597, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.14431934493346982, | |
| "grad_norm": 0.8669309020042419, | |
| "learning_rate": 2.8e-07, | |
| "loss": 0.763, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.14534288638689866, | |
| "grad_norm": 1.1272106170654297, | |
| "learning_rate": 2.82e-07, | |
| "loss": 0.7685, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.14636642784032752, | |
| "grad_norm": 0.9253846406936646, | |
| "learning_rate": 2.8400000000000005e-07, | |
| "loss": 0.761, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.1473899692937564, | |
| "grad_norm": 0.6668679118156433, | |
| "learning_rate": 2.8600000000000005e-07, | |
| "loss": 0.7636, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.14841351074718526, | |
| "grad_norm": 0.6984054446220398, | |
| "learning_rate": 2.8800000000000004e-07, | |
| "loss": 0.7639, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.14943705220061412, | |
| "grad_norm": 0.5088668465614319, | |
| "learning_rate": 2.9000000000000003e-07, | |
| "loss": 0.7575, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.150460593654043, | |
| "grad_norm": 0.8230961561203003, | |
| "learning_rate": 2.92e-07, | |
| "loss": 0.763, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.15148413510747186, | |
| "grad_norm": 0.5158461332321167, | |
| "learning_rate": 2.94e-07, | |
| "loss": 0.7626, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.15250767656090072, | |
| "grad_norm": 1.0442547798156738, | |
| "learning_rate": 2.9600000000000006e-07, | |
| "loss": 0.7615, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.1535312180143296, | |
| "grad_norm": 0.5645190477371216, | |
| "learning_rate": 2.9800000000000005e-07, | |
| "loss": 0.7549, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.15455475946775846, | |
| "grad_norm": 0.4772261083126068, | |
| "learning_rate": 3.0000000000000004e-07, | |
| "loss": 0.7645, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.1555783009211873, | |
| "grad_norm": 0.5313320159912109, | |
| "learning_rate": 3.0200000000000003e-07, | |
| "loss": 0.7589, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.15660184237461616, | |
| "grad_norm": 0.5733660459518433, | |
| "learning_rate": 3.04e-07, | |
| "loss": 0.7556, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.15762538382804503, | |
| "grad_norm": 0.7178833484649658, | |
| "learning_rate": 3.06e-07, | |
| "loss": 0.7575, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.1586489252814739, | |
| "grad_norm": 0.4526512026786804, | |
| "learning_rate": 3.0800000000000006e-07, | |
| "loss": 0.7578, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.15967246673490276, | |
| "grad_norm": 0.5467422008514404, | |
| "learning_rate": 3.1000000000000005e-07, | |
| "loss": 0.7574, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.16069600818833163, | |
| "grad_norm": 0.6100950241088867, | |
| "learning_rate": 3.12e-07, | |
| "loss": 0.7575, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.1617195496417605, | |
| "grad_norm": 0.44614189863204956, | |
| "learning_rate": 3.14e-07, | |
| "loss": 0.7581, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.16274309109518936, | |
| "grad_norm": 0.5598356127738953, | |
| "learning_rate": 3.160000000000001e-07, | |
| "loss": 0.7617, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.16376663254861823, | |
| "grad_norm": 0.5831120610237122, | |
| "learning_rate": 3.1800000000000007e-07, | |
| "loss": 0.7619, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1647901740020471, | |
| "grad_norm": 0.47710853815078735, | |
| "learning_rate": 3.2e-07, | |
| "loss": 0.7576, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.16581371545547594, | |
| "grad_norm": 0.4452659487724304, | |
| "learning_rate": 3.22e-07, | |
| "loss": 0.763, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.1668372569089048, | |
| "grad_norm": 0.5317380428314209, | |
| "learning_rate": 3.24e-07, | |
| "loss": 0.7594, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.16786079836233367, | |
| "grad_norm": 0.5500309467315674, | |
| "learning_rate": 3.26e-07, | |
| "loss": 0.7523, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.16888433981576254, | |
| "grad_norm": 0.5401707291603088, | |
| "learning_rate": 3.280000000000001e-07, | |
| "loss": 0.7635, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.1699078812691914, | |
| "grad_norm": 0.6862873435020447, | |
| "learning_rate": 3.3e-07, | |
| "loss": 0.7547, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.17093142272262027, | |
| "grad_norm": 0.4394344687461853, | |
| "learning_rate": 3.32e-07, | |
| "loss": 0.7542, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.17195496417604914, | |
| "grad_norm": 0.46235671639442444, | |
| "learning_rate": 3.34e-07, | |
| "loss": 0.7493, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.172978505629478, | |
| "grad_norm": 0.5838847756385803, | |
| "learning_rate": 3.36e-07, | |
| "loss": 0.7618, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.17400204708290687, | |
| "grad_norm": 0.760307788848877, | |
| "learning_rate": 3.38e-07, | |
| "loss": 0.75, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1750255885363357, | |
| "grad_norm": 0.5808561444282532, | |
| "learning_rate": 3.4000000000000003e-07, | |
| "loss": 0.7603, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.17604912998976457, | |
| "grad_norm": 0.7596063613891602, | |
| "learning_rate": 3.42e-07, | |
| "loss": 0.7549, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.17707267144319344, | |
| "grad_norm": 0.6428470015525818, | |
| "learning_rate": 3.44e-07, | |
| "loss": 0.7548, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.1780962128966223, | |
| "grad_norm": 0.47784337401390076, | |
| "learning_rate": 3.46e-07, | |
| "loss": 0.7647, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.17911975435005117, | |
| "grad_norm": 0.42269936203956604, | |
| "learning_rate": 3.48e-07, | |
| "loss": 0.7577, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.18014329580348004, | |
| "grad_norm": 0.6663862466812134, | |
| "learning_rate": 3.5000000000000004e-07, | |
| "loss": 0.7593, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.1811668372569089, | |
| "grad_norm": 0.443998783826828, | |
| "learning_rate": 3.5200000000000003e-07, | |
| "loss": 0.7524, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.18219037871033777, | |
| "grad_norm": 0.5712008476257324, | |
| "learning_rate": 3.54e-07, | |
| "loss": 0.7624, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.18321392016376664, | |
| "grad_norm": 0.5632140636444092, | |
| "learning_rate": 3.56e-07, | |
| "loss": 0.7631, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.1842374616171955, | |
| "grad_norm": 0.5184634327888489, | |
| "learning_rate": 3.58e-07, | |
| "loss": 0.7572, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.18526100307062435, | |
| "grad_norm": 0.5643100142478943, | |
| "learning_rate": 3.6e-07, | |
| "loss": 0.7588, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.1862845445240532, | |
| "grad_norm": 0.4550904333591461, | |
| "learning_rate": 3.6200000000000004e-07, | |
| "loss": 0.7603, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.18730808597748208, | |
| "grad_norm": 0.6727386713027954, | |
| "learning_rate": 3.6400000000000003e-07, | |
| "loss": 0.755, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.18833162743091095, | |
| "grad_norm": 0.4629902243614197, | |
| "learning_rate": 3.66e-07, | |
| "loss": 0.764, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.18935516888433981, | |
| "grad_norm": 0.5423149466514587, | |
| "learning_rate": 3.68e-07, | |
| "loss": 0.7583, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.19037871033776868, | |
| "grad_norm": 0.5308339595794678, | |
| "learning_rate": 3.7e-07, | |
| "loss": 0.7535, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.19140225179119755, | |
| "grad_norm": 0.497243732213974, | |
| "learning_rate": 3.72e-07, | |
| "loss": 0.7566, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.19242579324462641, | |
| "grad_norm": 0.5698720216751099, | |
| "learning_rate": 3.7400000000000004e-07, | |
| "loss": 0.7576, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.19344933469805528, | |
| "grad_norm": 0.546074628829956, | |
| "learning_rate": 3.7600000000000003e-07, | |
| "loss": 0.7543, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.19447287615148415, | |
| "grad_norm": 0.6073132157325745, | |
| "learning_rate": 3.78e-07, | |
| "loss": 0.7473, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.195496417604913, | |
| "grad_norm": 0.5039142370223999, | |
| "learning_rate": 3.8e-07, | |
| "loss": 0.7528, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.19651995905834185, | |
| "grad_norm": 0.5228015780448914, | |
| "learning_rate": 3.82e-07, | |
| "loss": 0.7657, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.19754350051177072, | |
| "grad_norm": 0.43683943152427673, | |
| "learning_rate": 3.84e-07, | |
| "loss": 0.7541, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.1985670419651996, | |
| "grad_norm": 0.42550554871559143, | |
| "learning_rate": 3.8600000000000004e-07, | |
| "loss": 0.752, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.19959058341862845, | |
| "grad_norm": 0.5053896307945251, | |
| "learning_rate": 3.8800000000000003e-07, | |
| "loss": 0.7558, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.20061412487205732, | |
| "grad_norm": 0.6906818151473999, | |
| "learning_rate": 3.9e-07, | |
| "loss": 0.7576, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.2016376663254862, | |
| "grad_norm": 0.46059173345565796, | |
| "learning_rate": 3.92e-07, | |
| "loss": 0.7549, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.20266120777891505, | |
| "grad_norm": 0.4829753339290619, | |
| "learning_rate": 3.94e-07, | |
| "loss": 0.7594, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.20368474923234392, | |
| "grad_norm": 0.504260241985321, | |
| "learning_rate": 3.9600000000000005e-07, | |
| "loss": 0.7564, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.2047082906857728, | |
| "grad_norm": 0.6687684655189514, | |
| "learning_rate": 3.9800000000000004e-07, | |
| "loss": 0.7524, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.20573183213920163, | |
| "grad_norm": 0.5243176817893982, | |
| "learning_rate": 4.0000000000000003e-07, | |
| "loss": 0.7576, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.2067553735926305, | |
| "grad_norm": 0.6751272082328796, | |
| "learning_rate": 4.02e-07, | |
| "loss": 0.7609, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.20777891504605936, | |
| "grad_norm": 0.5937652587890625, | |
| "learning_rate": 4.04e-07, | |
| "loss": 0.7561, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.20880245649948823, | |
| "grad_norm": 0.6328868269920349, | |
| "learning_rate": 4.06e-07, | |
| "loss": 0.7544, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.2098259979529171, | |
| "grad_norm": 0.46846815943717957, | |
| "learning_rate": 4.0800000000000005e-07, | |
| "loss": 0.7582, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.21084953940634596, | |
| "grad_norm": 0.4920537769794464, | |
| "learning_rate": 4.1000000000000004e-07, | |
| "loss": 0.7542, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.21187308085977483, | |
| "grad_norm": 0.455229789018631, | |
| "learning_rate": 4.1200000000000004e-07, | |
| "loss": 0.7571, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.2128966223132037, | |
| "grad_norm": 0.4706554114818573, | |
| "learning_rate": 4.1400000000000003e-07, | |
| "loss": 0.7583, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.21392016376663256, | |
| "grad_norm": 0.5521472096443176, | |
| "learning_rate": 4.16e-07, | |
| "loss": 0.7564, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.21494370522006143, | |
| "grad_norm": 0.45866256952285767, | |
| "learning_rate": 4.18e-07, | |
| "loss": 0.755, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.21596724667349027, | |
| "grad_norm": 0.5188294649124146, | |
| "learning_rate": 4.2000000000000006e-07, | |
| "loss": 0.7544, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.21699078812691913, | |
| "grad_norm": 0.413843035697937, | |
| "learning_rate": 4.2200000000000005e-07, | |
| "loss": 0.7508, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.218014329580348, | |
| "grad_norm": 0.4665946066379547, | |
| "learning_rate": 4.2400000000000004e-07, | |
| "loss": 0.7557, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.21903787103377687, | |
| "grad_norm": 0.5579046010971069, | |
| "learning_rate": 4.2600000000000003e-07, | |
| "loss": 0.7511, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.22006141248720573, | |
| "grad_norm": 0.4209223687648773, | |
| "learning_rate": 4.28e-07, | |
| "loss": 0.7516, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.2210849539406346, | |
| "grad_norm": 0.7076475024223328, | |
| "learning_rate": 4.3e-07, | |
| "loss": 0.7507, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.22210849539406347, | |
| "grad_norm": 0.5781177282333374, | |
| "learning_rate": 4.3200000000000006e-07, | |
| "loss": 0.7475, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.22313203684749233, | |
| "grad_norm": 0.8091248273849487, | |
| "learning_rate": 4.3400000000000005e-07, | |
| "loss": 0.7503, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.2241555783009212, | |
| "grad_norm": 0.46331843733787537, | |
| "learning_rate": 4.3600000000000004e-07, | |
| "loss": 0.756, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.22517911975435004, | |
| "grad_norm": 0.4318588674068451, | |
| "learning_rate": 4.3800000000000003e-07, | |
| "loss": 0.7534, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.2262026612077789, | |
| "grad_norm": 0.5794275999069214, | |
| "learning_rate": 4.4e-07, | |
| "loss": 0.7563, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.22722620266120777, | |
| "grad_norm": 0.5097821354866028, | |
| "learning_rate": 4.4200000000000007e-07, | |
| "loss": 0.7553, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.22824974411463664, | |
| "grad_norm": 0.4925571084022522, | |
| "learning_rate": 4.4400000000000006e-07, | |
| "loss": 0.7549, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.2292732855680655, | |
| "grad_norm": 0.5328956842422485, | |
| "learning_rate": 4.4600000000000005e-07, | |
| "loss": 0.757, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.23029682702149437, | |
| "grad_norm": 0.4485833942890167, | |
| "learning_rate": 4.4800000000000004e-07, | |
| "loss": 0.7535, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.23132036847492324, | |
| "grad_norm": 0.538770854473114, | |
| "learning_rate": 4.5000000000000003e-07, | |
| "loss": 0.7551, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.2323439099283521, | |
| "grad_norm": 0.5424289703369141, | |
| "learning_rate": 4.52e-07, | |
| "loss": 0.7528, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.23336745138178097, | |
| "grad_norm": 0.5607970356941223, | |
| "learning_rate": 4.5400000000000007e-07, | |
| "loss": 0.7534, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.23439099283520984, | |
| "grad_norm": 0.5806226134300232, | |
| "learning_rate": 4.5600000000000006e-07, | |
| "loss": 0.7516, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.23541453428863868, | |
| "grad_norm": 0.47338593006134033, | |
| "learning_rate": 4.5800000000000005e-07, | |
| "loss": 0.75, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.23643807574206754, | |
| "grad_norm": 0.805225670337677, | |
| "learning_rate": 4.6000000000000004e-07, | |
| "loss": 0.7498, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.2374616171954964, | |
| "grad_norm": 0.5711566209793091, | |
| "learning_rate": 4.6200000000000003e-07, | |
| "loss": 0.7527, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.23848515864892528, | |
| "grad_norm": 0.4319440722465515, | |
| "learning_rate": 4.64e-07, | |
| "loss": 0.7493, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.23950870010235414, | |
| "grad_norm": 0.49590882658958435, | |
| "learning_rate": 4.6600000000000007e-07, | |
| "loss": 0.752, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.240532241555783, | |
| "grad_norm": 0.6124209761619568, | |
| "learning_rate": 4.6800000000000006e-07, | |
| "loss": 0.7517, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.24155578300921188, | |
| "grad_norm": 0.4623505771160126, | |
| "learning_rate": 4.7000000000000005e-07, | |
| "loss": 0.7558, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.24257932446264074, | |
| "grad_norm": 0.43490421772003174, | |
| "learning_rate": 4.7200000000000004e-07, | |
| "loss": 0.754, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.2436028659160696, | |
| "grad_norm": 0.41201770305633545, | |
| "learning_rate": 4.7400000000000004e-07, | |
| "loss": 0.7554, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.24462640736949848, | |
| "grad_norm": 0.5332440137863159, | |
| "learning_rate": 4.760000000000001e-07, | |
| "loss": 0.7523, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.24564994882292732, | |
| "grad_norm": 0.45751631259918213, | |
| "learning_rate": 4.78e-07, | |
| "loss": 0.7501, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.24667349027635618, | |
| "grad_norm": 0.46563875675201416, | |
| "learning_rate": 4.800000000000001e-07, | |
| "loss": 0.7475, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.24769703172978505, | |
| "grad_norm": 0.4648246765136719, | |
| "learning_rate": 4.82e-07, | |
| "loss": 0.7607, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.24872057318321392, | |
| "grad_norm": 0.42473065853118896, | |
| "learning_rate": 4.84e-07, | |
| "loss": 0.7521, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.24974411463664278, | |
| "grad_norm": 0.4116007387638092, | |
| "learning_rate": 4.86e-07, | |
| "loss": 0.753, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.2507676560900716, | |
| "grad_norm": 0.4429609477519989, | |
| "learning_rate": 4.88e-07, | |
| "loss": 0.7513, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.2517911975435005, | |
| "grad_norm": 0.40569669008255005, | |
| "learning_rate": 4.900000000000001e-07, | |
| "loss": 0.7466, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.25281473899692936, | |
| "grad_norm": 0.6898592114448547, | |
| "learning_rate": 4.92e-07, | |
| "loss": 0.7488, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.2538382804503582, | |
| "grad_norm": 0.5965420603752136, | |
| "learning_rate": 4.940000000000001e-07, | |
| "loss": 0.7496, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.2548618219037871, | |
| "grad_norm": 0.4469277858734131, | |
| "learning_rate": 4.96e-07, | |
| "loss": 0.7554, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.25588536335721596, | |
| "grad_norm": 0.5688977837562561, | |
| "learning_rate": 4.98e-07, | |
| "loss": 0.761, | |
| "step": 250 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 100000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 103, | |
| "save_steps": 250, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1024, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |