| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.9946524064171123, | |
| "eval_steps": 500, | |
| "global_step": 210, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0142602495543672, | |
| "grad_norm": 5.998478174665459, | |
| "learning_rate": 4.7619047619047623e-07, | |
| "loss": 0.7176, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0285204991087344, | |
| "grad_norm": 6.157130000522602, | |
| "learning_rate": 9.523809523809525e-07, | |
| "loss": 0.7167, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0427807486631016, | |
| "grad_norm": 6.037876002899973, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 0.7267, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0570409982174688, | |
| "grad_norm": 6.15687961618456, | |
| "learning_rate": 1.904761904761905e-06, | |
| "loss": 0.7342, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.07130124777183601, | |
| "grad_norm": 5.532445239097713, | |
| "learning_rate": 2.380952380952381e-06, | |
| "loss": 0.6988, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0855614973262032, | |
| "grad_norm": 4.591054025035638, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": 0.6871, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.09982174688057041, | |
| "grad_norm": 4.163450801982825, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.6551, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.1140819964349376, | |
| "grad_norm": 3.0450299693083838, | |
| "learning_rate": 3.80952380952381e-06, | |
| "loss": 0.5935, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.12834224598930483, | |
| "grad_norm": 3.0171447511514304, | |
| "learning_rate": 4.2857142857142855e-06, | |
| "loss": 0.61, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.14260249554367202, | |
| "grad_norm": 3.412973255907802, | |
| "learning_rate": 4.761904761904762e-06, | |
| "loss": 0.5653, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.1568627450980392, | |
| "grad_norm": 3.2639215322983093, | |
| "learning_rate": 5.2380952380952384e-06, | |
| "loss": 0.5505, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.1711229946524064, | |
| "grad_norm": 2.7810256770169555, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 0.5189, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.18538324420677363, | |
| "grad_norm": 1.855708432778589, | |
| "learning_rate": 6.1904761904761914e-06, | |
| "loss": 0.4986, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.19964349376114082, | |
| "grad_norm": 2.4285867830525225, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.4825, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.21390374331550802, | |
| "grad_norm": 2.0406805172911455, | |
| "learning_rate": 7.1428571428571436e-06, | |
| "loss": 0.4781, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.2281639928698752, | |
| "grad_norm": 1.7117644118774973, | |
| "learning_rate": 7.61904761904762e-06, | |
| "loss": 0.4717, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.24242424242424243, | |
| "grad_norm": 1.3431052662117142, | |
| "learning_rate": 8.095238095238097e-06, | |
| "loss": 0.4626, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.25668449197860965, | |
| "grad_norm": 1.2491319368394767, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 0.4483, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.2709447415329768, | |
| "grad_norm": 1.064103742518249, | |
| "learning_rate": 9.047619047619049e-06, | |
| "loss": 0.4309, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.28520499108734404, | |
| "grad_norm": 1.308684310053338, | |
| "learning_rate": 9.523809523809525e-06, | |
| "loss": 0.4186, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.2994652406417112, | |
| "grad_norm": 1.0005997347773574, | |
| "learning_rate": 1e-05, | |
| "loss": 0.4289, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.3137254901960784, | |
| "grad_norm": 1.0855537879905526, | |
| "learning_rate": 9.99930927345553e-06, | |
| "loss": 0.4509, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.32798573975044565, | |
| "grad_norm": 0.9598047755603596, | |
| "learning_rate": 9.99723728466338e-06, | |
| "loss": 0.4166, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.3422459893048128, | |
| "grad_norm": 0.9378891290105057, | |
| "learning_rate": 9.993784606094612e-06, | |
| "loss": 0.4124, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.35650623885918004, | |
| "grad_norm": 0.8238281484919813, | |
| "learning_rate": 9.988952191691925e-06, | |
| "loss": 0.4111, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.37076648841354726, | |
| "grad_norm": 0.8249603588788698, | |
| "learning_rate": 9.982741376606077e-06, | |
| "loss": 0.4022, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.3850267379679144, | |
| "grad_norm": 0.7944536509888138, | |
| "learning_rate": 9.975153876827008e-06, | |
| "loss": 0.4096, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.39928698752228164, | |
| "grad_norm": 0.8877336723504325, | |
| "learning_rate": 9.966191788709716e-06, | |
| "loss": 0.4078, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.41354723707664887, | |
| "grad_norm": 0.9362944528113393, | |
| "learning_rate": 9.955857588395065e-06, | |
| "loss": 0.3974, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.42780748663101603, | |
| "grad_norm": 1.0365875514927283, | |
| "learning_rate": 9.944154131125643e-06, | |
| "loss": 0.3907, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.44206773618538325, | |
| "grad_norm": 0.9187628487659182, | |
| "learning_rate": 9.931084650456892e-06, | |
| "loss": 0.3788, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.4563279857397504, | |
| "grad_norm": 0.9346374036471025, | |
| "learning_rate": 9.916652757363698e-06, | |
| "loss": 0.3933, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.47058823529411764, | |
| "grad_norm": 0.8638182734526192, | |
| "learning_rate": 9.900862439242719e-06, | |
| "loss": 0.377, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.48484848484848486, | |
| "grad_norm": 0.8897918969636153, | |
| "learning_rate": 9.883718058810708e-06, | |
| "loss": 0.3879, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.49910873440285203, | |
| "grad_norm": 0.905899013833491, | |
| "learning_rate": 9.86522435289912e-06, | |
| "loss": 0.393, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.5133689839572193, | |
| "grad_norm": 0.8313526898166544, | |
| "learning_rate": 9.84538643114539e-06, | |
| "loss": 0.3802, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.5276292335115864, | |
| "grad_norm": 0.7981624010904073, | |
| "learning_rate": 9.824209774581176e-06, | |
| "loss": 0.3798, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.5418894830659536, | |
| "grad_norm": 0.8107386340463024, | |
| "learning_rate": 9.801700234118e-06, | |
| "loss": 0.3776, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.5561497326203209, | |
| "grad_norm": 0.7619681977203558, | |
| "learning_rate": 9.777864028930705e-06, | |
| "loss": 0.3815, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.5704099821746881, | |
| "grad_norm": 0.8454306401600613, | |
| "learning_rate": 9.752707744739146e-06, | |
| "loss": 0.3776, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.5846702317290553, | |
| "grad_norm": 0.8447995805383889, | |
| "learning_rate": 9.726238331988625e-06, | |
| "loss": 0.3704, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.5989304812834224, | |
| "grad_norm": 0.785542071668213, | |
| "learning_rate": 9.698463103929542e-06, | |
| "loss": 0.3713, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.6131907308377896, | |
| "grad_norm": 0.8441633538171278, | |
| "learning_rate": 9.669389734596819e-06, | |
| "loss": 0.3521, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.6274509803921569, | |
| "grad_norm": 0.8104799297356389, | |
| "learning_rate": 9.639026256689628e-06, | |
| "loss": 0.379, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.6417112299465241, | |
| "grad_norm": 0.7777936361439105, | |
| "learning_rate": 9.60738105935204e-06, | |
| "loss": 0.3682, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.6559714795008913, | |
| "grad_norm": 0.7900521212207957, | |
| "learning_rate": 9.574462885855173e-06, | |
| "loss": 0.3708, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.6702317290552585, | |
| "grad_norm": 0.8163359807292335, | |
| "learning_rate": 9.540280831181525e-06, | |
| "loss": 0.3605, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.6844919786096256, | |
| "grad_norm": 0.872836606578105, | |
| "learning_rate": 9.504844339512096e-06, | |
| "loss": 0.3661, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.6987522281639929, | |
| "grad_norm": 0.9033570080860079, | |
| "learning_rate": 9.468163201617063e-06, | |
| "loss": 0.3672, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.7130124777183601, | |
| "grad_norm": 0.8384671316276696, | |
| "learning_rate": 9.430247552150673e-06, | |
| "loss": 0.3671, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 0.8616090850522994, | |
| "learning_rate": 9.391107866851143e-06, | |
| "loss": 0.3765, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.7415329768270945, | |
| "grad_norm": 0.9146332401497722, | |
| "learning_rate": 9.350754959646306e-06, | |
| "loss": 0.3567, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.7557932263814616, | |
| "grad_norm": 0.7467925012563463, | |
| "learning_rate": 9.309199979665821e-06, | |
| "loss": 0.3624, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.7700534759358288, | |
| "grad_norm": 0.8106016103660585, | |
| "learning_rate": 9.266454408160779e-06, | |
| "loss": 0.3647, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.7843137254901961, | |
| "grad_norm": 0.8134090391372545, | |
| "learning_rate": 9.22253005533154e-06, | |
| "loss": 0.3808, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.7985739750445633, | |
| "grad_norm": 0.7921832122229222, | |
| "learning_rate": 9.177439057064684e-06, | |
| "loss": 0.3596, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.8128342245989305, | |
| "grad_norm": 0.7303835443395563, | |
| "learning_rate": 9.131193871579975e-06, | |
| "loss": 0.349, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.8270944741532977, | |
| "grad_norm": 0.7871828583047863, | |
| "learning_rate": 9.083807275988285e-06, | |
| "loss": 0.35, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.8413547237076648, | |
| "grad_norm": 0.881224423943151, | |
| "learning_rate": 9.035292362761382e-06, | |
| "loss": 0.3666, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.8556149732620321, | |
| "grad_norm": 0.6948998923174846, | |
| "learning_rate": 8.985662536114614e-06, | |
| "loss": 0.3581, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.8698752228163993, | |
| "grad_norm": 0.7996766809552465, | |
| "learning_rate": 8.934931508303446e-06, | |
| "loss": 0.3618, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.8841354723707665, | |
| "grad_norm": 0.7926297708544718, | |
| "learning_rate": 8.883113295834893e-06, | |
| "loss": 0.3654, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.8983957219251337, | |
| "grad_norm": 0.8076359165867002, | |
| "learning_rate": 8.83022221559489e-06, | |
| "loss": 0.3628, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.9126559714795008, | |
| "grad_norm": 0.8704725987761389, | |
| "learning_rate": 8.776272880892675e-06, | |
| "loss": 0.3615, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.9269162210338681, | |
| "grad_norm": 0.7259385563641405, | |
| "learning_rate": 8.721280197423259e-06, | |
| "loss": 0.3391, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 0.7641083584826656, | |
| "learning_rate": 8.665259359149132e-06, | |
| "loss": 0.3584, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.9554367201426025, | |
| "grad_norm": 0.8482726911049152, | |
| "learning_rate": 8.608225844102312e-06, | |
| "loss": 0.369, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.9696969696969697, | |
| "grad_norm": 0.8036946709490872, | |
| "learning_rate": 8.550195410107903e-06, | |
| "loss": 0.3736, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.983957219251337, | |
| "grad_norm": 0.7653533058452437, | |
| "learning_rate": 8.491184090430365e-06, | |
| "loss": 0.3401, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.9982174688057041, | |
| "grad_norm": 0.8073514579287697, | |
| "learning_rate": 8.43120818934367e-06, | |
| "loss": 0.3493, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.0124777183600713, | |
| "grad_norm": 0.6775703542593738, | |
| "learning_rate": 8.370284277626576e-06, | |
| "loss": 0.3162, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 1.0267379679144386, | |
| "grad_norm": 0.6849387062750477, | |
| "learning_rate": 8.308429187984298e-06, | |
| "loss": 0.3132, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 1.0409982174688057, | |
| "grad_norm": 0.7501115742708528, | |
| "learning_rate": 8.24566001039776e-06, | |
| "loss": 0.3107, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 1.0552584670231728, | |
| "grad_norm": 0.7419796127086248, | |
| "learning_rate": 8.181994087401819e-06, | |
| "loss": 0.3114, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 1.0695187165775402, | |
| "grad_norm": 0.7534872326532702, | |
| "learning_rate": 8.117449009293668e-06, | |
| "loss": 0.3228, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 1.0837789661319073, | |
| "grad_norm": 0.8094312306607193, | |
| "learning_rate": 8.052042609272817e-06, | |
| "loss": 0.3022, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 1.0980392156862746, | |
| "grad_norm": 0.7273023913777745, | |
| "learning_rate": 7.985792958513932e-06, | |
| "loss": 0.3077, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 1.1122994652406417, | |
| "grad_norm": 0.6547411987494043, | |
| "learning_rate": 7.918718361173951e-06, | |
| "loss": 0.3137, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 1.1265597147950088, | |
| "grad_norm": 0.7118409719814686, | |
| "learning_rate": 7.85083734933481e-06, | |
| "loss": 0.2952, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 1.1408199643493762, | |
| "grad_norm": 0.7635016933875783, | |
| "learning_rate": 7.782168677883206e-06, | |
| "loss": 0.3138, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.1550802139037433, | |
| "grad_norm": 0.6817008368615943, | |
| "learning_rate": 7.712731319328798e-06, | |
| "loss": 0.3035, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 1.1693404634581106, | |
| "grad_norm": 0.8174647964374863, | |
| "learning_rate": 7.642544458562278e-06, | |
| "loss": 0.3284, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 1.1836007130124777, | |
| "grad_norm": 0.6637911284519812, | |
| "learning_rate": 7.571627487554769e-06, | |
| "loss": 0.3174, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 1.1978609625668448, | |
| "grad_norm": 0.7495098318592474, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.3125, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 1.2121212121212122, | |
| "grad_norm": 0.6420213528206444, | |
| "learning_rate": 7.4276817859007615e-06, | |
| "loss": 0.2909, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.2263814616755793, | |
| "grad_norm": 0.7007340675237256, | |
| "learning_rate": 7.354692826101102e-06, | |
| "loss": 0.3114, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 1.2406417112299466, | |
| "grad_norm": 0.706271617054947, | |
| "learning_rate": 7.281053286765816e-06, | |
| "loss": 0.2988, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 1.2549019607843137, | |
| "grad_norm": 0.6255542997192427, | |
| "learning_rate": 7.206783513808721e-06, | |
| "loss": 0.3161, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 1.2691622103386808, | |
| "grad_norm": 0.6771293525704715, | |
| "learning_rate": 7.1319040272712705e-06, | |
| "loss": 0.3164, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 1.2834224598930482, | |
| "grad_norm": 0.7070886877032927, | |
| "learning_rate": 7.056435515653059e-06, | |
| "loss": 0.3312, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.2976827094474153, | |
| "grad_norm": 0.6693214606442824, | |
| "learning_rate": 6.980398830195785e-06, | |
| "loss": 0.3101, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 1.3119429590017826, | |
| "grad_norm": 0.7279758172512709, | |
| "learning_rate": 6.903814979122249e-06, | |
| "loss": 0.3197, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 1.3262032085561497, | |
| "grad_norm": 0.6520276206041827, | |
| "learning_rate": 6.8267051218319766e-06, | |
| "loss": 0.2975, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 1.3404634581105168, | |
| "grad_norm": 0.6107316374770684, | |
| "learning_rate": 6.749090563055075e-06, | |
| "loss": 0.2969, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 1.3547237076648841, | |
| "grad_norm": 0.6997488359926258, | |
| "learning_rate": 6.6709927469659385e-06, | |
| "loss": 0.2991, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 1.3689839572192513, | |
| "grad_norm": 0.6541906454481444, | |
| "learning_rate": 6.592433251258423e-06, | |
| "loss": 0.3153, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 1.3832442067736186, | |
| "grad_norm": 0.6674786478671763, | |
| "learning_rate": 6.513433781184131e-06, | |
| "loss": 0.2928, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 1.3975044563279857, | |
| "grad_norm": 0.6623084412543156, | |
| "learning_rate": 6.434016163555452e-06, | |
| "loss": 0.3043, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 1.4117647058823528, | |
| "grad_norm": 0.689312711915529, | |
| "learning_rate": 6.354202340715027e-06, | |
| "loss": 0.3344, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 1.4260249554367201, | |
| "grad_norm": 0.6311516943939547, | |
| "learning_rate": 6.274014364473274e-06, | |
| "loss": 0.2968, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.4402852049910875, | |
| "grad_norm": 0.6961729490571823, | |
| "learning_rate": 6.19347439001569e-06, | |
| "loss": 0.2925, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 1.4545454545454546, | |
| "grad_norm": 0.6707000018979263, | |
| "learning_rate": 6.112604669781572e-06, | |
| "loss": 0.3126, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 1.4688057040998217, | |
| "grad_norm": 0.6027031632940896, | |
| "learning_rate": 6.031427547315889e-06, | |
| "loss": 0.3023, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 1.483065953654189, | |
| "grad_norm": 0.7096258454181166, | |
| "learning_rate": 5.949965451095952e-06, | |
| "loss": 0.3083, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 1.4973262032085561, | |
| "grad_norm": 0.627103846901669, | |
| "learning_rate": 5.8682408883346535e-06, | |
| "loss": 0.3129, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 1.5115864527629235, | |
| "grad_norm": 0.6885664921641609, | |
| "learning_rate": 5.786276438761928e-06, | |
| "loss": 0.3134, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 1.5258467023172906, | |
| "grad_norm": 0.6047321023835506, | |
| "learning_rate": 5.7040947483861845e-06, | |
| "loss": 0.3037, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 1.5401069518716577, | |
| "grad_norm": 0.6014431063746195, | |
| "learning_rate": 5.621718523237427e-06, | |
| "loss": 0.3066, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 1.5543672014260248, | |
| "grad_norm": 0.6759797800635752, | |
| "learning_rate": 5.539170523093794e-06, | |
| "loss": 0.2972, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 1.5686274509803921, | |
| "grad_norm": 0.6459776120064094, | |
| "learning_rate": 5.456473555193242e-06, | |
| "loss": 0.2868, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.5828877005347595, | |
| "grad_norm": 0.5990226927063328, | |
| "learning_rate": 5.373650467932122e-06, | |
| "loss": 0.3098, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 1.5971479500891266, | |
| "grad_norm": 0.6724529376774372, | |
| "learning_rate": 5.290724144552379e-06, | |
| "loss": 0.296, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 1.6114081996434937, | |
| "grad_norm": 0.6674952293711535, | |
| "learning_rate": 5.207717496819134e-06, | |
| "loss": 0.3061, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 1.6256684491978608, | |
| "grad_norm": 0.6574349669336045, | |
| "learning_rate": 5.1246534586903655e-06, | |
| "loss": 0.3028, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 1.6399286987522281, | |
| "grad_norm": 0.5996511909752443, | |
| "learning_rate": 5.041554979980487e-06, | |
| "loss": 0.306, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 1.6541889483065955, | |
| "grad_norm": 0.6324851444349666, | |
| "learning_rate": 4.958445020019516e-06, | |
| "loss": 0.3113, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 1.6684491978609626, | |
| "grad_norm": 0.5804606484309103, | |
| "learning_rate": 4.875346541309637e-06, | |
| "loss": 0.298, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 1.6827094474153297, | |
| "grad_norm": 0.6297463707956045, | |
| "learning_rate": 4.792282503180867e-06, | |
| "loss": 0.2915, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 1.696969696969697, | |
| "grad_norm": 0.59037299101606, | |
| "learning_rate": 4.7092758554476215e-06, | |
| "loss": 0.308, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 1.7112299465240641, | |
| "grad_norm": 0.6386954686267667, | |
| "learning_rate": 4.626349532067879e-06, | |
| "loss": 0.2963, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.7254901960784315, | |
| "grad_norm": 0.6145804027948849, | |
| "learning_rate": 4.5435264448067595e-06, | |
| "loss": 0.3089, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 1.7397504456327986, | |
| "grad_norm": 0.6053923239121506, | |
| "learning_rate": 4.460829476906208e-06, | |
| "loss": 0.307, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 1.7540106951871657, | |
| "grad_norm": 0.6502354241272797, | |
| "learning_rate": 4.3782814767625755e-06, | |
| "loss": 0.2997, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 1.768270944741533, | |
| "grad_norm": 0.6047031298299372, | |
| "learning_rate": 4.295905251613817e-06, | |
| "loss": 0.2988, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 1.7825311942959001, | |
| "grad_norm": 0.5997702616278708, | |
| "learning_rate": 4.213723561238074e-06, | |
| "loss": 0.3123, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.7967914438502675, | |
| "grad_norm": 0.6254870806600614, | |
| "learning_rate": 4.131759111665349e-06, | |
| "loss": 0.3026, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 1.8110516934046346, | |
| "grad_norm": 0.633357389799123, | |
| "learning_rate": 4.0500345489040515e-06, | |
| "loss": 0.2951, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 1.8253119429590017, | |
| "grad_norm": 0.6404830360753402, | |
| "learning_rate": 3.968572452684113e-06, | |
| "loss": 0.2999, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 1.839572192513369, | |
| "grad_norm": 0.6329066927776358, | |
| "learning_rate": 3.887395330218429e-06, | |
| "loss": 0.3035, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 1.8538324420677363, | |
| "grad_norm": 0.589200866233192, | |
| "learning_rate": 3.806525609984312e-06, | |
| "loss": 0.3124, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.8680926916221035, | |
| "grad_norm": 0.6082320573613086, | |
| "learning_rate": 3.7259856355267275e-06, | |
| "loss": 0.2956, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 1.8823529411764706, | |
| "grad_norm": 0.6241929627393371, | |
| "learning_rate": 3.6457976592849753e-06, | |
| "loss": 0.2843, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 1.8966131907308377, | |
| "grad_norm": 0.5731217918233906, | |
| "learning_rate": 3.5659838364445505e-06, | |
| "loss": 0.2962, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 1.910873440285205, | |
| "grad_norm": 0.7203946076740332, | |
| "learning_rate": 3.4865662188158713e-06, | |
| "loss": 0.3019, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 1.9251336898395723, | |
| "grad_norm": 0.5801797087810499, | |
| "learning_rate": 3.4075667487415785e-06, | |
| "loss": 0.2919, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 1.9393939393939394, | |
| "grad_norm": 0.6017940447903612, | |
| "learning_rate": 3.3290072530340628e-06, | |
| "loss": 0.289, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 1.9536541889483066, | |
| "grad_norm": 0.6783329890821536, | |
| "learning_rate": 3.250909436944928e-06, | |
| "loss": 0.3004, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 1.9679144385026737, | |
| "grad_norm": 0.6181255692540811, | |
| "learning_rate": 3.173294878168025e-06, | |
| "loss": 0.2907, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 1.982174688057041, | |
| "grad_norm": 0.5527014431122037, | |
| "learning_rate": 3.0961850208777527e-06, | |
| "loss": 0.3023, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 1.9964349376114083, | |
| "grad_norm": 0.6138241158100955, | |
| "learning_rate": 3.019601169804216e-06, | |
| "loss": 0.2956, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 2.0106951871657754, | |
| "grad_norm": 0.5795269162344017, | |
| "learning_rate": 2.9435644843469434e-06, | |
| "loss": 0.2645, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 2.0249554367201426, | |
| "grad_norm": 0.6133201918050623, | |
| "learning_rate": 2.8680959727287316e-06, | |
| "loss": 0.2519, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 2.0392156862745097, | |
| "grad_norm": 0.6132844962849042, | |
| "learning_rate": 2.7932164861912805e-06, | |
| "loss": 0.2813, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 2.053475935828877, | |
| "grad_norm": 0.6064305890385137, | |
| "learning_rate": 2.718946713234185e-06, | |
| "loss": 0.2678, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 2.0677361853832443, | |
| "grad_norm": 0.5071570057035093, | |
| "learning_rate": 2.645307173898901e-06, | |
| "loss": 0.2569, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 2.0819964349376114, | |
| "grad_norm": 0.5593533988862088, | |
| "learning_rate": 2.5723182140992385e-06, | |
| "loss": 0.2672, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 2.0962566844919786, | |
| "grad_norm": 0.5681953675884334, | |
| "learning_rate": 2.5000000000000015e-06, | |
| "loss": 0.2509, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 2.1105169340463457, | |
| "grad_norm": 0.5238572661786828, | |
| "learning_rate": 2.428372512445233e-06, | |
| "loss": 0.2654, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 2.124777183600713, | |
| "grad_norm": 0.5212764514198992, | |
| "learning_rate": 2.357455541437723e-06, | |
| "loss": 0.2593, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 2.1390374331550803, | |
| "grad_norm": 0.5009569961577103, | |
| "learning_rate": 2.2872686806712037e-06, | |
| "loss": 0.2651, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 2.1532976827094474, | |
| "grad_norm": 0.5016236354852869, | |
| "learning_rate": 2.217831322116797e-06, | |
| "loss": 0.253, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 2.1675579322638145, | |
| "grad_norm": 0.5385410771208106, | |
| "learning_rate": 2.1491626506651914e-06, | |
| "loss": 0.2567, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 2.1818181818181817, | |
| "grad_norm": 0.52490098008563, | |
| "learning_rate": 2.081281638826052e-06, | |
| "loss": 0.2653, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 2.196078431372549, | |
| "grad_norm": 0.5547010556928098, | |
| "learning_rate": 2.0142070414860704e-06, | |
| "loss": 0.2498, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 2.2103386809269163, | |
| "grad_norm": 0.5326534701282144, | |
| "learning_rate": 1.947957390727185e-06, | |
| "loss": 0.2677, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 2.2245989304812834, | |
| "grad_norm": 0.5676341178655185, | |
| "learning_rate": 1.8825509907063328e-06, | |
| "loss": 0.262, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 2.2388591800356505, | |
| "grad_norm": 0.5075550544962953, | |
| "learning_rate": 1.8180059125981826e-06, | |
| "loss": 0.2609, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 2.2531194295900177, | |
| "grad_norm": 0.5378049836987813, | |
| "learning_rate": 1.7543399896022406e-06, | |
| "loss": 0.2401, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 2.267379679144385, | |
| "grad_norm": 0.5313735495324294, | |
| "learning_rate": 1.6915708120157042e-06, | |
| "loss": 0.2478, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 2.2816399286987523, | |
| "grad_norm": 0.4997207720980407, | |
| "learning_rate": 1.6297157223734228e-06, | |
| "loss": 0.2608, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.2959001782531194, | |
| "grad_norm": 0.5354164171295689, | |
| "learning_rate": 1.5687918106563326e-06, | |
| "loss": 0.2521, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 2.3101604278074865, | |
| "grad_norm": 0.5521694352110376, | |
| "learning_rate": 1.5088159095696365e-06, | |
| "loss": 0.2618, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 2.3244206773618536, | |
| "grad_norm": 0.5419705137185197, | |
| "learning_rate": 1.4498045898920988e-06, | |
| "loss": 0.2415, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 2.338680926916221, | |
| "grad_norm": 0.5289333038282386, | |
| "learning_rate": 1.3917741558976894e-06, | |
| "loss": 0.2594, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 2.3529411764705883, | |
| "grad_norm": 0.4958046194398315, | |
| "learning_rate": 1.3347406408508695e-06, | |
| "loss": 0.2547, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 2.3672014260249554, | |
| "grad_norm": 0.5275471951202156, | |
| "learning_rate": 1.2787198025767417e-06, | |
| "loss": 0.2498, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 2.3814616755793225, | |
| "grad_norm": 0.5439621033914177, | |
| "learning_rate": 1.223727119107327e-06, | |
| "loss": 0.2569, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 2.3957219251336896, | |
| "grad_norm": 0.5020641846014723, | |
| "learning_rate": 1.1697777844051105e-06, | |
| "loss": 0.2682, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 2.409982174688057, | |
| "grad_norm": 0.5332076852350075, | |
| "learning_rate": 1.1168867041651082e-06, | |
| "loss": 0.2522, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 2.4242424242424243, | |
| "grad_norm": 0.5479842841748694, | |
| "learning_rate": 1.065068491696556e-06, | |
| "loss": 0.242, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 2.4385026737967914, | |
| "grad_norm": 0.5337676381332789, | |
| "learning_rate": 1.0143374638853892e-06, | |
| "loss": 0.255, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 2.4527629233511585, | |
| "grad_norm": 0.4899023747726101, | |
| "learning_rate": 9.647076372386195e-07, | |
| "loss": 0.2552, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 2.4670231729055256, | |
| "grad_norm": 0.542407864218085, | |
| "learning_rate": 9.161927240117174e-07, | |
| "loss": 0.2662, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 2.481283422459893, | |
| "grad_norm": 0.47566063751571563, | |
| "learning_rate": 8.688061284200266e-07, | |
| "loss": 0.2581, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 2.4955436720142603, | |
| "grad_norm": 0.47291898298798923, | |
| "learning_rate": 8.225609429353187e-07, | |
| "loss": 0.2679, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 2.5098039215686274, | |
| "grad_norm": 0.4856493909985723, | |
| "learning_rate": 7.774699446684608e-07, | |
| "loss": 0.2558, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 2.5240641711229945, | |
| "grad_norm": 0.5468886611950984, | |
| "learning_rate": 7.33545591839222e-07, | |
| "loss": 0.2575, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 2.5383244206773616, | |
| "grad_norm": 0.5227642129317599, | |
| "learning_rate": 6.908000203341802e-07, | |
| "loss": 0.2726, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 2.552584670231729, | |
| "grad_norm": 0.5243649599139041, | |
| "learning_rate": 6.492450403536959e-07, | |
| "loss": 0.2691, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 2.5668449197860963, | |
| "grad_norm": 0.5038584609402851, | |
| "learning_rate": 6.088921331488568e-07, | |
| "loss": 0.2717, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.5811051693404634, | |
| "grad_norm": 0.48239876397554576, | |
| "learning_rate": 5.697524478493288e-07, | |
| "loss": 0.2598, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 2.5953654188948305, | |
| "grad_norm": 0.5437994297898453, | |
| "learning_rate": 5.318367983829393e-07, | |
| "loss": 0.259, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 2.6096256684491976, | |
| "grad_norm": 0.49097378469526687, | |
| "learning_rate": 4.951556604879049e-07, | |
| "loss": 0.262, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 2.623885918003565, | |
| "grad_norm": 0.5189871183074696, | |
| "learning_rate": 4.5971916881847543e-07, | |
| "loss": 0.2473, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 2.6381461675579323, | |
| "grad_norm": 0.4963009680587836, | |
| "learning_rate": 4.255371141448272e-07, | |
| "loss": 0.2621, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 2.6524064171122994, | |
| "grad_norm": 0.4845160599159596, | |
| "learning_rate": 3.9261894064796136e-07, | |
| "loss": 0.2496, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.5085689793959361, | |
| "learning_rate": 3.6097374331037326e-07, | |
| "loss": 0.2422, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 2.6809269162210336, | |
| "grad_norm": 0.4910420456640142, | |
| "learning_rate": 3.306102654031823e-07, | |
| "loss": 0.2691, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 2.695187165775401, | |
| "grad_norm": 0.5080309292352433, | |
| "learning_rate": 3.015368960704584e-07, | |
| "loss": 0.2769, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 2.7094474153297683, | |
| "grad_norm": 0.4486963045863036, | |
| "learning_rate": 2.737616680113758e-07, | |
| "loss": 0.2595, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.7237076648841354, | |
| "grad_norm": 0.5134768408162061, | |
| "learning_rate": 2.472922552608559e-07, | |
| "loss": 0.2466, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 2.7379679144385025, | |
| "grad_norm": 0.4729499265055225, | |
| "learning_rate": 2.2213597106929608e-07, | |
| "loss": 0.2438, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 2.7522281639928696, | |
| "grad_norm": 0.4756332341553558, | |
| "learning_rate": 1.982997658820013e-07, | |
| "loss": 0.2619, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 2.766488413547237, | |
| "grad_norm": 0.5111999088028844, | |
| "learning_rate": 1.757902254188254e-07, | |
| "loss": 0.242, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 2.7807486631016043, | |
| "grad_norm": 0.5009830915446124, | |
| "learning_rate": 1.5461356885461077e-07, | |
| "loss": 0.2463, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 2.7950089126559714, | |
| "grad_norm": 0.5063176331660432, | |
| "learning_rate": 1.3477564710088097e-07, | |
| "loss": 0.2629, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 2.809269162210339, | |
| "grad_norm": 0.5389279146565609, | |
| "learning_rate": 1.1628194118929403e-07, | |
| "loss": 0.2654, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 2.8235294117647056, | |
| "grad_norm": 0.5002474822940156, | |
| "learning_rate": 9.913756075728088e-08, | |
| "loss": 0.256, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 2.837789661319073, | |
| "grad_norm": 0.4856526207889643, | |
| "learning_rate": 8.334724263630301e-08, | |
| "loss": 0.2672, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 2.8520499108734403, | |
| "grad_norm": 0.4773574713690703, | |
| "learning_rate": 6.891534954310886e-08, | |
| "loss": 0.2569, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.8663101604278074, | |
| "grad_norm": 0.4725556890683267, | |
| "learning_rate": 5.584586887435739e-08, | |
| "loss": 0.2484, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 2.880570409982175, | |
| "grad_norm": 0.5061174548555849, | |
| "learning_rate": 4.41424116049366e-08, | |
| "loss": 0.2564, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 2.8948306595365416, | |
| "grad_norm": 0.4978103192012945, | |
| "learning_rate": 3.3808211290284886e-08, | |
| "loss": 0.2626, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 2.909090909090909, | |
| "grad_norm": 0.4646163044086365, | |
| "learning_rate": 2.4846123172992953e-08, | |
| "loss": 0.2605, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 2.9233511586452763, | |
| "grad_norm": 0.4968912078361725, | |
| "learning_rate": 1.725862339392259e-08, | |
| "loss": 0.2543, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 2.9376114081996434, | |
| "grad_norm": 0.47130252532014427, | |
| "learning_rate": 1.1047808308075059e-08, | |
| "loss": 0.2533, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 2.951871657754011, | |
| "grad_norm": 0.5105837310688802, | |
| "learning_rate": 6.215393905388278e-09, | |
| "loss": 0.2753, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 2.966131907308378, | |
| "grad_norm": 0.5097896921787389, | |
| "learning_rate": 2.7627153366222014e-09, | |
| "loss": 0.2676, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 2.980392156862745, | |
| "grad_norm": 0.5475905566370015, | |
| "learning_rate": 6.907265444716649e-10, | |
| "loss": 0.2399, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 2.9946524064171123, | |
| "grad_norm": 0.4933114168605027, | |
| "learning_rate": 0.0, | |
| "loss": 0.2595, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.9946524064171123, | |
| "step": 210, | |
| "total_flos": 95562719805440.0, | |
| "train_loss": 0.3320285594179517, | |
| "train_runtime": 9818.8898, | |
| "train_samples_per_second": 1.371, | |
| "train_steps_per_second": 0.021 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 210, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 95562719805440.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |