| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.7776935019376858, | |
| "global_step": 300000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 1.243, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.6562, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 9.999999999999999e-06, | |
| "loss": 0.4946, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.46, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.6666666666666664e-05, | |
| "loss": 0.4355, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.9999999999999998e-05, | |
| "loss": 0.4203, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.3333333333333332e-05, | |
| "loss": 0.4075, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 0.3986, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.9999999999999997e-05, | |
| "loss": 0.3922, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.333333333333333e-05, | |
| "loss": 0.3856, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.666666666666666e-05, | |
| "loss": 0.3801, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.9999999999999996e-05, | |
| "loss": 0.3758, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.333333333333333e-05, | |
| "loss": 0.3712, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.6666666666666665e-05, | |
| "loss": 0.3671, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9999999999999996e-05, | |
| "loss": 0.364, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 5.333333333333333e-05, | |
| "loss": 0.3605, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 5.666666666666666e-05, | |
| "loss": 0.3574, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 5.9999999999999995e-05, | |
| "loss": 0.3553, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 6.333333333333333e-05, | |
| "loss": 0.3516, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 6.666666666666666e-05, | |
| "loss": 0.3494, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 7e-05, | |
| "loss": 0.346, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 7.333333333333332e-05, | |
| "loss": 0.3432, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 7.666666666666666e-05, | |
| "loss": 0.3415, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 7.999999999999999e-05, | |
| "loss": 0.3385, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 8.333333333333333e-05, | |
| "loss": 0.3367, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 8.666666666666665e-05, | |
| "loss": 0.3344, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 8.999999999999999e-05, | |
| "loss": 0.3322, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 9.333333333333333e-05, | |
| "loss": 0.331, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 9.666666666666667e-05, | |
| "loss": 0.3287, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 9.999999999999999e-05, | |
| "loss": 0.3266, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00010333333333333333, | |
| "loss": 0.3252, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00010666666666666667, | |
| "loss": 0.3237, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00010999999999999998, | |
| "loss": 0.3215, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00011333333333333331, | |
| "loss": 0.3207, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00011666666666666665, | |
| "loss": 0.3197, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00011999999999999999, | |
| "loss": 0.3181, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001233333333333333, | |
| "loss": 0.3178, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00012666666666666666, | |
| "loss": 0.3163, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00013, | |
| "loss": 0.3154, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0001333333333333333, | |
| "loss": 0.3146, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00013666666666666666, | |
| "loss": 0.313, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00014, | |
| "loss": 0.3131, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00014333333333333334, | |
| "loss": 0.3115, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00014666666666666664, | |
| "loss": 0.3111, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00015, | |
| "loss": 0.3102, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00014999833143445625, | |
| "loss": 0.3094, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.000149993325817371, | |
| "loss": 0.3091, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00014998498338737861, | |
| "loss": 0.3077, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00014997330454219024, | |
| "loss": 0.3075, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0001499582898385749, | |
| "loss": 0.3067, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00014993993999233321, | |
| "loss": 0.3054, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00014991825587826286, | |
| "loss": 0.3048, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00014989323853011714, | |
| "loss": 0.3043, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00014986488914055563, | |
| "loss": 0.3036, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00014983320906108733, | |
| "loss": 0.3025, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0001497981998020062, | |
| "loss": 0.3025, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0001497598630323192, | |
| "loss": 0.3022, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00014971820057966672, | |
| "loss": 0.3016, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00014967321443023538, | |
| "loss": 0.3012, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00014962490672866348, | |
| "loss": 0.3006, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00014957327977793857, | |
| "loss": 0.3003, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00014951833603928788, | |
| "loss": 0.3008, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0001494600781320608, | |
| "loss": 0.2995, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00014939850883360407, | |
| "loss": 0.2988, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001493336310791294, | |
| "loss": 0.2988, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00014926544796157356, | |
| "loss": 0.2985, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00014919396273145084, | |
| "loss": 0.2979, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00014911917879669812, | |
| "loss": 0.2967, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00014904109972251249, | |
| "loss": 0.2976, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00014895972923118116, | |
| "loss": 0.2972, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00014887507120190407, | |
| "loss": 0.2965, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00014878712967060896, | |
| "loss": 0.2956, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00014869590882975894, | |
| "loss": 0.2954, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00014860141302815264, | |
| "loss": 0.2952, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00014850364677071684, | |
| "loss": 0.2955, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00014840261471829187, | |
| "loss": 0.2948, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00014829832168740913, | |
| "loss": 0.2945, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0001481907726500617, | |
| "loss": 0.2938, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00014807997273346728, | |
| "loss": 0.2936, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00014796592721982364, | |
| "loss": 0.2932, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.0001478486415460569, | |
| "loss": 0.2938, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00014772812130356235, | |
| "loss": 0.2931, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00014760437223793778, | |
| "loss": 0.2933, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00014747740024870972, | |
| "loss": 0.2924, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00014734721138905203, | |
| "loss": 0.2915, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014721381186549743, | |
| "loss": 0.2916, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00014707720803764163, | |
| "loss": 0.2921, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00014693740641784, | |
| "loss": 0.2914, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014679441367089737, | |
| "loss": 0.292, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014664823661374998, | |
| "loss": 0.2909, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.0001464988822151407, | |
| "loss": 0.2913, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00014634635759528682, | |
| "loss": 0.2905, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00014619067002554046, | |
| "loss": 0.2911, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.000146031826928042, | |
| "loss": 0.2901, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00014586983587536634, | |
| "loss": 0.2902, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0001457047045901617, | |
| "loss": 0.2897, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0001455364409447816, | |
| "loss": 0.29, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00014536505296090948, | |
| "loss": 0.2891, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00014519054880917634, | |
| "loss": 0.2896, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00014501293680877115, | |
| "loss": 0.2887, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00014483222542704434, | |
| "loss": 0.2891, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00014464842327910406, | |
| "loss": 0.2892, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00014446153912740542, | |
| "loss": 0.2885, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00014427158188133295, | |
| "loss": 0.2888, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00014407856059677564, | |
| "loss": 0.288, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.0001438824844756953, | |
| "loss": 0.2877, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00014368336286568802, | |
| "loss": 0.2875, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00014348120525953817, | |
| "loss": 0.2879, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0001432760212947663, | |
| "loss": 0.287, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00014306782075316932, | |
| "loss": 0.2875, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00014285661356035442, | |
| "loss": 0.287, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0001426424097852657, | |
| "loss": 0.2868, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00014242521963970427, | |
| "loss": 0.2868, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00014220505347784133, | |
| "loss": 0.287, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00014198192179572467, | |
| "loss": 0.2863, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00014175583523077823, | |
| "loss": 0.2867, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00014152680456129484, | |
| "loss": 0.2864, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.0001412948407059227, | |
| "loss": 0.2861, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00014105995472314447, | |
| "loss": 0.2865, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00014082215781075044, | |
| "loss": 0.2857, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00014058146130530442, | |
| "loss": 0.2857, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00014033787668160343, | |
| "loss": 0.2852, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.0001400914155521306, | |
| "loss": 0.2851, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00013984208966650165, | |
| "loss": 0.2856, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00013958991091090456, | |
| "loss": 0.2849, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00013933489130753317, | |
| "loss": 0.2845, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00013907704301401386, | |
| "loss": 0.2844, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00013881637832282597, | |
| "loss": 0.2852, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00013855290966071585, | |
| "loss": 0.2841, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00013828664958810441, | |
| "loss": 0.2843, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00013801761079848836, | |
| "loss": 0.2834, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00013774580611783485, | |
| "loss": 0.2842, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00013747124850397037, | |
| "loss": 0.2832, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00013719395104596277, | |
| "loss": 0.2836, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00013691392696349726, | |
| "loss": 0.2838, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.0001366311896062463, | |
| "loss": 0.2835, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00013634575245323311, | |
| "loss": 0.2835, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00013605762911218905, | |
| "loss": 0.2831, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00013576683331890498, | |
| "loss": 0.2829, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.0001354733789365763, | |
| "loss": 0.2825, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00013517727995514226, | |
| "loss": 0.2828, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00013487855049061875, | |
| "loss": 0.2826, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00013457720478442554, | |
| "loss": 0.2826, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00013427325720270729, | |
| "loss": 0.2824, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00013396672223564855, | |
| "loss": 0.2822, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00013365761449678324, | |
| "loss": 0.2819, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00013334594872229763, | |
| "loss": 0.282, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00013303173977032815, | |
| "loss": 0.2815, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00013271500262025278, | |
| "loss": 0.2815, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00013239575237197705, | |
| "loss": 0.2818, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00013207400424521432, | |
| "loss": 0.2818, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.0001317497735787599, | |
| "loss": 0.2817, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.0001314230758297601, | |
| "loss": 0.2817, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.000131093926572975, | |
| "loss": 0.2809, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00013076234150003634, | |
| "loss": 0.281, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00013042833641869922, | |
| "loss": 0.2816, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00013009192725208846, | |
| "loss": 0.2812, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00012975313003793962, | |
| "loss": 0.2814, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00012941196092783439, | |
| "loss": 0.2803, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00012906843618643046, | |
| "loss": 0.2813, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00012872257219068633, | |
| "loss": 0.2809, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00012837438542908053, | |
| "loss": 0.2802, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00012802389250082536, | |
| "loss": 0.2806, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0001276711101150758, | |
| "loss": 0.2806, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.0001273160550901328, | |
| "loss": 0.2801, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00012695874435264155, | |
| "loss": 0.2797, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00012659919493678443, | |
| "loss": 0.2801, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.0001262374239834691, | |
| "loss": 0.28, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00012587344873951116, | |
| "loss": 0.2797, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.0001255072865568121, | |
| "loss": 0.2798, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00012513895489153193, | |
| "loss": 0.2798, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00012476847130325712, | |
| "loss": 0.2792, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.0001243958534541634, | |
| "loss": 0.2795, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00012402111910817367, | |
| "loss": 0.2796, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00012364428613011134, | |
| "loss": 0.2785, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.0001232653724848485, | |
| "loss": 0.2793, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.0001228843962364495, | |
| "loss": 0.2787, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.0001225013755473098, | |
| "loss": 0.2797, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00012211632867729008, | |
| "loss": 0.2791, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.00012172927398284578, | |
| "loss": 0.2787, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.00012134022991615195, | |
| "loss": 0.279, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00012094921502422359, | |
| "loss": 0.2788, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00012055624794803143, | |
| "loss": 0.2787, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00012016134742161328, | |
| "loss": 0.2789, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.0001197645322711809, | |
| "loss": 0.2783, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.00011936582141422247, | |
| "loss": 0.2782, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.00011896523385860084, | |
| "loss": 0.2783, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.00011856278870164717, | |
| "loss": 0.2775, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.00011815850512925069, | |
| "loss": 0.2779, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.00011775240241494394, | |
| "loss": 0.2777, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.00011734449991898394, | |
| "loss": 0.2772, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.00011693481708742928, | |
| "loss": 0.2778, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.000116523373451213, | |
| "loss": 0.2773, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.0001161101886252115, | |
| "loss": 0.2779, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00011569528230730951, | |
| "loss": 0.2778, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00011527867427746085, | |
| "loss": 0.2777, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.00011486038439674568, | |
| "loss": 0.2772, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.00011444043260642344, | |
| "loss": 0.2773, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00011401883892698241, | |
| "loss": 0.2775, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.00011359562345718505, | |
| "loss": 0.2766, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.00011317080637310989, | |
| "loss": 0.2763, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.00011274440792718983, | |
| "loss": 0.2765, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.0001123164484472464, | |
| "loss": 0.2768, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.00011188694833552081, | |
| "loss": 0.276, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.00011145592806770127, | |
| "loss": 0.2765, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.00011102340819194689, | |
| "loss": 0.2763, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.00011058940932790795, | |
| "loss": 0.2763, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.00011015395216574308, | |
| "loss": 0.2764, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.00010971705746513272, | |
| "loss": 0.276, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.00010927874605428955, | |
| "loss": 0.2759, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.00010883903882896555, | |
| "loss": 0.276, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.00010839795675145566, | |
| "loss": 0.2756, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.00010795552084959867, | |
| "loss": 0.2756, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 0.00010751175221577465, | |
| "loss": 0.2753, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 0.00010706667200589931, | |
| "loss": 0.2762, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.00010662030143841562, | |
| "loss": 0.2754, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 0.00010617266179328208, | |
| "loss": 0.2755, | |
| "step": 217000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 0.00010572377441095834, | |
| "loss": 0.2759, | |
| "step": 218000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.00010527366069138784, | |
| "loss": 0.2754, | |
| "step": 219000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.00010482234209297747, | |
| "loss": 0.2756, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.0001043698401315748, | |
| "loss": 0.2752, | |
| "step": 221000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.00010391617637944215, | |
| "loss": 0.2751, | |
| "step": 222000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.00010346137246422819, | |
| "loss": 0.2745, | |
| "step": 223000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.00010300545006793699, | |
| "loss": 0.2747, | |
| "step": 224000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.00010254843092589434, | |
| "loss": 0.2746, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.00010209033682571144, | |
| "loss": 0.2749, | |
| "step": 226000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.00010163118960624632, | |
| "loss": 0.2739, | |
| "step": 227000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.0001011710111565627, | |
| "loss": 0.2746, | |
| "step": 228000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.00010070982341488645, | |
| "loss": 0.2744, | |
| "step": 229000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.00010024764836755973, | |
| "loss": 0.2742, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 9.978450804799277e-05, | |
| "loss": 0.2737, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 9.932042453561354e-05, | |
| "loss": 0.2747, | |
| "step": 232000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 9.885541995481517e-05, | |
| "loss": 0.2741, | |
| "step": 233000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 9.838951647390106e-05, | |
| "loss": 0.2747, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 9.792273630402829e-05, | |
| "loss": 0.2739, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 9.745510169814845e-05, | |
| "loss": 0.2748, | |
| "step": 236000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 9.698663494994703e-05, | |
| "loss": 0.2736, | |
| "step": 237000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 9.65173583927804e-05, | |
| "loss": 0.2732, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 9.604729439861125e-05, | |
| "loss": 0.274, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 9.5576465376942e-05, | |
| "loss": 0.2739, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 9.510489377374642e-05, | |
| "loss": 0.2723, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 9.463260207039961e-05, | |
| "loss": 0.2732, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 9.415961278260625e-05, | |
| "loss": 0.2742, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 9.36859484593271e-05, | |
| "loss": 0.2729, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 9.321163168170416e-05, | |
| "loss": 0.2732, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 9.273668506198407e-05, | |
| "loss": 0.2728, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 9.226113124244006e-05, | |
| "loss": 0.2726, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 9.178499289429266e-05, | |
| "loss": 0.273, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 9.130829271662884e-05, | |
| "loss": 0.2731, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 9.083105343531967e-05, | |
| "loss": 0.2726, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 9.035329780193725e-05, | |
| "loss": 0.2728, | |
| "step": 251000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 8.987504859266978e-05, | |
| "loss": 0.2727, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 8.939632860723587e-05, | |
| "loss": 0.272, | |
| "step": 253000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 8.891716066779766e-05, | |
| "loss": 0.2726, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 8.843756761787259e-05, | |
| "loss": 0.272, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 8.79575723212447e-05, | |
| "loss": 0.2716, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 8.747719766087431e-05, | |
| "loss": 0.272, | |
| "step": 257000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 8.69964665378073e-05, | |
| "loss": 0.2723, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 8.651540187008339e-05, | |
| "loss": 0.2719, | |
| "step": 259000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 8.603402659164335e-05, | |
| "loss": 0.2723, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 8.555236365123586e-05, | |
| "loss": 0.2714, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 8.507043601132336e-05, | |
| "loss": 0.2719, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 8.458826664698743e-05, | |
| "loss": 0.2717, | |
| "step": 263000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 8.410587854483341e-05, | |
| "loss": 0.2719, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 8.362329470189466e-05, | |
| "loss": 0.2715, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 8.314053812453604e-05, | |
| "loss": 0.272, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 8.265763182735732e-05, | |
| "loss": 0.271, | |
| "step": 267000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 8.217459883209592e-05, | |
| "loss": 0.2715, | |
| "step": 268000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 8.169146216652924e-05, | |
| "loss": 0.2713, | |
| "step": 269000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 8.12082448633772e-05, | |
| "loss": 0.2708, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 8.072496995920378e-05, | |
| "loss": 0.2708, | |
| "step": 271000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 8.024166049331909e-05, | |
| "loss": 0.2706, | |
| "step": 272000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 7.975833950668091e-05, | |
| "loss": 0.2705, | |
| "step": 273000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 7.927503004079623e-05, | |
| "loss": 0.2707, | |
| "step": 274000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 7.87917551366228e-05, | |
| "loss": 0.2705, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 7.830853783347074e-05, | |
| "loss": 0.2703, | |
| "step": 276000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 7.782540116790411e-05, | |
| "loss": 0.2703, | |
| "step": 277000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 7.734236817264267e-05, | |
| "loss": 0.2697, | |
| "step": 278000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 7.685946187546397e-05, | |
| "loss": 0.2699, | |
| "step": 279000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 7.637670529810532e-05, | |
| "loss": 0.2703, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 7.589412145516656e-05, | |
| "loss": 0.2701, | |
| "step": 281000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 7.541173335301255e-05, | |
| "loss": 0.2704, | |
| "step": 282000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 7.492956398867663e-05, | |
| "loss": 0.2701, | |
| "step": 283000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 7.444763634876413e-05, | |
| "loss": 0.2703, | |
| "step": 284000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 7.396597340835663e-05, | |
| "loss": 0.2701, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 7.34845981299166e-05, | |
| "loss": 0.2703, | |
| "step": 286000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 7.300353346219268e-05, | |
| "loss": 0.2694, | |
| "step": 287000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 7.252280233912569e-05, | |
| "loss": 0.2695, | |
| "step": 288000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 7.20424276787553e-05, | |
| "loss": 0.2694, | |
| "step": 289000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 7.156243238212739e-05, | |
| "loss": 0.2701, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 7.108283933220234e-05, | |
| "loss": 0.2696, | |
| "step": 291000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 7.060367139276414e-05, | |
| "loss": 0.2696, | |
| "step": 292000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 7.012495140733023e-05, | |
| "loss": 0.2694, | |
| "step": 293000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 6.964670219806273e-05, | |
| "loss": 0.2691, | |
| "step": 294000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 6.91689465646803e-05, | |
| "loss": 0.2686, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 6.869170728337114e-05, | |
| "loss": 0.2692, | |
| "step": 296000 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 6.82150071057073e-05, | |
| "loss": 0.2691, | |
| "step": 297000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 6.773886875755992e-05, | |
| "loss": 0.2684, | |
| "step": 298000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 6.726331493801593e-05, | |
| "loss": 0.2688, | |
| "step": 299000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 6.678836831829584e-05, | |
| "loss": 0.2693, | |
| "step": 300000 | |
| } | |
| ], | |
| "max_steps": 500000, | |
| "num_train_epochs": 3, | |
| "total_flos": 1.1529473128203587e+21, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |