{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 28.5,
  "global_step": 2786844,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1,
      "learning_rate": 0.020000000000000004,
      "loss": 5.3687,
      "step": 10000
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.04000000000000001,
      "loss": 4.0531,
      "step": 20000
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.04993567245443037,
      "loss": 3.8149,
      "step": 30000
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0498070173632911,
      "loss": 3.6549,
      "step": 40000
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.04967836227215183,
      "loss": 3.5544,
      "step": 50000
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.049549707181012564,
      "loss": 3.4935,
      "step": 60000
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.0494210520898733,
      "loss": 3.447,
      "step": 70000
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.049292396998734035,
      "loss": 3.4134,
      "step": 80000
    },
    {
      "epoch": 0.92,
      "learning_rate": 0.04916374190759477,
      "loss": 3.3861,
      "step": 90000
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.049035086816455506,
      "loss": 3.3523,
      "step": 100000
    },
    {
      "epoch": 1.12,
      "learning_rate": 0.04890643172531624,
      "loss": 3.3063,
      "step": 110000
    },
    {
      "epoch": 1.23,
      "learning_rate": 0.04877777663417697,
      "loss": 3.2983,
      "step": 120000
    },
    {
      "epoch": 1.33,
      "learning_rate": 0.0486491215430377,
      "loss": 3.2874,
      "step": 130000
    },
    {
      "epoch": 1.43,
      "learning_rate": 0.04852046645189844,
      "loss": 3.2785,
      "step": 140000
    },
    {
      "epoch": 1.53,
      "learning_rate": 0.04839181136075917,
      "loss": 3.2698,
      "step": 150000
    },
    {
      "epoch": 1.64,
      "learning_rate": 0.048263156269619904,
      "loss": 3.2599,
      "step": 160000
    },
    {
      "epoch": 1.74,
      "learning_rate": 0.04813450117848064,
      "loss": 3.2503,
      "step": 170000
    },
    {
      "epoch": 1.84,
      "learning_rate": 0.048005846087341375,
      "loss": 3.2432,
      "step": 180000
    },
    {
      "epoch": 1.94,
      "learning_rate": 0.04787719099620211,
      "loss": 3.2347,
      "step": 190000
    },
    {
      "epoch": 2.05,
      "learning_rate": 0.04774853590506284,
      "loss": 3.1812,
      "step": 200000
    },
    {
      "epoch": 2.15,
      "learning_rate": 0.04761988081392357,
      "loss": 3.1865,
      "step": 210000
    },
    {
      "epoch": 2.25,
      "learning_rate": 0.0474912257227843,
      "loss": 3.1873,
      "step": 220000
    },
    {
      "epoch": 2.35,
      "learning_rate": 0.047362570631645035,
      "loss": 3.1842,
      "step": 230000
    },
    {
      "epoch": 2.45,
      "learning_rate": 0.047233915540505766,
      "loss": 3.1824,
      "step": 240000
    },
    {
      "epoch": 2.56,
      "learning_rate": 0.047105260449366505,
      "loss": 3.1806,
      "step": 250000
    },
    {
      "epoch": 2.66,
      "learning_rate": 0.04697660535822724,
      "loss": 3.177,
      "step": 260000
    },
    {
      "epoch": 2.76,
      "learning_rate": 0.04684795026708797,
      "loss": 3.1741,
      "step": 270000
    },
    {
      "epoch": 2.86,
      "learning_rate": 0.04671929517594871,
      "loss": 3.1709,
      "step": 280000
    },
    {
      "epoch": 2.97,
      "learning_rate": 0.04659064008480944,
      "loss": 3.1682,
      "step": 290000
    },
    {
      "epoch": 3.07,
      "learning_rate": 0.04646198499367017,
      "loss": 3.1382,
      "step": 300000
    },
    {
      "epoch": 3.17,
      "learning_rate": 0.046333329902530904,
      "loss": 3.1283,
      "step": 310000
    },
    {
      "epoch": 3.27,
      "learning_rate": 0.04620467481139164,
      "loss": 3.1318,
      "step": 320000
    },
    {
      "epoch": 3.37,
      "learning_rate": 0.046076019720252374,
      "loss": 3.1319,
      "step": 330000
    },
    {
      "epoch": 3.48,
      "learning_rate": 0.045947364629113106,
      "loss": 3.1335,
      "step": 340000
    },
    {
      "epoch": 3.58,
      "learning_rate": 0.04581870953797384,
      "loss": 3.1326,
      "step": 350000
    },
    {
      "epoch": 3.68,
      "learning_rate": 0.04569005444683458,
      "loss": 3.1306,
      "step": 360000
    },
    {
      "epoch": 3.78,
      "learning_rate": 0.04556139935569531,
      "loss": 3.1289,
      "step": 370000
    },
    {
      "epoch": 3.89,
      "learning_rate": 0.04543274426455604,
      "loss": 3.1275,
      "step": 380000
    },
    {
      "epoch": 3.99,
      "learning_rate": 0.04530408917341677,
      "loss": 3.1259,
      "step": 390000
    },
    {
      "epoch": 4.09,
      "learning_rate": 0.045175434082277505,
      "loss": 3.0882,
      "step": 400000
    },
    {
      "epoch": 4.19,
      "learning_rate": 0.04504677899113824,
      "loss": 3.094,
      "step": 410000
    },
    {
      "epoch": 4.3,
      "learning_rate": 0.04491812389999897,
      "loss": 3.0958,
      "step": 420000
    },
    {
      "epoch": 4.4,
      "learning_rate": 0.04478946880885971,
      "loss": 3.0976,
      "step": 430000
    },
    {
      "epoch": 4.5,
      "learning_rate": 0.04466081371772044,
      "loss": 3.0996,
      "step": 440000
    },
    {
      "epoch": 4.6,
      "learning_rate": 0.04453215862658117,
      "loss": 3.099,
      "step": 450000
    },
    {
      "epoch": 4.7,
      "learning_rate": 0.04440350353544191,
      "loss": 3.1,
      "step": 460000
    },
    {
      "epoch": 4.81,
      "learning_rate": 0.04427484844430264,
      "loss": 3.0993,
      "step": 470000
    },
    {
      "epoch": 4.91,
      "learning_rate": 0.044146193353163374,
      "loss": 3.0985,
      "step": 480000
    },
    {
      "epoch": 5.01,
      "learning_rate": 0.044017538262024106,
      "loss": 3.094,
      "step": 490000
    },
    {
      "epoch": 5.11,
      "learning_rate": 0.043888883170884845,
      "loss": 3.0646,
      "step": 500000
    },
    {
      "epoch": 5.22,
      "learning_rate": 0.043760228079745576,
      "loss": 3.0694,
      "step": 510000
    },
    {
      "epoch": 5.32,
      "learning_rate": 0.04363157298860631,
      "loss": 3.0712,
      "step": 520000
    },
    {
      "epoch": 5.42,
      "learning_rate": 0.04350291789746704,
      "loss": 3.0736,
      "step": 530000
    },
    {
      "epoch": 5.52,
      "learning_rate": 0.04337426280632778,
      "loss": 3.0752,
      "step": 540000
    },
    {
      "epoch": 5.62,
      "learning_rate": 0.04324560771518851,
      "loss": 3.0759,
      "step": 550000
    },
    {
      "epoch": 5.73,
      "learning_rate": 0.04311695262404924,
      "loss": 3.0762,
      "step": 560000
    },
    {
      "epoch": 5.83,
      "learning_rate": 0.042988297532909975,
      "loss": 3.0756,
      "step": 570000
    },
    {
      "epoch": 5.93,
      "learning_rate": 0.04285964244177071,
      "loss": 3.076,
      "step": 580000
    },
    {
      "epoch": 6.03,
      "learning_rate": 0.04273098735063144,
      "loss": 3.0412,
      "step": 590000
    },
    {
      "epoch": 6.14,
      "learning_rate": 0.04260233225949217,
      "loss": 3.0464,
      "step": 600000
    },
    {
      "epoch": 6.24,
      "learning_rate": 0.04247367716835291,
      "loss": 3.05,
      "step": 610000
    },
    {
      "epoch": 6.34,
      "learning_rate": 0.04234502207721364,
      "loss": 3.0539,
      "step": 620000
    },
    {
      "epoch": 6.44,
      "learning_rate": 0.04221636698607437,
      "loss": 3.0554,
      "step": 630000
    },
    {
      "epoch": 6.55,
      "learning_rate": 0.042087711894935105,
      "loss": 3.0558,
      "step": 640000
    },
    {
      "epoch": 6.65,
      "learning_rate": 0.041959056803795844,
      "loss": 3.057,
      "step": 650000
    },
    {
      "epoch": 6.75,
      "learning_rate": 0.041830401712656576,
      "loss": 3.0584,
      "step": 660000
    },
    {
      "epoch": 6.85,
      "learning_rate": 0.04170174662151731,
      "loss": 3.0585,
      "step": 670000
    },
    {
      "epoch": 6.95,
      "learning_rate": 0.04157309153037805,
      "loss": 3.0593,
      "step": 680000
    },
    {
      "epoch": 7.06,
      "learning_rate": 0.04144443643923878,
      "loss": 3.0408,
      "step": 690000
    },
    {
      "epoch": 7.16,
      "learning_rate": 0.04131578134809951,
      "loss": 3.0325,
      "step": 700000
    },
    {
      "epoch": 7.26,
      "learning_rate": 0.04118712625696024,
      "loss": 3.035,
      "step": 710000
    },
    {
      "epoch": 7.36,
      "learning_rate": 0.04105847116582098,
      "loss": 3.0373,
      "step": 720000
    },
    {
      "epoch": 7.47,
      "learning_rate": 0.04092981607468171,
      "loss": 3.0405,
      "step": 730000
    },
    {
      "epoch": 7.57,
      "learning_rate": 0.040801160983542445,
      "loss": 3.0403,
      "step": 740000
    },
    {
      "epoch": 7.67,
      "learning_rate": 0.04067250589240318,
      "loss": 3.0431,
      "step": 750000
    },
    {
      "epoch": 7.77,
      "learning_rate": 0.04054385080126391,
      "loss": 3.0444,
      "step": 760000
    },
    {
      "epoch": 7.87,
      "learning_rate": 0.04041519571012464,
      "loss": 3.0445,
      "step": 770000
    },
    {
      "epoch": 7.98,
      "learning_rate": 0.04028654061898537,
      "loss": 3.0452,
      "step": 780000
    },
    {
      "epoch": 8.08,
      "learning_rate": 0.04015788552784611,
      "loss": 3.0217,
      "step": 790000
    },
    {
      "epoch": 8.18,
      "learning_rate": 0.04002923043670684,
      "loss": 3.02,
      "step": 800000
    },
    {
      "epoch": 8.28,
      "learning_rate": 0.039900575345567575,
      "loss": 3.0233,
      "step": 810000
    },
    {
      "epoch": 8.39,
      "learning_rate": 0.03977192025442831,
      "loss": 3.0259,
      "step": 820000
    },
    {
      "epoch": 8.49,
      "learning_rate": 0.039643265163289046,
      "loss": 3.0271,
      "step": 830000
    },
    {
      "epoch": 8.59,
      "learning_rate": 0.03951461007214978,
      "loss": 3.0121,
      "step": 840000
    },
    {
      "epoch": 8.69,
      "learning_rate": 0.03938595498101051,
      "loss": 3.0161,
      "step": 850000
    },
    {
      "epoch": 8.79,
      "learning_rate": 0.03925729988987125,
      "loss": 3.0195,
      "step": 860000
    },
    {
      "epoch": 8.9,
      "learning_rate": 0.03912864479873198,
      "loss": 3.021,
      "step": 870000
    },
    {
      "epoch": 9.0,
      "learning_rate": 0.03899998970759271,
      "loss": 3.0228,
      "step": 880000
    },
    {
      "epoch": 9.1,
      "learning_rate": 0.038871334616453444,
      "loss": 3.0073,
      "step": 890000
    },
    {
      "epoch": 9.2,
      "learning_rate": 0.03874267952531418,
      "loss": 3.0114,
      "step": 900000
    },
    {
      "epoch": 9.31,
      "learning_rate": 0.038614024434174915,
      "loss": 3.0151,
      "step": 910000
    },
    {
      "epoch": 9.41,
      "learning_rate": 0.03848536934303565,
      "loss": 3.0175,
      "step": 920000
    },
    {
      "epoch": 9.51,
      "learning_rate": 0.03835671425189638,
      "loss": 3.0193,
      "step": 930000
    },
    {
      "epoch": 9.61,
      "learning_rate": 0.03822805916075711,
      "loss": 3.0185,
      "step": 940000
    },
    {
      "epoch": 9.72,
      "learning_rate": 0.03809940406961784,
      "loss": 3.0228,
      "step": 950000
    },
    {
      "epoch": 9.82,
      "learning_rate": 0.037970748978478575,
      "loss": 3.0226,
      "step": 960000
    },
    {
      "epoch": 9.92,
      "learning_rate": 0.03784209388733931,
      "loss": 3.0227,
      "step": 970000
    },
    {
      "epoch": 10.02,
      "learning_rate": 0.037713438796200045,
      "loss": 3.017,
      "step": 980000
    },
    {
      "epoch": 10.12,
      "learning_rate": 0.03758478370506078,
      "loss": 2.9992,
      "step": 990000
    },
    {
      "epoch": 10.23,
      "learning_rate": 0.03745612861392151,
      "loss": 3.0007,
      "step": 1000000
    },
    {
      "epoch": 10.33,
      "learning_rate": 0.03732747352278225,
      "loss": 3.0047,
      "step": 1010000
    },
    {
      "epoch": 10.43,
      "learning_rate": 0.03719881843164298,
      "loss": 3.0075,
      "step": 1020000
    },
    {
      "epoch": 10.53,
      "learning_rate": 0.03707016334050371,
      "loss": 3.0095,
      "step": 1030000
    },
    {
      "epoch": 10.64,
      "learning_rate": 0.03694150824936445,
      "loss": 3.0101,
      "step": 1040000
    },
    {
      "epoch": 10.74,
      "learning_rate": 0.03681285315822518,
      "loss": 3.0123,
      "step": 1050000
    },
    {
      "epoch": 10.84,
      "learning_rate": 0.036684198067085914,
      "loss": 3.013,
      "step": 1060000
    },
    {
      "epoch": 10.94,
      "learning_rate": 0.036555542975946646,
      "loss": 3.0121,
      "step": 1070000
    },
    {
      "epoch": 11.04,
      "learning_rate": 0.03642688788480738,
      "loss": 2.9892,
      "step": 1080000
    },
    {
      "epoch": 11.15,
      "learning_rate": 0.03629823279366811,
      "loss": 2.9908,
      "step": 1090000
    },
    {
      "epoch": 11.25,
      "learning_rate": 0.03616957770252884,
      "loss": 2.9941,
      "step": 1100000
    },
    {
      "epoch": 11.35,
      "learning_rate": 0.036040922611389574,
      "loss": 2.9976,
      "step": 1110000
    },
    {
      "epoch": 11.45,
      "learning_rate": 0.03591226752025031,
      "loss": 2.9992,
      "step": 1120000
    },
    {
      "epoch": 11.56,
      "learning_rate": 0.035783612429111045,
      "loss": 3.0007,
      "step": 1130000
    },
    {
      "epoch": 11.66,
      "learning_rate": 0.03565495733797178,
      "loss": 3.0021,
      "step": 1140000
    },
    {
      "epoch": 11.76,
      "learning_rate": 0.035526302246832515,
      "loss": 3.0032,
      "step": 1150000
    },
    {
      "epoch": 11.86,
      "learning_rate": 0.03539764715569325,
      "loss": 3.0057,
      "step": 1160000
    },
    {
      "epoch": 11.97,
      "learning_rate": 0.03526899206455398,
      "loss": 3.0053,
      "step": 1170000
    },
    {
      "epoch": 12.07,
      "learning_rate": 0.03514033697341471,
      "loss": 2.9898,
      "step": 1180000
    },
    {
      "epoch": 12.17,
      "learning_rate": 0.03501168188227545,
      "loss": 2.9848,
      "step": 1190000
    },
    {
      "epoch": 12.27,
      "learning_rate": 0.03488302679113618,
      "loss": 2.9871,
      "step": 1200000
    },
    {
      "epoch": 12.37,
      "learning_rate": 0.034754371699996914,
      "loss": 2.9903,
      "step": 1210000
    },
    {
      "epoch": 12.48,
      "learning_rate": 0.03462571660885765,
      "loss": 2.9918,
      "step": 1220000
    },
    {
      "epoch": 12.58,
      "learning_rate": 0.034497061517718385,
      "loss": 2.9948,
      "step": 1230000
    },
    {
      "epoch": 12.68,
      "learning_rate": 0.034368406426579116,
      "loss": 2.9955,
      "step": 1240000
    },
    {
      "epoch": 12.78,
      "learning_rate": 0.03423975133543985,
      "loss": 2.9971,
      "step": 1250000
    },
    {
      "epoch": 12.89,
      "learning_rate": 0.03411109624430058,
      "loss": 2.9978,
      "step": 1260000
    },
    {
      "epoch": 12.99,
      "learning_rate": 0.03398244115316131,
      "loss": 2.9985,
      "step": 1270000
    },
    {
      "epoch": 13.09,
      "learning_rate": 0.033853786062022044,
      "loss": 2.9789,
      "step": 1280000
    },
    {
      "epoch": 13.19,
      "learning_rate": 0.033725130970882776,
      "loss": 2.9795,
      "step": 1290000
    },
    {
      "epoch": 13.29,
      "learning_rate": 0.033596475879743515,
      "loss": 2.9835,
      "step": 1300000
    },
    {
      "epoch": 13.4,
      "learning_rate": 0.03346782078860425,
      "loss": 2.9829,
      "step": 1310000
    },
    {
      "epoch": 13.5,
      "learning_rate": 0.03333916569746498,
      "loss": 2.9869,
      "step": 1320000
    },
    {
      "epoch": 13.6,
      "learning_rate": 0.03321051060632572,
      "loss": 2.9755,
      "step": 1330000
    },
    {
      "epoch": 13.7,
      "learning_rate": 0.03308185551518645,
      "loss": 2.978,
      "step": 1340000
    },
    {
      "epoch": 13.81,
      "learning_rate": 0.03295320042404718,
      "loss": 2.9811,
      "step": 1350000
    },
    {
      "epoch": 13.91,
      "learning_rate": 0.03282454533290791,
      "loss": 2.9824,
      "step": 1360000
    },
    {
      "epoch": 14.01,
      "learning_rate": 0.03269589024176865,
      "loss": 2.9832,
      "step": 1370000
    },
    {
      "epoch": 14.11,
      "learning_rate": 0.032567235150629384,
      "loss": 2.9726,
      "step": 1380000
    },
    {
      "epoch": 14.22,
      "learning_rate": 0.032438580059490116,
      "loss": 2.9762,
      "step": 1390000
    },
    {
      "epoch": 14.32,
      "learning_rate": 0.03230992496835085,
      "loss": 2.9786,
      "step": 1400000
    },
    {
      "epoch": 14.42,
      "learning_rate": 0.03218126987721159,
      "loss": 2.9804,
      "step": 1410000
    },
    {
      "epoch": 14.52,
      "learning_rate": 0.03205261478607232,
      "loss": 2.9821,
      "step": 1420000
    },
    {
      "epoch": 14.62,
      "learning_rate": 0.03192395969493305,
      "loss": 2.9825,
      "step": 1430000
    },
    {
      "epoch": 14.73,
      "learning_rate": 0.03179530460379378,
      "loss": 2.985,
      "step": 1440000
    },
    {
      "epoch": 14.83,
      "learning_rate": 0.031666649512654514,
      "loss": 2.9851,
      "step": 1450000
    },
    {
      "epoch": 14.93,
      "learning_rate": 0.031537994421515246,
      "loss": 2.9859,
      "step": 1460000
    },
    {
      "epoch": 15.03,
      "learning_rate": 0.03140933933037598,
      "loss": 2.9795,
      "step": 1470000
    },
    {
      "epoch": 15.14,
      "learning_rate": 0.03128068423923672,
      "loss": 2.9681,
      "step": 1480000
    },
    {
      "epoch": 15.24,
      "learning_rate": 0.03115202914809745,
      "loss": 2.9707,
      "step": 1490000
    },
    {
      "epoch": 15.34,
      "learning_rate": 0.03102337405695818,
      "loss": 2.9727,
      "step": 1500000
    },
    {
      "epoch": 15.44,
      "learning_rate": 0.03089471896581892,
      "loss": 2.9747,
      "step": 1510000
    },
    {
      "epoch": 15.54,
      "learning_rate": 0.03076606387467965,
      "loss": 2.9769,
      "step": 1520000
    },
    {
      "epoch": 15.65,
      "learning_rate": 0.030637408783540383,
      "loss": 2.9778,
      "step": 1530000
    },
    {
      "epoch": 15.75,
      "learning_rate": 0.030508753692401115,
      "loss": 2.9788,
      "step": 1540000
    },
    {
      "epoch": 15.85,
      "learning_rate": 0.030380098601261854,
      "loss": 2.9789,
      "step": 1550000
    },
    {
      "epoch": 15.95,
      "learning_rate": 0.030251443510122586,
      "loss": 2.9807,
      "step": 1560000
    },
    {
      "epoch": 16.06,
      "learning_rate": 0.030122788418983318,
      "loss": 2.9619,
      "step": 1570000
    },
    {
      "epoch": 16.16,
      "learning_rate": 0.02999413332784405,
      "loss": 2.9638,
      "step": 1580000
    },
    {
      "epoch": 16.26,
      "learning_rate": 0.029865478236704785,
      "loss": 2.9654,
      "step": 1590000
    },
    {
      "epoch": 16.36,
      "learning_rate": 0.029736823145565517,
      "loss": 2.9679,
      "step": 1600000
    },
    {
      "epoch": 16.46,
      "learning_rate": 0.02960816805442625,
      "loss": 2.9704,
      "step": 1610000
    },
    {
      "epoch": 16.57,
      "learning_rate": 0.029479512963286988,
      "loss": 2.9726,
      "step": 1620000
    },
    {
      "epoch": 16.67,
      "learning_rate": 0.02935085787214772,
      "loss": 2.9725,
      "step": 1630000
    },
    {
      "epoch": 16.77,
      "learning_rate": 0.02922220278100845,
      "loss": 2.9738,
      "step": 1640000
    },
    {
      "epoch": 16.87,
      "learning_rate": 0.029093547689869183,
      "loss": 2.9747,
      "step": 1650000
    },
    {
      "epoch": 16.98,
      "learning_rate": 0.02896489259872992,
      "loss": 2.9763,
      "step": 1660000
    },
    {
      "epoch": 17.08,
      "learning_rate": 0.02883623750759065,
      "loss": 2.9617,
      "step": 1670000
    },
    {
      "epoch": 17.18,
      "learning_rate": 0.028707582416451383,
      "loss": 2.9618,
      "step": 1680000
    },
    {
      "epoch": 17.28,
      "learning_rate": 0.028578927325312115,
      "loss": 2.9612,
      "step": 1690000
    },
    {
      "epoch": 17.39,
      "learning_rate": 0.028450272234172853,
      "loss": 2.9632,
      "step": 1700000
    },
    {
      "epoch": 17.49,
      "learning_rate": 0.028321617143033585,
      "loss": 2.9655,
      "step": 1710000
    },
    {
      "epoch": 17.59,
      "learning_rate": 0.028192962051894317,
      "loss": 2.9672,
      "step": 1720000
    },
    {
      "epoch": 17.69,
      "learning_rate": 0.028064306960755056,
      "loss": 2.9691,
      "step": 1730000
    },
    {
      "epoch": 17.79,
      "learning_rate": 0.027935651869615788,
      "loss": 2.9698,
      "step": 1740000
    },
    {
      "epoch": 17.9,
      "learning_rate": 0.02780699677847652,
      "loss": 2.9702,
      "step": 1750000
    },
    {
      "epoch": 18.0,
      "learning_rate": 0.027678341687337252,
      "loss": 2.9719,
      "step": 1760000
    },
    {
      "epoch": 18.1,
      "learning_rate": 0.027549686596197987,
      "loss": 2.9546,
      "step": 1770000
    },
    {
      "epoch": 18.2,
      "learning_rate": 0.02742103150505872,
      "loss": 2.9567,
      "step": 1780000
    },
    {
      "epoch": 18.31,
      "learning_rate": 0.02729237641391945,
      "loss": 2.9586,
      "step": 1790000
    },
    {
      "epoch": 18.41,
      "learning_rate": 0.02716372132278019,
      "loss": 2.9606,
      "step": 1800000
    },
    {
      "epoch": 18.51,
      "learning_rate": 0.02703506623164092,
      "loss": 2.9506,
      "step": 1810000
    },
    {
      "epoch": 18.61,
      "learning_rate": 0.026906411140501654,
      "loss": 2.9518,
      "step": 1820000
    },
    {
      "epoch": 18.71,
      "learning_rate": 0.026777756049362385,
      "loss": 2.9575,
      "step": 1830000
    },
    {
      "epoch": 18.82,
      "learning_rate": 0.02664910095822312,
      "loss": 2.9584,
      "step": 1840000
    },
    {
      "epoch": 18.92,
      "learning_rate": 0.026520445867083853,
      "loss": 2.9594,
      "step": 1850000
    },
    {
      "epoch": 19.02,
      "learning_rate": 0.026391790775944585,
      "loss": 2.9578,
      "step": 1860000
    },
    {
      "epoch": 19.12,
      "learning_rate": 0.026263135684805317,
      "loss": 2.9535,
      "step": 1870000
    },
    {
      "epoch": 19.23,
      "learning_rate": 0.026134480593666055,
      "loss": 2.9552,
      "step": 1880000
    },
    {
      "epoch": 19.33,
      "learning_rate": 0.026005825502526787,
      "loss": 2.9568,
      "step": 1890000
    },
    {
      "epoch": 19.43,
      "learning_rate": 0.02587717041138752,
      "loss": 2.9583,
      "step": 1900000
    },
    {
      "epoch": 19.53,
      "learning_rate": 0.025748515320248258,
      "loss": 2.9596,
      "step": 1910000
    },
    {
      "epoch": 19.64,
      "learning_rate": 0.02561986022910899,
      "loss": 2.9599,
      "step": 1920000
    },
    {
      "epoch": 19.74,
      "learning_rate": 0.025491205137969722,
      "loss": 2.9615,
      "step": 1930000
    },
    {
      "epoch": 19.84,
      "learning_rate": 0.025362550046830454,
      "loss": 2.9622,
      "step": 1940000
    },
    {
      "epoch": 19.94,
      "learning_rate": 0.02523389495569119,
      "loss": 2.9637,
      "step": 1950000
    },
    {
      "epoch": 20.04,
      "learning_rate": 0.02510523986455192,
      "loss": 2.9559,
      "step": 1960000
    },
    {
      "epoch": 20.15,
      "learning_rate": 0.024976584773412653,
      "loss": 2.9496,
      "step": 1970000
    },
    {
      "epoch": 20.25,
      "learning_rate": 0.02484792968227339,
      "loss": 2.9516,
      "step": 1980000
    },
    {
      "epoch": 20.35,
      "learning_rate": 0.024719274591134124,
      "loss": 2.9522,
      "step": 1990000
    },
    {
      "epoch": 20.45,
      "learning_rate": 0.024590619499994856,
      "loss": 2.9538,
      "step": 2000000
    },
    {
      "epoch": 20.56,
      "learning_rate": 0.02446196440885559,
      "loss": 2.9552,
      "step": 2010000
    },
    {
      "epoch": 20.66,
      "learning_rate": 0.024333309317716323,
      "loss": 2.957,
      "step": 2020000
    },
    {
      "epoch": 20.76,
      "learning_rate": 0.024204654226577055,
      "loss": 2.9572,
      "step": 2030000
    },
    {
      "epoch": 20.86,
      "learning_rate": 0.024075999135437787,
      "loss": 2.9586,
      "step": 2040000
    },
    {
      "epoch": 20.96,
      "learning_rate": 0.023947344044298522,
      "loss": 2.9609,
      "step": 2050000
    },
    {
      "epoch": 21.07,
      "learning_rate": 0.023818688953159254,
      "loss": 2.9429,
      "step": 2060000
    },
    {
      "epoch": 21.17,
      "learning_rate": 0.02369003386201999,
      "loss": 2.947,
      "step": 2070000
    },
    {
      "epoch": 21.27,
      "learning_rate": 0.023561378770880725,
      "loss": 2.9488,
      "step": 2080000
    },
    {
      "epoch": 21.37,
      "learning_rate": 0.023432723679741457,
      "loss": 2.9491,
      "step": 2090000
    },
    {
      "epoch": 21.48,
      "learning_rate": 0.023304068588602192,
      "loss": 2.9514,
      "step": 2100000
    },
    {
      "epoch": 21.58,
      "learning_rate": 0.023175413497462924,
      "loss": 2.9513,
      "step": 2110000
    },
    {
      "epoch": 21.68,
      "learning_rate": 0.023046758406323656,
      "loss": 2.9537,
      "step": 2120000
    },
    {
      "epoch": 21.78,
      "learning_rate": 0.022918103315184388,
      "loss": 2.9548,
      "step": 2130000
    },
    {
      "epoch": 21.88,
      "learning_rate": 0.022789448224045123,
      "loss": 2.9573,
      "step": 2140000
    },
    {
      "epoch": 21.99,
      "learning_rate": 0.022660793132905855,
      "loss": 2.9565,
      "step": 2150000
    },
    {
      "epoch": 22.09,
      "learning_rate": 0.02253213804176659,
      "loss": 2.9432,
      "step": 2160000
    },
    {
      "epoch": 22.19,
      "learning_rate": 0.022403482950627322,
      "loss": 2.9445,
      "step": 2170000
    },
    {
      "epoch": 22.29,
      "learning_rate": 0.022274827859488058,
      "loss": 2.9465,
      "step": 2180000
    },
    {
      "epoch": 22.4,
      "learning_rate": 0.022146172768348793,
      "loss": 2.9478,
      "step": 2190000
    },
    {
      "epoch": 22.5,
      "learning_rate": 0.022017517677209525,
      "loss": 2.9474,
      "step": 2200000
    },
    {
      "epoch": 22.6,
      "learning_rate": 0.021888862586070257,
      "loss": 2.9497,
      "step": 2210000
    },
    {
      "epoch": 22.7,
      "learning_rate": 0.02176020749493099,
      "loss": 2.9507,
      "step": 2220000
    },
    {
      "epoch": 22.81,
      "learning_rate": 0.021631552403791724,
      "loss": 2.9518,
      "step": 2230000
    },
    {
      "epoch": 22.91,
      "learning_rate": 0.021502897312652456,
      "loss": 2.9523,
      "step": 2240000
    },
    {
      "epoch": 23.01,
      "learning_rate": 0.02137424222151319,
      "loss": 2.951,
      "step": 2250000
    },
    {
      "epoch": 23.11,
      "learning_rate": 0.021245587130373923,
      "loss": 2.9394,
      "step": 2260000
    },
    {
      "epoch": 23.21,
      "learning_rate": 0.02111693203923466,
      "loss": 2.9426,
      "step": 2270000
    },
    {
      "epoch": 23.32,
      "learning_rate": 0.020988276948095394,
      "loss": 2.9425,
      "step": 2280000
    },
    {
      "epoch": 23.42,
      "learning_rate": 0.020859621856956126,
      "loss": 2.9449,
      "step": 2290000
    },
    {
      "epoch": 23.52,
      "learning_rate": 0.020730966765816858,
      "loss": 2.939,
      "step": 2300000
    },
    {
      "epoch": 23.62,
      "learning_rate": 0.02060231167467759,
      "loss": 2.939,
      "step": 2310000
    },
    {
      "epoch": 23.73,
      "learning_rate": 0.020473656583538325,
      "loss": 2.9414,
      "step": 2320000
    },
    {
      "epoch": 23.83,
      "learning_rate": 0.020345001492399057,
      "loss": 2.9433,
      "step": 2330000
    },
    {
      "epoch": 23.93,
      "learning_rate": 0.020216346401259792,
      "loss": 2.9436,
      "step": 2340000
    },
    {
      "epoch": 24.03,
      "learning_rate": 0.020087691310120524,
      "loss": 2.9421,
      "step": 2350000
    },
    {
      "epoch": 24.13,
      "learning_rate": 0.01995903621898126,
      "loss": 2.9385,
      "step": 2360000
    },
    {
      "epoch": 24.24,
      "learning_rate": 0.019830381127841995,
      "loss": 2.9413,
      "step": 2370000
    },
    {
      "epoch": 24.34,
      "learning_rate": 0.019701726036702727,
      "loss": 2.9426,
      "step": 2380000
    },
    {
      "epoch": 24.44,
      "learning_rate": 0.01957307094556346,
      "loss": 2.9423,
      "step": 2390000
    },
    {
      "epoch": 24.54,
      "learning_rate": 0.01944441585442419,
      "loss": 2.9442,
      "step": 2400000
    },
    {
      "epoch": 24.65,
      "learning_rate": 0.019315760763284926,
      "loss": 2.9457,
      "step": 2410000
    },
    {
      "epoch": 24.75,
      "learning_rate": 0.019187105672145658,
      "loss": 2.9455,
      "step": 2420000
    },
    {
      "epoch": 24.85,
      "learning_rate": 0.019058450581006393,
      "loss": 2.947,
      "step": 2430000
    },
    {
      "epoch": 24.95,
      "learning_rate": 0.018929795489867125,
      "loss": 2.9464,
      "step": 2440000
    },
    {
      "epoch": 25.06,
      "learning_rate": 0.01880114039872786,
      "loss": 2.9411,
      "step": 2450000
    },
    {
      "epoch": 25.16,
      "learning_rate": 0.018672485307588593,
      "loss": 2.9362,
      "step": 2460000
    },
    {
      "epoch": 25.26,
      "learning_rate": 0.018543830216449324,
      "loss": 2.9373,
      "step": 2470000
    },
    {
      "epoch": 25.36,
      "learning_rate": 0.01841517512531006,
      "loss": 2.9395,
      "step": 2480000
    },
    {
      "epoch": 25.46,
      "learning_rate": 0.018286520034170792,
      "loss": 2.9404,
      "step": 2490000
    },
    {
      "epoch": 25.57,
      "learning_rate": 0.018157864943031527,
      "loss": 2.9412,
      "step": 2500000
    },
    {
      "epoch": 25.67,
      "learning_rate": 0.01802920985189226,
      "loss": 2.9425,
      "step": 2510000
    },
    {
      "epoch": 25.77,
      "learning_rate": 0.017900554760752994,
      "loss": 2.943,
      "step": 2520000
    },
    {
      "epoch": 25.87,
      "learning_rate": 0.017771899669613726,
      "loss": 2.9446,
      "step": 2530000
    },
    {
      "epoch": 25.98,
      "learning_rate": 0.01764324457847446,
      "loss": 2.945,
      "step": 2540000
    },
    {
      "epoch": 26.08,
      "learning_rate": 0.017514589487335194,
      "loss": 2.9327,
      "step": 2550000
    },
    {
      "epoch": 26.18,
      "learning_rate": 0.017385934396195925,
      "loss": 2.9341,
      "step": 2560000
    },
    {
      "epoch": 26.28,
      "learning_rate": 0.01725727930505666,
      "loss": 2.9366,
      "step": 2570000
    },
    {
      "epoch": 26.38,
      "learning_rate": 0.017128624213917393,
      "loss": 2.9376,
      "step": 2580000
    },
    {
      "epoch": 26.49,
      "learning_rate": 0.016999969122778128,
      "loss": 2.9373,
      "step": 2590000
    },
    {
      "epoch": 26.59,
      "learning_rate": 0.01687131403163886,
      "loss": 2.9388,
      "step": 2600000
    },
    {
      "epoch": 26.69,
      "learning_rate": 0.016742658940499595,
      "loss": 2.9404,
      "step": 2610000
    },
    {
      "epoch": 26.79,
      "learning_rate": 0.016614003849360327,
      "loss": 2.9411,
      "step": 2620000
    },
    {
      "epoch": 26.9,
      "learning_rate": 0.016485348758221063,
      "loss": 2.943,
      "step": 2630000
    },
    {
      "epoch": 27.0,
      "learning_rate": 0.016356693667081795,
      "loss": 2.9421,
      "step": 2640000
    },
    {
      "epoch": 27.1,
      "learning_rate": 0.016228038575942526,
      "loss": 2.9313,
      "step": 2650000
    },
    {
      "epoch": 27.2,
      "learning_rate": 0.016099383484803262,
      "loss": 2.9337,
      "step": 2660000
    },
    {
      "epoch": 27.31,
      "learning_rate": 0.015970728393663994,
      "loss": 2.9341,
      "step": 2670000
    },
    {
      "epoch": 27.41,
      "learning_rate": 0.01584207330252473,
      "loss": 2.9353,
      "step": 2680000
    },
    {
      "epoch": 27.51,
      "learning_rate": 0.01571341821138546,
      "loss": 2.9359,
      "step": 2690000
    },
    {
      "epoch": 27.61,
      "learning_rate": 0.015584763120246196,
      "loss": 2.9363,
      "step": 2700000
    },
    {
      "epoch": 27.71,
      "learning_rate": 0.015456108029106928,
      "loss": 2.9387,
      "step": 2710000
    },
    {
      "epoch": 27.82,
      "learning_rate": 0.015327452937967662,
      "loss": 2.9388,
      "step": 2720000
    },
    {
      "epoch": 27.92,
      "learning_rate": 0.015198797846828394,
      "loss": 2.9399,
      "step": 2730000
    },
    {
      "epoch": 28.02,
      "learning_rate": 0.01507014275568913,
      "loss": 2.9384,
      "step": 2740000
    },
    {
      "epoch": 28.12,
      "learning_rate": 0.014941487664549863,
      "loss": 2.9305,
      "step": 2750000
    },
    {
      "epoch": 28.23,
      "learning_rate": 0.014812832573410595,
      "loss": 2.9325,
      "step": 2760000
    },
    {
      "epoch": 28.33,
      "learning_rate": 0.01468417748227133,
      "loss": 2.9332,
      "step": 2770000
    },
    {
      "epoch": 28.43,
      "learning_rate": 0.014555522391132062,
      "loss": 2.9339,
      "step": 2780000
    }
  ],
  "max_steps": 3911360,
  "num_train_epochs": 40,
  "total_flos": 1.8137583487197538e+20,
  "trial_name": null,
  "trial_params": null
}