{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0013335833802173, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "acc": 0.0, "epoch": 0.0, "learning_rate": 2e-05, "loss": 0.1583, "rl_loss": 0.0773, "step": 1 }, { "acc": 0.0, "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.1955, "rl_loss": -0.0025, "step": 10 }, { "acc": 0.0, "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.1993, "rl_loss": -0.0069, "step": 20 }, { "acc": 0.0, "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.2039, "rl_loss": -0.0095, "step": 30 }, { "acc": 0.0, "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.1866, "rl_loss": -0.0069, "step": 40 }, { "acc": 0.0, "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.2016, "rl_loss": -0.012, "step": 50 }, { "acc": 0.0, "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.1938, "rl_loss": -0.0042, "step": 60 }, { "acc": 0.0, "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.1973, "rl_loss": -0.0091, "step": 70 }, { "acc": 0.0, "epoch": 0.05, "learning_rate": 2e-05, "loss": 0.1937, "rl_loss": -0.0073, "step": 80 }, { "acc": 0.0, "epoch": 0.06, "learning_rate": 2e-05, "loss": 0.1995, "rl_loss": -0.0092, "step": 90 }, { "acc": 0.0, "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.1954, "rl_loss": -0.0138, "step": 100 }, { "acc": 0.0, "epoch": 0.07, "learning_rate": 2e-05, "loss": 0.1963, "rl_loss": -0.009, "step": 110 }, { "acc": 0.0, "epoch": 0.08, "learning_rate": 2e-05, "loss": 0.2045, "rl_loss": -0.0015, "step": 120 }, { "acc": 0.0, "epoch": 0.09, "learning_rate": 2e-05, "loss": 0.1878, "rl_loss": -0.0053, "step": 130 }, { "acc": 0.0, "epoch": 0.09, "learning_rate": 2e-05, "loss": 0.1941, "rl_loss": -0.0131, "step": 140 }, { "acc": 0.0, "epoch": 0.1, "learning_rate": 2e-05, "loss": 0.1934, "rl_loss": -0.0095, "step": 150 }, { "acc": 0.0, "epoch": 0.11, "learning_rate": 2e-05, "loss": 0.1897, "rl_loss": -0.0079, "step": 160 }, { "acc": 0.0, "epoch": 0.11, "learning_rate": 2e-05, "loss": 0.1909, "rl_loss": -0.0124, "step": 170 }, { "acc": 0.0, "epoch": 0.12, "learning_rate": 2e-05, "loss": 0.192, "rl_loss": -0.0121, "step": 180 }, { "acc": 0.0, "epoch": 0.13, "learning_rate": 2e-05, "loss": 0.1972, "rl_loss": 0.0008, "step": 190 }, { "acc": 0.0, "epoch": 0.13, "learning_rate": 2e-05, "loss": 0.2, "rl_loss": -0.0046, "step": 200 }, { "acc": 0.0, "epoch": 0.14, "learning_rate": 2e-05, "loss": 0.198, "rl_loss": -0.0104, "step": 210 }, { "acc": 0.0, "epoch": 0.15, "learning_rate": 2e-05, "loss": 0.1969, "rl_loss": -0.0194, "step": 220 }, { "acc": 0.0, "epoch": 0.15, "learning_rate": 2e-05, "loss": 0.1933, "rl_loss": -0.0105, "step": 230 }, { "acc": 0.0, "epoch": 0.16, "learning_rate": 2e-05, "loss": 0.181, "rl_loss": -0.0166, "step": 240 }, { "acc": 0.0, "epoch": 0.17, "learning_rate": 2e-05, "loss": 0.1853, "rl_loss": -0.0174, "step": 250 }, { "acc": 0.0, "epoch": 0.17, "learning_rate": 2e-05, "loss": 0.206, "rl_loss": -0.0192, "step": 260 }, { "acc": 0.0, "epoch": 0.18, "learning_rate": 2e-05, "loss": 0.1902, "rl_loss": -0.0071, "step": 270 }, { "acc": 0.0, "epoch": 0.19, "learning_rate": 2e-05, "loss": 0.2093, "rl_loss": -0.0172, "step": 280 }, { "acc": 0.0, "epoch": 0.19, "learning_rate": 2e-05, "loss": 0.1916, "rl_loss": -0.0226, "step": 290 }, { "acc": 0.0, "epoch": 0.2, "learning_rate": 2e-05, "loss": 0.1899, "rl_loss": -0.0175, "step": 300 }, { "acc": 0.0, "epoch": 0.21, "learning_rate": 2e-05, "loss": 0.2031, "rl_loss": -0.0283, "step": 310 }, { "acc": 0.0, "epoch": 0.21, "learning_rate": 2e-05, "loss": 0.2002, "rl_loss": -0.0079, "step": 320 }, { "acc": 0.0, "epoch": 0.22, "learning_rate": 2e-05, "loss": 0.2052, "rl_loss": -0.0195, "step": 330 }, { "acc": 0.0, "epoch": 0.23, "learning_rate": 2e-05, "loss": 0.207, "rl_loss": -0.0204, "step": 340 }, { "acc": 0.0, "epoch": 0.23, "learning_rate": 2e-05, "loss": 0.1994, "rl_loss": -0.0027, "step": 350 }, { "acc": 0.0, "epoch": 0.24, "learning_rate": 2e-05, "loss": 0.1969, "rl_loss": -0.0091, "step": 360 }, { "acc": 0.0, "epoch": 0.25, "learning_rate": 2e-05, "loss": 0.1933, "rl_loss": -0.0141, "step": 370 }, { "acc": 0.0, "epoch": 0.25, "learning_rate": 2e-05, "loss": 0.1981, "rl_loss": -0.0188, "step": 380 }, { "acc": 0.0, "epoch": 0.26, "learning_rate": 2e-05, "loss": 0.1954, "rl_loss": -0.0101, "step": 390 }, { "acc": 0.0, "epoch": 0.27, "learning_rate": 2e-05, "loss": 0.1946, "rl_loss": -0.0218, "step": 400 }, { "acc": 0.0, "epoch": 0.27, "learning_rate": 2e-05, "loss": 0.1913, "rl_loss": -0.0112, "step": 410 }, { "acc": 0.0, "epoch": 0.28, "learning_rate": 2e-05, "loss": 0.2008, "rl_loss": -0.006, "step": 420 }, { "acc": 0.0, "epoch": 0.29, "learning_rate": 2e-05, "loss": 0.1908, "rl_loss": -0.0199, "step": 430 }, { "acc": 0.0, "epoch": 0.29, "learning_rate": 2e-05, "loss": 0.192, "rl_loss": -0.0201, "step": 440 }, { "acc": 0.0, "epoch": 0.3, "learning_rate": 2e-05, "loss": 0.2064, "rl_loss": -0.0144, "step": 450 }, { "acc": 0.0, "epoch": 0.31, "learning_rate": 2e-05, "loss": 0.1998, "rl_loss": -0.0295, "step": 460 }, { "acc": 0.0, "epoch": 0.31, "learning_rate": 2e-05, "loss": 0.1996, "rl_loss": -0.0151, "step": 470 }, { "acc": 0.0, "epoch": 0.32, "learning_rate": 2e-05, "loss": 0.2179, "rl_loss": -0.0062, "step": 480 }, { "acc": 0.0, "epoch": 0.33, "learning_rate": 2e-05, "loss": 0.189, "rl_loss": -0.014, "step": 490 }, { "acc": 0.0, "epoch": 0.33, "learning_rate": 2e-05, "loss": 0.2153, "rl_loss": -0.0176, "step": 500 }, { "acc": 0.0, "epoch": 0.34, "learning_rate": 2e-05, "loss": 0.1954, "rl_loss": -0.0051, "step": 510 }, { "acc": 0.0, "epoch": 0.35, "learning_rate": 2e-05, "loss": 0.1877, "rl_loss": -0.0055, "step": 520 }, { "acc": 0.0, "epoch": 0.35, "learning_rate": 2e-05, "loss": 0.1952, "rl_loss": -0.0128, "step": 530 }, { "acc": 0.0, "epoch": 0.36, "learning_rate": 2e-05, "loss": 0.1946, "rl_loss": -0.0079, "step": 540 }, { "acc": 0.0, "epoch": 0.37, "learning_rate": 2e-05, "loss": 0.2009, "rl_loss": -0.0594, "step": 550 }, { "acc": 0.0, "epoch": 0.37, "learning_rate": 2e-05, "loss": 0.2025, "rl_loss": -0.0103, "step": 560 }, { "acc": 0.0, "epoch": 0.38, "learning_rate": 2e-05, "loss": 0.1932, "rl_loss": -0.0185, "step": 570 }, { "acc": 0.0, "epoch": 0.39, "learning_rate": 2e-05, "loss": 0.207, "rl_loss": -0.0175, "step": 580 }, { "acc": 0.0, "epoch": 0.39, "learning_rate": 2e-05, "loss": 0.1869, "rl_loss": -0.0559, "step": 590 }, { "acc": 0.0, "epoch": 0.4, "learning_rate": 2e-05, "loss": 0.2019, "rl_loss": -0.009, "step": 600 }, { "acc": 0.0, "epoch": 0.41, "learning_rate": 2e-05, "loss": 0.2078, "rl_loss": -0.0115, "step": 610 }, { "acc": 0.0, "epoch": 0.41, "learning_rate": 2e-05, "loss": 0.1918, "rl_loss": -0.0094, "step": 620 }, { "acc": 0.0, "epoch": 0.42, "learning_rate": 2e-05, "loss": 0.2049, "rl_loss": -0.0436, "step": 630 }, { "acc": 0.0, "epoch": 0.43, "learning_rate": 2e-05, "loss": 0.1891, "rl_loss": -0.0128, "step": 640 }, { "acc": 0.0, "epoch": 0.43, "learning_rate": 2e-05, "loss": 0.1971, "rl_loss": -0.0107, "step": 650 }, { "acc": 0.0, "epoch": 0.44, "learning_rate": 2e-05, "loss": 0.2326, "rl_loss": -0.0503, "step": 660 }, { "acc": 0.0, "epoch": 0.45, "learning_rate": 2e-05, "loss": 0.2079, "rl_loss": -0.0093, "step": 670 }, { "acc": 0.0, "epoch": 0.45, "learning_rate": 2e-05, "loss": 0.1986, "rl_loss": -0.0099, "step": 680 }, { "acc": 0.0, "epoch": 0.46, "learning_rate": 2e-05, "loss": 0.2051, "rl_loss": -0.0126, "step": 690 }, { "acc": 0.0, "epoch": 0.47, "learning_rate": 2e-05, "loss": 0.2038, "rl_loss": -0.0255, "step": 700 }, { "acc": 0.0, "epoch": 0.47, "learning_rate": 2e-05, "loss": 0.1982, "rl_loss": -0.1333, "step": 710 }, { "acc": 0.0, "epoch": 0.48, "learning_rate": 2e-05, "loss": 0.1991, "rl_loss": -0.0114, "step": 720 }, { "acc": 0.0, "epoch": 0.49, "learning_rate": 2e-05, "loss": 0.1953, "rl_loss": -0.0084, "step": 730 }, { "acc": 0.0, "epoch": 0.49, "learning_rate": 2e-05, "loss": 0.1884, "rl_loss": -0.0047, "step": 740 }, { "acc": 0.0, "epoch": 0.5, "learning_rate": 2e-05, "loss": 0.2078, "rl_loss": -0.0588, "step": 750 }, { "acc": 0.0, "epoch": 0.51, "learning_rate": 2e-05, "loss": 0.2105, "rl_loss": -0.0238, "step": 760 }, { "acc": 0.0, "epoch": 0.51, "learning_rate": 2e-05, "loss": 0.1908, "rl_loss": -0.0122, "step": 770 }, { "acc": 0.0, "epoch": 0.52, "learning_rate": 2e-05, "loss": 0.1997, "rl_loss": -0.039, "step": 780 }, { "acc": 0.0, "epoch": 0.53, "learning_rate": 2e-05, "loss": 0.1942, "rl_loss": -0.0666, "step": 790 }, { "acc": 0.0, "epoch": 0.53, "learning_rate": 2e-05, "loss": 0.2019, "rl_loss": -0.0694, "step": 800 }, { "acc": 0.0, "epoch": 0.54, "learning_rate": 2e-05, "loss": 0.2407, "rl_loss": -0.0509, "step": 810 }, { "acc": 0.0, "epoch": 0.55, "learning_rate": 2e-05, "loss": 0.2672, "rl_loss": 0.0018, "step": 820 }, { "acc": 0.0, "epoch": 0.55, "learning_rate": 2e-05, "loss": 0.2007, "rl_loss": -0.0075, "step": 830 }, { "acc": 0.0, "epoch": 0.56, "learning_rate": 2e-05, "loss": 0.2021, "rl_loss": -0.072, "step": 840 }, { "acc": 0.0, "epoch": 0.57, "learning_rate": 2e-05, "loss": 0.2401, "rl_loss": -0.0257, "step": 850 }, { "acc": 0.0, "epoch": 0.57, "learning_rate": 2e-05, "loss": 0.2168, "rl_loss": -0.0371, "step": 860 }, { "acc": 0.0, "epoch": 0.58, "learning_rate": 2e-05, "loss": 0.2008, "rl_loss": -0.0549, "step": 870 }, { "acc": 0.0, "epoch": 0.59, "learning_rate": 2e-05, "loss": 0.1992, "rl_loss": -0.0534, "step": 880 }, { "acc": 0.0, "epoch": 0.59, "learning_rate": 2e-05, "loss": 0.2226, "rl_loss": -0.0824, "step": 890 }, { "acc": 0.0, "epoch": 0.6, "learning_rate": 2e-05, "loss": 0.2067, "rl_loss": -0.0092, "step": 900 }, { "acc": 0.0, "epoch": 0.61, "learning_rate": 2e-05, "loss": 0.2192, "rl_loss": -0.0334, "step": 910 }, { "acc": 0.0, "epoch": 0.61, "learning_rate": 2e-05, "loss": 0.2127, "rl_loss": -0.0402, "step": 920 }, { "acc": 0.0, "epoch": 0.62, "learning_rate": 2e-05, "loss": 0.2399, "rl_loss": -0.0901, "step": 930 }, { "acc": 0.0, "epoch": 0.63, "learning_rate": 2e-05, "loss": 0.2302, "rl_loss": -0.0274, "step": 940 }, { "acc": 0.0, "epoch": 0.63, "learning_rate": 2e-05, "loss": 0.2229, "rl_loss": -0.0317, "step": 950 }, { "acc": 0.0, "epoch": 0.64, "learning_rate": 2e-05, "loss": 0.1961, "rl_loss": 0.0009, "step": 960 }, { "acc": 0.0, "epoch": 0.65, "learning_rate": 2e-05, "loss": 0.1909, "rl_loss": -0.015, "step": 970 }, { "acc": 0.0, "epoch": 0.65, "learning_rate": 2e-05, "loss": 0.2071, "rl_loss": -0.0675, "step": 980 }, { "acc": 0.0, "epoch": 0.66, "learning_rate": 2e-05, "loss": 0.2073, "rl_loss": -0.0286, "step": 990 }, { "acc": 0.0, "epoch": 0.67, "learning_rate": 2e-05, "loss": 0.2251, "rl_loss": 0.0071, "step": 1000 }, { "acc": 0.0, "epoch": 0.67, "learning_rate": 2e-05, "loss": 0.1874, "rl_loss": -0.0176, "step": 1010 }, { "acc": 0.0, "epoch": 0.68, "learning_rate": 2e-05, "loss": 0.2342, "rl_loss": -0.0224, "step": 1020 }, { "acc": 0.0, "epoch": 0.69, "learning_rate": 2e-05, "loss": 0.2244, "rl_loss": -0.0204, "step": 1030 }, { "acc": 0.0, "epoch": 0.69, "learning_rate": 2e-05, "loss": 0.2059, "rl_loss": -0.0206, "step": 1040 }, { "acc": 0.0, "epoch": 0.7, "learning_rate": 2e-05, "loss": 0.1919, "rl_loss": -0.0619, "step": 1050 }, { "acc": 0.0, "epoch": 0.71, "learning_rate": 2e-05, "loss": 0.2103, "rl_loss": -0.0141, "step": 1060 }, { "acc": 0.0, "epoch": 0.71, "learning_rate": 2e-05, "loss": 0.2018, "rl_loss": -0.0241, "step": 1070 }, { "acc": 0.0, "epoch": 0.72, "learning_rate": 2e-05, "loss": 0.2551, "rl_loss": -0.0541, "step": 1080 }, { "acc": 0.0, "epoch": 0.73, "learning_rate": 2e-05, "loss": 0.201, "rl_loss": -0.0098, "step": 1090 }, { "acc": 0.0, "epoch": 0.73, "learning_rate": 2e-05, "loss": 0.2008, "rl_loss": -0.0105, "step": 1100 }, { "acc": 0.0, "epoch": 0.74, "learning_rate": 2e-05, "loss": 0.2028, "rl_loss": -0.05, "step": 1110 }, { "acc": 0.0, "epoch": 0.75, "learning_rate": 2e-05, "loss": 0.1966, "rl_loss": -0.006, "step": 1120 }, { "acc": 0.0, "epoch": 0.75, "learning_rate": 2e-05, "loss": 0.2089, "rl_loss": -0.0066, "step": 1130 }, { "acc": 0.0, "epoch": 0.76, "learning_rate": 2e-05, "loss": 0.1951, "rl_loss": -0.0268, "step": 1140 }, { "acc": 0.0, "epoch": 0.77, "learning_rate": 2e-05, "loss": 0.1885, "rl_loss": -0.0478, "step": 1150 }, { "acc": 0.0, "epoch": 0.77, "learning_rate": 2e-05, "loss": 0.2258, "rl_loss": -0.0383, "step": 1160 }, { "acc": 0.0, "epoch": 0.78, "learning_rate": 2e-05, "loss": 0.2002, "rl_loss": -0.0201, "step": 1170 }, { "acc": 0.0, "epoch": 0.79, "learning_rate": 2e-05, "loss": 0.2319, "rl_loss": -0.1138, "step": 1180 }, { "acc": 0.0, "epoch": 0.79, "learning_rate": 2e-05, "loss": 0.2828, "rl_loss": -0.0367, "step": 1190 }, { "acc": 0.0, "epoch": 0.8, "learning_rate": 2e-05, "loss": 0.2212, "rl_loss": -0.1415, "step": 1200 }, { "acc": 0.0, "epoch": 0.81, "learning_rate": 2e-05, "loss": 0.2619, "rl_loss": -0.0687, "step": 1210 }, { "acc": 0.0, "epoch": 0.81, "learning_rate": 2e-05, "loss": 0.2034, "rl_loss": -0.0475, "step": 1220 }, { "acc": 0.0, "epoch": 0.82, "learning_rate": 2e-05, "loss": 0.2583, "rl_loss": -0.2057, "step": 1230 }, { "acc": 0.0, "epoch": 0.83, "learning_rate": 2e-05, "loss": 0.2258, "rl_loss": -0.0758, "step": 1240 }, { "acc": 0.0, "epoch": 0.83, "learning_rate": 2e-05, "loss": 0.2015, "rl_loss": -0.2222, "step": 1250 }, { "acc": 0.0, "epoch": 0.84, "learning_rate": 2e-05, "loss": 0.2088, "rl_loss": -0.0407, "step": 1260 }, { "acc": 0.0, "epoch": 0.85, "learning_rate": 2e-05, "loss": 0.1986, "rl_loss": -0.0952, "step": 1270 }, { "acc": 0.0, "epoch": 0.85, "learning_rate": 2e-05, "loss": 0.2644, "rl_loss": -0.0654, "step": 1280 }, { "acc": 0.0, "epoch": 0.86, "learning_rate": 2e-05, "loss": 0.2119, "rl_loss": -0.099, "step": 1290 }, { "acc": 0.0, "epoch": 0.87, "learning_rate": 2e-05, "loss": 0.2162, "rl_loss": 0.003, "step": 1300 }, { "acc": 0.0, "epoch": 0.87, "learning_rate": 2e-05, "loss": 0.2345, "rl_loss": -0.1278, "step": 1310 }, { "acc": 0.0, "epoch": 0.88, "learning_rate": 2e-05, "loss": 0.2147, "rl_loss": 0.0178, "step": 1320 }, { "acc": 0.0, "epoch": 0.89, "learning_rate": 2e-05, "loss": 0.3013, "rl_loss": -0.0425, "step": 1330 }, { "acc": 0.0, "epoch": 0.89, "learning_rate": 2e-05, "loss": 0.2049, "rl_loss": -0.0238, "step": 1340 }, { "acc": 0.0, "epoch": 0.9, "learning_rate": 2e-05, "loss": 0.2489, "rl_loss": -0.0539, "step": 1350 }, { "acc": 0.0, "epoch": 0.91, "learning_rate": 2e-05, "loss": 0.2009, "rl_loss": -0.0724, "step": 1360 }, { "acc": 0.0, "epoch": 0.91, "learning_rate": 2e-05, "loss": 0.2411, "rl_loss": -0.1009, "step": 1370 }, { "acc": 0.0, "epoch": 0.92, "learning_rate": 2e-05, "loss": 0.2153, "rl_loss": -0.059, "step": 1380 }, { "acc": 0.0, "epoch": 0.93, "learning_rate": 2e-05, "loss": 0.2806, "rl_loss": -0.0582, "step": 1390 }, { "acc": 0.0, "epoch": 0.93, "learning_rate": 2e-05, "loss": 0.211, "rl_loss": -0.0547, "step": 1400 }, { "acc": 0.0, "epoch": 0.94, "learning_rate": 2e-05, "loss": 0.2064, "rl_loss": -0.0155, "step": 1410 }, { "acc": 0.0, "epoch": 0.95, "learning_rate": 2e-05, "loss": 0.2573, "rl_loss": -0.206, "step": 1420 }, { "acc": 0.0, "epoch": 0.95, "learning_rate": 2e-05, "loss": 0.3427, "rl_loss": -0.4052, "step": 1430 }, { "acc": 0.0, "epoch": 0.96, "learning_rate": 2e-05, "loss": 0.2292, "rl_loss": -0.0413, "step": 1440 }, { "acc": 0.0, "epoch": 0.97, "learning_rate": 2e-05, "loss": 0.2241, "rl_loss": -0.0907, "step": 1450 }, { "acc": 0.0, "epoch": 0.97, "learning_rate": 2e-05, "loss": 0.2004, "rl_loss": -0.2518, "step": 1460 }, { "acc": 0.0, "epoch": 0.98, "learning_rate": 2e-05, "loss": 0.2882, "rl_loss": -0.1383, "step": 1470 }, { "acc": 0.0, "epoch": 0.99, "learning_rate": 2e-05, "loss": 0.4526, "rl_loss": -0.3262, "step": 1480 }, { "acc": 0.0, "epoch": 0.99, "learning_rate": 2e-05, "loss": 0.2733, "rl_loss": -0.078, "step": 1490 }, { "acc": 0.0, "epoch": 1.0, "learning_rate": 2e-05, "loss": 0.24, "rl_loss": -0.0143, "step": 1500 }, { "acc": 0.0, "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.2716, "rl_loss": -0.1058, "step": 1510 }, { "acc": 0.0, "epoch": 1.01, "learning_rate": 2e-05, "loss": 0.3491, "rl_loss": -0.2102, "step": 1520 }, { "acc": 0.0, "epoch": 1.02, "learning_rate": 2e-05, "loss": 0.2348, "rl_loss": -0.1394, "step": 1530 }, { "acc": 0.0, "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.2861, "rl_loss": -0.1298, "step": 1540 }, { "acc": 0.0, "epoch": 1.03, "learning_rate": 2e-05, "loss": 0.2077, "rl_loss": -0.1497, "step": 1550 }, { "acc": 0.0, "epoch": 1.04, "learning_rate": 2e-05, "loss": 0.2019, "rl_loss": -0.2238, "step": 1560 }, { "acc": 0.0, "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.2567, "rl_loss": -0.1693, "step": 1570 }, { "acc": 0.0, "epoch": 1.05, "learning_rate": 2e-05, "loss": 0.2814, "rl_loss": -0.1605, "step": 1580 }, { "acc": 0.0, "epoch": 1.06, "learning_rate": 2e-05, "loss": 0.2215, "rl_loss": -0.1134, "step": 1590 }, { "acc": 0.0, "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.2175, "rl_loss": -0.3902, "step": 1600 }, { "acc": 0.0, "epoch": 1.07, "learning_rate": 2e-05, "loss": 0.2196, "rl_loss": -0.301, "step": 1610 }, { "acc": 0.0, "epoch": 1.08, "learning_rate": 2e-05, "loss": 0.292, "rl_loss": -0.213, "step": 1620 }, { "acc": 0.0, "epoch": 1.09, "learning_rate": 2e-05, "loss": 0.2572, "rl_loss": -0.1575, "step": 1630 }, { "acc": 0.0, "epoch": 1.09, "learning_rate": 2e-05, "loss": 0.2362, "rl_loss": -0.1837, "step": 1640 }, { "acc": 0.0, "epoch": 1.1, "learning_rate": 2e-05, "loss": 0.4009, "rl_loss": -0.2471, "step": 1650 }, { "acc": 0.0, "epoch": 1.11, "learning_rate": 2e-05, "loss": 0.2692, "rl_loss": -0.1602, "step": 1660 }, { "acc": 0.0, "epoch": 1.11, "learning_rate": 2e-05, "loss": 0.3368, "rl_loss": -0.3032, "step": 1670 }, { "acc": 0.0, "epoch": 1.12, "learning_rate": 2e-05, "loss": 0.2074, "rl_loss": -0.1879, "step": 1680 }, { "acc": 0.0, "epoch": 1.13, "learning_rate": 2e-05, "loss": 0.2025, "rl_loss": -0.2248, "step": 1690 }, { "acc": 0.0, "epoch": 1.13, "learning_rate": 2e-05, "loss": 0.2245, "rl_loss": -0.1585, "step": 1700 }, { "acc": 0.0, "epoch": 1.14, "learning_rate": 2e-05, "loss": 0.2777, "rl_loss": -0.2912, "step": 1710 }, { "acc": 0.0, "epoch": 1.15, "learning_rate": 2e-05, "loss": 0.2496, "rl_loss": -0.3379, "step": 1720 }, { "acc": 0.0, "epoch": 1.15, "learning_rate": 2e-05, "loss": 0.2507, "rl_loss": -0.1577, "step": 1730 }, { "acc": 0.0, "epoch": 1.16, "learning_rate": 2e-05, "loss": 0.2275, "rl_loss": -0.2425, "step": 1740 }, { "acc": 0.0, "epoch": 1.17, "learning_rate": 2e-05, "loss": 0.2019, "rl_loss": -0.3212, "step": 1750 }, { "acc": 0.0, "epoch": 1.17, "learning_rate": 2e-05, "loss": 0.2525, "rl_loss": -0.1072, "step": 1760 }, { "acc": 0.0, "epoch": 1.18, "learning_rate": 2e-05, "loss": 0.224, "rl_loss": -0.3149, "step": 1770 }, { "acc": 0.0, "epoch": 1.19, "learning_rate": 2e-05, "loss": 0.3125, "rl_loss": -0.2268, "step": 1780 }, { "acc": 0.0, "epoch": 1.19, "learning_rate": 2e-05, "loss": 0.2726, "rl_loss": -0.3026, "step": 1790 }, { "acc": 0.0, "epoch": 1.2, "learning_rate": 2e-05, "loss": 0.2067, "rl_loss": -0.0679, "step": 1800 }, { "acc": 0.0, "epoch": 1.21, "learning_rate": 2e-05, "loss": 0.2192, "rl_loss": -0.2496, "step": 1810 }, { "acc": 0.0, "epoch": 1.21, "learning_rate": 2e-05, "loss": 0.2192, "rl_loss": -0.3184, "step": 1820 }, { "acc": 0.0, "epoch": 1.22, "learning_rate": 2e-05, "loss": 0.2152, "rl_loss": -0.0508, "step": 1830 }, { "acc": 0.0, "epoch": 1.23, "learning_rate": 2e-05, "loss": 0.3504, "rl_loss": -0.1447, "step": 1840 }, { "acc": 0.0, "epoch": 1.23, "learning_rate": 2e-05, "loss": 0.2511, "rl_loss": -0.1003, "step": 1850 }, { "acc": 0.0, "epoch": 1.24, "learning_rate": 2e-05, "loss": 0.2549, "rl_loss": -0.2655, "step": 1860 }, { "acc": 0.0, "epoch": 1.25, "learning_rate": 2e-05, "loss": 0.2788, "rl_loss": -0.1448, "step": 1870 }, { "acc": 0.0, "epoch": 1.25, "learning_rate": 2e-05, "loss": 0.2365, "rl_loss": -0.3471, "step": 1880 }, { "acc": 0.0, "epoch": 1.26, "learning_rate": 2e-05, "loss": 0.2466, "rl_loss": -0.2494, "step": 1890 }, { "acc": 0.0, "epoch": 1.27, "learning_rate": 2e-05, "loss": 0.2206, "rl_loss": -0.4574, "step": 1900 }, { "acc": 0.0, "epoch": 1.27, "learning_rate": 2e-05, "loss": 0.3148, "rl_loss": -0.1643, "step": 1910 }, { "acc": 0.0, "epoch": 1.28, "learning_rate": 2e-05, "loss": 0.262, "rl_loss": -0.2892, "step": 1920 }, { "acc": 0.0, "epoch": 1.29, "learning_rate": 2e-05, "loss": 0.339, "rl_loss": -0.0317, "step": 1930 }, { "acc": 0.0, "epoch": 1.29, "learning_rate": 2e-05, "loss": 0.2181, "rl_loss": -0.2939, "step": 1940 }, { "acc": 0.0, "epoch": 1.3, "learning_rate": 2e-05, "loss": 0.3218, "rl_loss": -0.1217, "step": 1950 }, { "acc": 0.0, "epoch": 1.31, "learning_rate": 2e-05, "loss": 0.2099, "rl_loss": -0.4231, "step": 1960 }, { "acc": 0.0, "epoch": 1.31, "learning_rate": 2e-05, "loss": 0.3301, "rl_loss": -0.1967, "step": 1970 }, { "acc": 0.0, "epoch": 1.32, "learning_rate": 2e-05, "loss": 0.3348, "rl_loss": -0.2999, "step": 1980 }, { "acc": 0.0, "epoch": 1.33, "learning_rate": 2e-05, "loss": 0.2197, "rl_loss": -0.2697, "step": 1990 }, { "acc": 0.0, "epoch": 1.33, "learning_rate": 2e-05, "loss": 0.2636, "rl_loss": -0.1534, "step": 2000 }, { "acc": 0.0, "epoch": 1.34, "learning_rate": 2e-05, "loss": 0.3831, "rl_loss": -0.3402, "step": 2010 }, { "acc": 0.0, "epoch": 1.35, "learning_rate": 2e-05, "loss": 0.2709, "rl_loss": -0.2815, "step": 2020 }, { "acc": 0.0, "epoch": 1.35, "learning_rate": 2e-05, "loss": 0.4735, "rl_loss": -0.1193, "step": 2030 }, { "acc": 0.0, "epoch": 1.36, "learning_rate": 2e-05, "loss": 0.3152, "rl_loss": -0.3396, "step": 2040 }, { "acc": 0.0, "epoch": 1.37, "learning_rate": 2e-05, "loss": 0.213, "rl_loss": -0.1602, "step": 2050 }, { "acc": 0.0, "epoch": 1.37, "learning_rate": 2e-05, "loss": 0.2118, "rl_loss": -0.2494, "step": 2060 }, { "acc": 0.0, "epoch": 1.38, "learning_rate": 2e-05, "loss": 0.2877, "rl_loss": -0.2309, "step": 2070 }, { "acc": 0.0, "epoch": 1.39, "learning_rate": 2e-05, "loss": 0.2999, "rl_loss": -0.2919, "step": 2080 }, { "acc": 0.0, "epoch": 1.39, "learning_rate": 2e-05, "loss": 0.3178, "rl_loss": -0.3203, "step": 2090 }, { "acc": 0.0, "epoch": 1.4, "learning_rate": 2e-05, "loss": 0.2346, "rl_loss": -0.3028, "step": 2100 }, { "acc": 0.0, "epoch": 1.41, "learning_rate": 2e-05, "loss": 0.2955, "rl_loss": -0.2407, "step": 2110 }, { "acc": 0.0, "epoch": 1.41, "learning_rate": 2e-05, "loss": 0.3054, "rl_loss": -0.3303, "step": 2120 }, { "acc": 0.0, "epoch": 1.42, "learning_rate": 2e-05, "loss": 0.221, "rl_loss": -0.0344, "step": 2130 }, { "acc": 0.0, "epoch": 1.43, "learning_rate": 2e-05, "loss": 0.2423, "rl_loss": -0.4771, "step": 2140 }, { "acc": 0.0, "epoch": 1.43, "learning_rate": 2e-05, "loss": 0.3343, "rl_loss": -0.7419, "step": 2150 }, { "acc": 0.0, "epoch": 1.44, "learning_rate": 2e-05, "loss": 0.2997, "rl_loss": -0.183, "step": 2160 }, { "acc": 0.0, "epoch": 1.45, "learning_rate": 2e-05, "loss": 0.2399, "rl_loss": -0.2342, "step": 2170 }, { "acc": 0.0, "epoch": 1.45, "learning_rate": 2e-05, "loss": 0.2391, "rl_loss": -0.3045, "step": 2180 }, { "acc": 0.0, "epoch": 1.46, "learning_rate": 2e-05, "loss": 0.4321, "rl_loss": -0.3114, "step": 2190 }, { "acc": 0.0, "epoch": 1.47, "learning_rate": 2e-05, "loss": 0.2387, "rl_loss": -0.2203, "step": 2200 }, { "acc": 0.0, "epoch": 1.47, "learning_rate": 2e-05, "loss": 0.2195, "rl_loss": -0.1261, "step": 2210 }, { "acc": 0.0, "epoch": 1.48, "learning_rate": 2e-05, "loss": 0.2744, "rl_loss": -0.2352, "step": 2220 }, { "acc": 0.0, "epoch": 1.49, "learning_rate": 2e-05, "loss": 0.56, "rl_loss": -0.4791, "step": 2230 }, { "acc": 0.0, "epoch": 1.49, "learning_rate": 2e-05, "loss": 0.2127, "rl_loss": -0.0956, "step": 2240 }, { "acc": 0.0, "epoch": 1.5, "learning_rate": 2e-05, "loss": 0.3765, "rl_loss": -0.1513, "step": 2250 }, { "acc": 0.0, "epoch": 1.51, "learning_rate": 2e-05, "loss": 0.2626, "rl_loss": -0.0945, "step": 2260 }, { "acc": 0.0, "epoch": 1.51, "learning_rate": 2e-05, "loss": 0.2584, "rl_loss": -0.2644, "step": 2270 }, { "acc": 0.0, "epoch": 1.52, "learning_rate": 2e-05, "loss": 0.3124, "rl_loss": -0.1856, "step": 2280 }, { "acc": 0.0, "epoch": 1.53, "learning_rate": 2e-05, "loss": 0.3314, "rl_loss": -0.1497, "step": 2290 }, { "acc": 0.0, "epoch": 1.53, "learning_rate": 2e-05, "loss": 0.222, "rl_loss": -0.2127, "step": 2300 }, { "acc": 0.0, "epoch": 1.54, "learning_rate": 2e-05, "loss": 0.2053, "rl_loss": -0.2785, "step": 2310 }, { "acc": 0.0, "epoch": 1.55, "learning_rate": 2e-05, "loss": 0.2848, "rl_loss": -0.1348, "step": 2320 }, { "acc": 0.0, "epoch": 1.55, "learning_rate": 2e-05, "loss": 0.2008, "rl_loss": -0.3603, "step": 2330 }, { "acc": 0.0, "epoch": 1.56, "learning_rate": 2e-05, "loss": 0.334, "rl_loss": -0.1207, "step": 2340 }, { "acc": 0.0, "epoch": 1.57, "learning_rate": 2e-05, "loss": 0.2366, "rl_loss": -0.556, "step": 2350 }, { "acc": 0.0, "epoch": 1.57, "learning_rate": 2e-05, "loss": 0.3089, "rl_loss": -0.1413, "step": 2360 }, { "acc": 0.0, "epoch": 1.58, "learning_rate": 2e-05, "loss": 0.2942, "rl_loss": -0.1651, "step": 2370 }, { "acc": 0.0, "epoch": 1.59, "learning_rate": 2e-05, "loss": 0.2984, "rl_loss": -0.3665, "step": 2380 }, { "acc": 0.0, "epoch": 1.59, "learning_rate": 2e-05, "loss": 0.2923, "rl_loss": -0.1287, "step": 2390 }, { "acc": 0.0, "epoch": 1.6, "learning_rate": 2e-05, "loss": 0.2936, "rl_loss": -0.2642, "step": 2400 }, { "acc": 0.0, "epoch": 1.61, "learning_rate": 2e-05, "loss": 0.2817, "rl_loss": -0.0497, "step": 2410 }, { "acc": 0.0, "epoch": 1.61, "learning_rate": 2e-05, "loss": 0.2176, "rl_loss": -0.1427, "step": 2420 }, { "acc": 0.0, "epoch": 1.62, "learning_rate": 2e-05, "loss": 0.2181, "rl_loss": -0.2153, "step": 2430 }, { "acc": 0.0, "epoch": 1.63, "learning_rate": 2e-05, "loss": 0.353, "rl_loss": -0.0923, "step": 2440 }, { "acc": 0.0, "epoch": 1.63, "learning_rate": 2e-05, "loss": 0.2725, "rl_loss": -0.1337, "step": 2450 }, { "acc": 0.0, "epoch": 1.64, "learning_rate": 2e-05, "loss": 0.3012, "rl_loss": -0.2073, "step": 2460 }, { "acc": 0.0, "epoch": 1.65, "learning_rate": 2e-05, "loss": 0.4572, "rl_loss": -0.2452, "step": 2470 }, { "acc": 0.0, "epoch": 1.65, "learning_rate": 2e-05, "loss": 0.3193, "rl_loss": -0.322, "step": 2480 }, { "acc": 0.0, "epoch": 1.66, "learning_rate": 2e-05, "loss": 0.2718, "rl_loss": -0.1434, "step": 2490 }, { "acc": 0.0, "epoch": 1.67, "learning_rate": 2e-05, "loss": 0.2396, "rl_loss": -0.3019, "step": 2500 }, { "acc": 0.0, "epoch": 1.67, "learning_rate": 2e-05, "loss": 0.2117, "rl_loss": -0.1138, "step": 2510 }, { "acc": 0.0, "epoch": 1.68, "learning_rate": 2e-05, "loss": 0.305, "rl_loss": -0.1993, "step": 2520 }, { "acc": 0.0, "epoch": 1.69, "learning_rate": 2e-05, "loss": 0.2979, "rl_loss": -0.1591, "step": 2530 }, { "acc": 0.0, "epoch": 1.69, "learning_rate": 2e-05, "loss": 0.2312, "rl_loss": -0.0862, "step": 2540 }, { "acc": 0.0, "epoch": 1.7, "learning_rate": 2e-05, "loss": 0.2726, "rl_loss": -0.3785, "step": 2550 }, { "acc": 0.0, "epoch": 1.71, "learning_rate": 2e-05, "loss": 0.3373, "rl_loss": -0.0622, "step": 2560 }, { "acc": 0.0, "epoch": 1.71, "learning_rate": 2e-05, "loss": 0.2486, "rl_loss": -0.0971, "step": 2570 }, { "acc": 0.0, "epoch": 1.72, "learning_rate": 2e-05, "loss": 0.2828, "rl_loss": -0.1688, "step": 2580 }, { "acc": 0.0, "epoch": 1.73, "learning_rate": 2e-05, "loss": 0.2374, "rl_loss": -0.1622, "step": 2590 }, { "acc": 0.0, "epoch": 1.73, "learning_rate": 2e-05, "loss": 0.428, "rl_loss": -0.2429, "step": 2600 }, { "acc": 0.0, "epoch": 1.74, "learning_rate": 2e-05, "loss": 0.265, "rl_loss": -0.314, "step": 2610 }, { "acc": 0.0, "epoch": 1.75, "learning_rate": 2e-05, "loss": 0.3008, "rl_loss": -0.2175, "step": 2620 }, { "acc": 0.0, "epoch": 1.75, "learning_rate": 2e-05, "loss": 0.212, "rl_loss": -0.2465, "step": 2630 }, { "acc": 0.0, "epoch": 1.76, "learning_rate": 2e-05, "loss": 0.2471, "rl_loss": -0.0837, "step": 2640 }, { "acc": 0.0, "epoch": 1.77, "learning_rate": 2e-05, "loss": 0.2352, "rl_loss": -0.0888, "step": 2650 }, { "acc": 0.0, "epoch": 1.77, "learning_rate": 2e-05, "loss": 0.4229, "rl_loss": -0.3808, "step": 2660 }, { "acc": 0.0, "epoch": 1.78, "learning_rate": 2e-05, "loss": 0.2738, "rl_loss": -0.1199, "step": 2670 }, { "acc": 0.0, "epoch": 1.79, "learning_rate": 2e-05, "loss": 0.2162, "rl_loss": -0.0656, "step": 2680 }, { "acc": 0.0, "epoch": 1.79, "learning_rate": 2e-05, "loss": 0.246, "rl_loss": -0.2123, "step": 2690 }, { "acc": 0.0, "epoch": 1.8, "learning_rate": 2e-05, "loss": 0.2648, "rl_loss": -0.2322, "step": 2700 }, { "acc": 0.0, "epoch": 1.81, "learning_rate": 2e-05, "loss": 0.2433, "rl_loss": -0.0837, "step": 2710 }, { "acc": 0.0, "epoch": 1.81, "learning_rate": 2e-05, "loss": 0.269, "rl_loss": -0.0561, "step": 2720 }, { "acc": 0.0, "epoch": 1.82, "learning_rate": 2e-05, "loss": 0.2715, "rl_loss": -0.3806, "step": 2730 }, { "acc": 0.0, "epoch": 1.83, "learning_rate": 2e-05, "loss": 0.3139, "rl_loss": -0.0817, "step": 2740 }, { "acc": 0.0, "epoch": 1.83, "learning_rate": 2e-05, "loss": 0.2513, "rl_loss": -0.2584, "step": 2750 }, { "acc": 0.0, "epoch": 1.84, "learning_rate": 2e-05, "loss": 0.3049, "rl_loss": -0.2728, "step": 2760 }, { "acc": 0.0, "epoch": 1.85, "learning_rate": 2e-05, "loss": 0.228, "rl_loss": -0.165, "step": 2770 }, { "acc": 0.0, "epoch": 1.85, "learning_rate": 2e-05, "loss": 0.2464, "rl_loss": -0.2593, "step": 2780 }, { "acc": 0.0, "epoch": 1.86, "learning_rate": 2e-05, "loss": 0.3339, "rl_loss": -0.1801, "step": 2790 }, { "acc": 0.0, "epoch": 1.87, "learning_rate": 2e-05, "loss": 0.2498, "rl_loss": -0.0794, "step": 2800 }, { "acc": 0.0, "epoch": 1.87, "learning_rate": 2e-05, "loss": 0.2176, "rl_loss": -0.2592, "step": 2810 }, { "acc": 0.0, "epoch": 1.88, "learning_rate": 2e-05, "loss": 0.2339, "rl_loss": -0.1273, "step": 2820 }, { "acc": 0.0, "epoch": 1.89, "learning_rate": 2e-05, "loss": 0.3225, "rl_loss": -0.2312, "step": 2830 }, { "acc": 0.0, "epoch": 1.89, "learning_rate": 2e-05, "loss": 0.3005, "rl_loss": -0.2849, "step": 2840 }, { "acc": 0.0, "epoch": 1.9, "learning_rate": 2e-05, "loss": 0.3244, "rl_loss": -0.4541, "step": 2850 }, { "acc": 0.0, "epoch": 1.91, "learning_rate": 2e-05, "loss": 0.2361, "rl_loss": -0.4858, "step": 2860 }, { "acc": 0.0, "epoch": 1.91, "learning_rate": 2e-05, "loss": 0.2548, "rl_loss": -0.0967, "step": 2870 }, { "acc": 0.0, "epoch": 1.92, "learning_rate": 2e-05, "loss": 0.2556, "rl_loss": -0.4954, "step": 2880 }, { "acc": 0.0, "epoch": 1.93, "learning_rate": 2e-05, "loss": 0.2572, "rl_loss": -0.19, "step": 2890 }, { "acc": 0.0, "epoch": 1.93, "learning_rate": 2e-05, "loss": 0.3286, "rl_loss": -0.2709, "step": 2900 }, { "acc": 0.0, "epoch": 1.94, "learning_rate": 2e-05, "loss": 0.2768, "rl_loss": -0.1072, "step": 2910 }, { "acc": 0.0, "epoch": 1.95, "learning_rate": 2e-05, "loss": 0.2248, "rl_loss": -0.4267, "step": 2920 }, { "acc": 0.0, "epoch": 1.95, "learning_rate": 2e-05, "loss": 0.2976, "rl_loss": -0.3855, "step": 2930 }, { "acc": 0.0, "epoch": 1.96, "learning_rate": 2e-05, "loss": 0.3344, "rl_loss": -0.1849, "step": 2940 }, { "acc": 0.0, "epoch": 1.97, "learning_rate": 2e-05, "loss": 0.2909, "rl_loss": -0.4777, "step": 2950 }, { "acc": 0.0, "epoch": 1.97, "learning_rate": 2e-05, "loss": 0.368, "rl_loss": -0.5148, "step": 2960 }, { "acc": 0.0, "epoch": 1.98, "learning_rate": 2e-05, "loss": 0.2897, "rl_loss": -0.4314, "step": 2970 }, { "acc": 0.0, "epoch": 1.99, "learning_rate": 2e-05, "loss": 0.3902, "rl_loss": -0.3644, "step": 2980 }, { "acc": 0.0, "epoch": 1.99, "learning_rate": 2e-05, "loss": 0.2532, "rl_loss": -0.5152, "step": 2990 }, { "acc": 0.0, "epoch": 2.0, "learning_rate": 2e-05, "loss": 0.2424, "rl_loss": -0.534, "step": 3000 } ], "max_steps": 14990, "num_train_epochs": 10, "total_flos": 4.873690607616e+17, "trial_name": null, "trial_params": null }