{ "parameters": { "alpha": 0.1, "gamma": 0.9, "num_states": 5 }, "final_values": [ 2.4226557928072148, 4.849139749443705, 6.905915907047484, 8.666970414669663, 0.0 ], "training_metrics": { "episodes": [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99 ], "total_rewards": [ 8, 9, 7, 4, 9, 5, 9, 6, 5, 9, 4, 8, 1, 10, 5, 4, 3, 8, 9, 4, 5, 3, 8, 1, 6, 1, 9, 6, 6, 6, 7, 10, 9, 7, 10, 7, 9, 9, 8, 3, 9, 10, 10, 9, 3, 8, 10, 2, 3, 9, 8, 9, 8, 6, 8, 6, 9, 10, 6, -1, 5, 6, 9, 4, 6, 3, 10, 9, 8, 9, 7, 2, 5, 9, 6, 5, 2, 9, 5, 8, 8, 3, 6, 9, 6, 4, -2, 6, 7, 10, 3, 5, 9, 8, 8, 9, 5, 7, 7, 6 ], "avg_td_error": [ 2.033333333333333, 2.2198, 1.8849527285714285, 1.6257408316913282, 2.2409036527050694, 1.7690025253817012, 2.3169964437231814, 1.978934813114277, 1.6878488966174738, 2.1895832397013995, 1.6000185269692841, 2.02395836711244, 1.5361175410162775, 2.469452984396626, 1.7584882628186589, 1.745363459935443, 1.90793710160227, 2.025191967182734, 2.145936970564764, 1.5388025637478056, 1.9761181412232216, 1.8770185214543695, 1.9892156046073115, 1.6225413892800722, 1.6588580331966507, 1.6516402653241946, 2.2051790946867986, 2.0894924039604943, 1.9374763482513744, 2.008228806269229, 1.8974765410094696, 2.014410223583452, 1.8422867278637896, 1.9272140535779434, 1.827365199248982, 1.6439986499800294, 1.6374624768898347, 1.7130435522332745, 1.5189397393629755, 1.6070139792429077, 1.5860249958056498, 1.6251978458303251, 1.557803194879734, 1.6149840263733573, 1.6422754130249115, 1.4630010126666715, 1.3864051214373625, 1.495856110881942, 1.5782812220684717, 1.557254977933596, 1.536539441597822, 1.4905745124609602, 1.4781581012559315, 1.5559644975905367, 1.4026481697559576, 1.5569442976587937, 1.2885102835021471, 1.1610793298558097, 1.5162915375210728, 1.6121247717681355, 1.456387593164241, 1.3548005997891672, 1.3428628849243989, 1.6177717713467825, 1.5288949041919568, 1.5699277700788898, 1.1858354218459601, 1.2221588406121118, 1.3507695261772976, 1.327836703796593, 1.344935083414974, 1.5873649238349878, 1.3643663650832445, 1.3551087751566147, 1.4189498457533678, 1.4515385026130485, 1.4397077280812531, 1.3926190335198008, 1.4856597326497052, 1.367812878320538, 1.5102663641085339, 1.507945820519064, 1.3966365764464983, 1.4342109103349971, 1.487238604169598, 1.4895058191802542, 1.430271607104611, 1.6931310004900475, 1.4680902614624227, 1.515934311614811, 1.5736626654455783, 1.6133770921164656, 1.5440603605651673, 1.4784728828511058, 1.583378414179804, 1.5073781163363513, 1.3810227729215594, 1.4142614946350214, 1.5200640494865278, 1.544714832127803 ], "convergence_rate": [ 0, 0.7781715502381207, 1.1117538646385106, 1.4507348744995145, 1.7009513831849405, 1.890649036642109, 2.0320663048041787, 2.0688967224456403, 2.3006734005090417, 2.4619316466723267, 2.674558949250953, 2.8081555505389235, 2.9725483338507304, 3.0553800490418292, 3.12524268846074, 3.1704349215107412, 3.059689167517758, 3.131249254436853, 3.1979464341094874, 3.345768705399018, 3.2406950843562705, 3.138946647278955, 3.1859319990387505, 3.233038176188746, 3.3373152796793892, 3.310414565644266, 3.313946852974448, 3.157876959147332, 3.092572276595742, 2.961462142114039, 2.9519940228940786, 2.997550492499237, 3.0421394577556784, 2.9321107379154085, 2.977542854312515, 3.018616315807758, 3.0742557573571085, 3.088796321479315, 3.153624081357507, 3.1236988163642856, 3.164624457921428, 3.1855624822385304, 3.20563646767002, 3.1755925272375434, 3.08804408095583, 3.1165353755634233, 3.1459017666320825, 3.1491791843626546, 3.117705081895419, 3.1000448353140135, 3.087029467034646, 3.0712786535410226, 3.0633614761996393, 3.0004314340325298, 3.006381932100693, 2.938052252372418, 2.9777411982844875, 3.01818975893983, 2.943920364154121, 2.7704227943063673, 2.7925643456754434, 2.8627591466897275, 2.8957189075231224, 2.757541248386892, 2.6948922289605712, 2.5854394197292865, 2.653714776600433, 2.7266676203754026, 2.751972847966562, 2.765548022599627, 2.8148586423377204, 2.682899341029985, 2.75525476730875, 2.767112166745431, 2.7942972417045358, 2.7851417313300115, 2.86125043058861, 2.868271288938607, 2.8470369941372438, 2.8902115755167674, 2.8498945363125308, 2.8317140065541695, 2.8858706208142237, 2.8906811185287036, 2.8859089042381227, 2.8962631778977226, 3.0107515418014326, 2.9439125558689763, 3.0142958072578545, 3.050507878202059, 3.0130613944417437, 2.976273213176825, 2.9969407755218755, 3.031466602051056, 2.9996474512677818, 2.9884178703760855, 3.080170567069491, 3.1371114548062136, 3.128055155097193, 3.0924285017068938 ] }, "episode_rewards": [ 8, 9, 7, 4, 9, 5, 9, 6, 5, 9, 4, 8, 1, 10, 5, 4, 3, 8, 9, 4, 5, 3, 8, 1, 6, 1, 9, 6, 6, 6, 7, 10, 9, 7, 10, 7, 9, 9, 8, 3, 9, 10, 10, 9, 3, 8, 10, 2, 3, 9, 8, 9, 8, 6, 8, 6, 9, 10, 6, -1, 5, 6, 9, 4, 6, 3, 10, 9, 8, 9, 7, 2, 5, 9, 6, 5, 2, 9, 5, 8, 8, 3, 6, 9, 6, 4, -2, 6, 7, 10, 3, 5, 9, 8, 8, 9, 5, 7, 7, 6 ] }