TemporalDifferenceLearning / td_learning_20250802_094606.json
karthik-2905's picture
Upload folder using huggingface_hub
3e1fa1a verified
{
"parameters": {
"alpha": 0.1,
"gamma": 0.9,
"num_states": 5
},
"final_values": [
2.4226557928072148,
4.849139749443705,
6.905915907047484,
8.666970414669663,
0.0
],
"training_metrics": {
"episodes": [
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64,
65,
66,
67,
68,
69,
70,
71,
72,
73,
74,
75,
76,
77,
78,
79,
80,
81,
82,
83,
84,
85,
86,
87,
88,
89,
90,
91,
92,
93,
94,
95,
96,
97,
98,
99
],
"total_rewards": [
8,
9,
7,
4,
9,
5,
9,
6,
5,
9,
4,
8,
1,
10,
5,
4,
3,
8,
9,
4,
5,
3,
8,
1,
6,
1,
9,
6,
6,
6,
7,
10,
9,
7,
10,
7,
9,
9,
8,
3,
9,
10,
10,
9,
3,
8,
10,
2,
3,
9,
8,
9,
8,
6,
8,
6,
9,
10,
6,
-1,
5,
6,
9,
4,
6,
3,
10,
9,
8,
9,
7,
2,
5,
9,
6,
5,
2,
9,
5,
8,
8,
3,
6,
9,
6,
4,
-2,
6,
7,
10,
3,
5,
9,
8,
8,
9,
5,
7,
7,
6
],
"avg_td_error": [
2.033333333333333,
2.2198,
1.8849527285714285,
1.6257408316913282,
2.2409036527050694,
1.7690025253817012,
2.3169964437231814,
1.978934813114277,
1.6878488966174738,
2.1895832397013995,
1.6000185269692841,
2.02395836711244,
1.5361175410162775,
2.469452984396626,
1.7584882628186589,
1.745363459935443,
1.90793710160227,
2.025191967182734,
2.145936970564764,
1.5388025637478056,
1.9761181412232216,
1.8770185214543695,
1.9892156046073115,
1.6225413892800722,
1.6588580331966507,
1.6516402653241946,
2.2051790946867986,
2.0894924039604943,
1.9374763482513744,
2.008228806269229,
1.8974765410094696,
2.014410223583452,
1.8422867278637896,
1.9272140535779434,
1.827365199248982,
1.6439986499800294,
1.6374624768898347,
1.7130435522332745,
1.5189397393629755,
1.6070139792429077,
1.5860249958056498,
1.6251978458303251,
1.557803194879734,
1.6149840263733573,
1.6422754130249115,
1.4630010126666715,
1.3864051214373625,
1.495856110881942,
1.5782812220684717,
1.557254977933596,
1.536539441597822,
1.4905745124609602,
1.4781581012559315,
1.5559644975905367,
1.4026481697559576,
1.5569442976587937,
1.2885102835021471,
1.1610793298558097,
1.5162915375210728,
1.6121247717681355,
1.456387593164241,
1.3548005997891672,
1.3428628849243989,
1.6177717713467825,
1.5288949041919568,
1.5699277700788898,
1.1858354218459601,
1.2221588406121118,
1.3507695261772976,
1.327836703796593,
1.344935083414974,
1.5873649238349878,
1.3643663650832445,
1.3551087751566147,
1.4189498457533678,
1.4515385026130485,
1.4397077280812531,
1.3926190335198008,
1.4856597326497052,
1.367812878320538,
1.5102663641085339,
1.507945820519064,
1.3966365764464983,
1.4342109103349971,
1.487238604169598,
1.4895058191802542,
1.430271607104611,
1.6931310004900475,
1.4680902614624227,
1.515934311614811,
1.5736626654455783,
1.6133770921164656,
1.5440603605651673,
1.4784728828511058,
1.583378414179804,
1.5073781163363513,
1.3810227729215594,
1.4142614946350214,
1.5200640494865278,
1.544714832127803
],
"convergence_rate": [
0,
0.7781715502381207,
1.1117538646385106,
1.4507348744995145,
1.7009513831849405,
1.890649036642109,
2.0320663048041787,
2.0688967224456403,
2.3006734005090417,
2.4619316466723267,
2.674558949250953,
2.8081555505389235,
2.9725483338507304,
3.0553800490418292,
3.12524268846074,
3.1704349215107412,
3.059689167517758,
3.131249254436853,
3.1979464341094874,
3.345768705399018,
3.2406950843562705,
3.138946647278955,
3.1859319990387505,
3.233038176188746,
3.3373152796793892,
3.310414565644266,
3.313946852974448,
3.157876959147332,
3.092572276595742,
2.961462142114039,
2.9519940228940786,
2.997550492499237,
3.0421394577556784,
2.9321107379154085,
2.977542854312515,
3.018616315807758,
3.0742557573571085,
3.088796321479315,
3.153624081357507,
3.1236988163642856,
3.164624457921428,
3.1855624822385304,
3.20563646767002,
3.1755925272375434,
3.08804408095583,
3.1165353755634233,
3.1459017666320825,
3.1491791843626546,
3.117705081895419,
3.1000448353140135,
3.087029467034646,
3.0712786535410226,
3.0633614761996393,
3.0004314340325298,
3.006381932100693,
2.938052252372418,
2.9777411982844875,
3.01818975893983,
2.943920364154121,
2.7704227943063673,
2.7925643456754434,
2.8627591466897275,
2.8957189075231224,
2.757541248386892,
2.6948922289605712,
2.5854394197292865,
2.653714776600433,
2.7266676203754026,
2.751972847966562,
2.765548022599627,
2.8148586423377204,
2.682899341029985,
2.75525476730875,
2.767112166745431,
2.7942972417045358,
2.7851417313300115,
2.86125043058861,
2.868271288938607,
2.8470369941372438,
2.8902115755167674,
2.8498945363125308,
2.8317140065541695,
2.8858706208142237,
2.8906811185287036,
2.8859089042381227,
2.8962631778977226,
3.0107515418014326,
2.9439125558689763,
3.0142958072578545,
3.050507878202059,
3.0130613944417437,
2.976273213176825,
2.9969407755218755,
3.031466602051056,
2.9996474512677818,
2.9884178703760855,
3.080170567069491,
3.1371114548062136,
3.128055155097193,
3.0924285017068938
]
},
"episode_rewards": [
8,
9,
7,
4,
9,
5,
9,
6,
5,
9,
4,
8,
1,
10,
5,
4,
3,
8,
9,
4,
5,
3,
8,
1,
6,
1,
9,
6,
6,
6,
7,
10,
9,
7,
10,
7,
9,
9,
8,
3,
9,
10,
10,
9,
3,
8,
10,
2,
3,
9,
8,
9,
8,
6,
8,
6,
9,
10,
6,
-1,
5,
6,
9,
4,
6,
3,
10,
9,
8,
9,
7,
2,
5,
9,
6,
5,
2,
9,
5,
8,
8,
3,
6,
9,
6,
4,
-2,
6,
7,
10,
3,
5,
9,
8,
8,
9,
5,
7,
7,
6
]
}