Instructions to use garvitsachdeva/spindleflow-rl with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- stable-baselines3
How to use garvitsachdeva/spindleflow-rl with stable-baselines3:
```python
from huggingface_sb3 import load_from_hub

checkpoint = load_from_hub(
    repo_id="garvitsachdeva/spindleflow-rl",
    filename="{MODEL FILENAME}.zip",
)
```
- Notebooks
- Google Colab
- Kaggle
Checkpoint at step 20,000
Browse files
- reward_curve.json +1 -1
- reward_curve.png +0 -0
- spindleflow_model_latest.zip +2 -2
- training_log.txt +100 -0
- vec_normalize_latest.pkl +1 -1
reward_curve.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"episodes": [0,
|
|
|
|
| 1 |
+
{"episodes": [0, 42, 84, 126, 168, 210, 252, 294, 336, 378, 420, 462, 504, 546, 588, 630, 672, 714, 756, 798, 840, 882, 924, 966, 1008, 1050, 1092, 1134, 1176, 1218, 1260, 1302, 1344, 1386, 1428, 1470, 1512, 1554, 1596, 1638, 1680, 1722, 1764, 1806, 1848, 1890, 1932, 1974, 2016, 2058, 2100, 2142, 2184, 2226, 2268, 2310, 2352, 2394, 2436, 2478, 2520, 2562, 2604, 2646, 2688, 2730, 2772, 2814, 2856, 2898, 2940, 2982, 3024, 3066, 3108, 3150, 3192, 3234, 3276, 3318, 3360, 3402, 3444, 3486, 3528, 3570, 3612, 3654, 3696, 3738, 3780, 3822, 3864, 3906, 3948, 3990, 4032, 4074, 4116, 4158, 4200, 4242, 4284, 4326, 4368, 4410, 4452, 4494, 4536, 4578, 4620, 4662, 4704, 4746, 4788, 4830, 4872, 4914, 4956, 4998, 5040, 5082, 5124, 5166, 5208, 5250, 5292, 5334, 5376, 5418, 5460, 5502, 5544, 5586, 5628, 5670, 5712, 5754, 5796, 5838, 5880, 5922, 5964, 6006, 6048, 6090, 6132, 6174, 6216, 6258, 6300, 6342, 6384, 6426, 6468, 6510, 6552, 6594, 6636, 6678, 6720, 6762, 6804, 6846, 6888, 6930, 6972, 7014, 7056, 7098, 7140, 7182, 7224, 7266, 7308, 7350, 7392, 7434, 7476, 7518, 7560, 7602, 7644, 7686, 7728, 7770, 7812, 7854, 7896, 7938, 7980, 8022, 8064, 8106, 8148, 8190, 8232, 8274, 8316, 8358, 8400, 8442], "mean_rewards": [7.869385480880737, -0.2949352840524773, -0.4952996876748169, -0.5199973956043795, -0.6174052359106449, -0.5373458652470208, -0.49625358010896226, -0.47238873628804745, -0.4706542715678981, -0.47499917030069083, -0.5006424231640552, -0.5145500502043914, -0.5115612921467543, -0.4771832878161046, -0.4433126796612446, -0.4632618394482635, -0.456389642276643, -0.447543691553808, -0.4684159628484609, -0.4349985238832404, -0.3944579961507599, -0.33797204329737657, -0.2693970410179628, -0.24447371565104234, -0.16345096840347462, -0.11572722794119057, -0.09881766136946543, -0.07444735454782, -0.012611925175835221, 0.022194254436126014, 0.10274628089863877, 0.14136686710913157, 0.1848921677655593, 0.20126053468403318, 0.23584040265556705, 0.2791989072498356, 0.3556912602101154, 
0.3711379658791043, 0.41904636950425384, 0.4871723190354148, 0.49190893542309255, 0.5197475980214976, 0.5344401113332012, 0.6119090946877753, 0.6077940996537503, 0.6080971452850137, 0.585951357499481, 0.671064635268127, 0.6593267258980425, 0.6873161014875634, 0.7041500217637118, 0.715485646066537, 0.8034420476211587, 0.7868460612098029, 0.818281575807974, 0.9098733552803441, 0.9931412801042653, 0.9620735797011291, 1.0410858473792617, 1.047098892554579, 1.1130033807529678, 1.1595609719266955, 1.1603956472084083, 1.189735481143492, 1.2016556340301046, 1.171680602527285, 1.1892861477088377, 1.2292941701027371, 1.2535419590674552, 1.3050417096731868, 1.2991100276143834, 1.3035879157426469, 1.3206478618678443, 1.3221849669693566, 1.3032227260201186, 1.3922748728918861, 1.3904742702253092, 1.4075497126268257, 1.3999544971508309, 1.3585951193719048, 1.401334112760488, 1.4241763773255647, 1.4504369786470461, 1.4591521133695078, 1.4979405961359658, 1.5108637377347376, 1.4940121352655005, 1.5021781849527713, 1.5182731466857664, 1.5629961201185272, 1.515429445260143, 1.5572113326897863, 1.5093122213048737, 1.5162708755763235, 1.4925893841122984, 1.5068567290713786, 1.5223539526421106, 1.5419064695666267, 1.561460198238459, 1.570351040876253, 1.570216936346216, 1.5761766723768438, 1.6068325873735065, 1.613772955745072, 1.6552447412857119, 1.6462390915587333, 1.6372316681285122, 1.6073656982873457, 1.592204071781504, 1.6016886271615802, 1.6301611374807043, 1.6145569434889557, 1.6107396750306024, 1.6238551464572462, 1.6357820498482878, 1.6619270062760305, 1.6894643379106677, 1.7275706677727665, 1.785425909732541, 1.7937826996195274, 1.8181767136666853, 1.8143030544116936, 1.8381750764328555, 1.8158522040225162, 1.8260731318941392, 1.7974370324934497, 1.7981947634833424, 1.7741890611028999, 1.7521160172363348, 1.712481968780817, 1.7148109309071617, 1.6759713221748276, 1.6941063414843431, 1.7073139056482656, 1.7217200488049234, 1.7622721078142551, 1.7518380036230807, 
1.735415602409976, 1.7477551485732514, 1.7851004104705457, 1.7679061309746378, 1.7976991302610532, 1.8257586488759425, 1.8042287188559472, 1.777249003771259, 1.7483236596297775, 1.7362275231376805, 1.7692375490285759, 1.7370202693765127, 1.7360723117135592, 1.768777926935451, 1.7647074289960176, 1.7475759104234487, 1.7903672372657002, 1.8103490486842106, 1.8558027431794952, 1.9250150367657144, 1.9203967902093317, 1.9439025071849343, 1.938004134038907, 1.9217347231532123, 1.9146441257001947, 1.9125079813831758, 1.8749578054648393, 1.825682910269605, 1.7965219531802434, 1.7844053784496248, 1.7467980947755146, 1.7836011220095076, 1.7822839737896432, 1.7661089524238223, 1.786190782425039, 1.8014363352104172, 1.820912442409092, 1.822568523602693, 1.8153345679073816, 1.822242390221223, 1.8705398875596453, 1.8258761658026004, 1.8366791110157412, 1.8185347392055466, 1.8701389919316291, 1.8588750906604923, 1.8892388550459214, 1.9062272127155426, 1.9422058453827322, 1.9403631834111195, 1.9122930205721442, 1.894687196589075, 1.9080439570144627, 1.8851423501893703, 1.8484355723748933, 1.84887556860856, 1.8620412626725464, 1.8754797673186745, 1.8643548319002017, 1.856698218289376, 1.9023429482652507, 1.9486914368913035, 1.9762877621797774, 2.0026946232307106, 2.019449805252184], "raw_rewards": [7.869385480880737, 0.30212289094924927, -0.1653035283088684, -0.8742947578430176, -1.4332927465438843, 0.9507952705025673, -1.6128928735852242, -1.4756847620010376, 0.12714916467666626, -1.4666680097579956, -1.7659942060709, 0.0034030526876449585, -1.0772457867860794, -0.939087450504303, -0.09268271923065186, -0.2025727927684784, 1.2409679293632507, -1.549471378326416, -2.000951513648033, 1.5722772777080536, -1.3175745010375977, 2.127745598554611, 3.3618789315223694, -1.929141715168953, 0.09446060657501221, -0.08261801302433014, 0.2618691325187683, -0.5134999752044678, -2.073127508163452, 0.27165186405181885, 1.1141240745782852, 0.22413843870162964, 0.6378590166568756, 
0.7410027906298637, -1.7340275347232819, -1.6151320934295654, -1.6202170848846436, 2.1107622385025024, 1.6147027611732483, 2.2008373141288757, 0.20370317995548248, 0.1037561446428299, 0.7834433019161224, 3.9312584400177, 1.9446720480918884, 0.6823182106018066, -0.11399078369140625, 4.084675073623657, 0.44461843371391296, 1.9997044205665588, -1.3315133452415466, 1.0650655627250671, 1.8860607743263245, -0.960464209318161, -0.8703119158744812, 2.588159680366516, 2.276316821575165, 0.3519379496574402, 3.8637195825576782, 0.9018396735191345, 1.850459337234497, 3.7698827385902405, 2.1691126227378845, -0.4405495524406433, -0.8176892399787903, 2.8762494027614594, 0.05018448829650879, -1.7433454543352127, 3.2934361696243286, 0.27119821310043335, 0.051368117332458496, 3.860495090484619, 1.6872042417526245, -1.595702052116394, 0.2707443833351135, 2.98825940489769, 2.3140229284763336, -0.6839565932750702, 1.537642002105713, 2.6002658009529114, 2.653698205947876, 2.018460303544998, 2.1577287912368774, 1.5650858879089355, 1.5288212597370148, 1.0946676433086395, 0.005232870578765869, -0.5721810460090637, 2.591393381357193, 3.504615902900696, -0.6234785914421082, 1.2751400172710419, 2.967794418334961, 0.6752512454986572, 0.929683268070221, 2.873240500688553, 1.3088274002075195, -1.563977837562561, 1.0894655883312225, 1.6831098198890686, 2.79997581243515, 0.9091663900762796, 1.039250373840332, -0.2278135120868683, 2.2370247542858124, 0.9630968570709229, 2.1286914348602295, 1.1031403839588165, 2.2346373796463013, 0.984773188829422, 3.545325219631195, 0.7524199932813644, 1.964707374572754, 0.95658740401268, 1.6900435984134674, 0.7646693168208003, 0.784693107008934, 2.9248103499412537, 2.764322817325592, 2.37806236743927, 1.6957352757453918, 2.5313061475753784, 2.9660938680171967, 2.546180844306946, 1.4264174401760101, 2.6010509729385376, 3.131833851337433, 2.490403026342392, 2.2305434346199036, 0.45485207438468933, -0.2049075961112976, 3.9700870513916016, 2.6260368824005127, 
1.745518147945404, 2.2572388648986816, 1.7456613183021545, 2.950973927974701, 1.3027393817901611, 1.7355260252952576, 2.3226271867752075, 0.8437174558639526, 2.2532759606838226, 1.7347663044929504, -0.0050980448722839355, 1.169892281293869, 0.5850276499986649, 1.1715667843818665, 0.593328595161438, 1.353189766407013, 0.7872850801795721, -1.6005812138319016, -1.0769227743148804, 2.288356304168701, 3.108502149581909, 0.614934578537941, 0.6843967437744141, 2.204077661037445, -0.0004414021968841553, 3.388222098350525, 2.3059264421463013, 2.2062636613845825, -0.07324928045272827, 1.3386163115501404, 1.6970834732055664, 2.4391518235206604, 0.4920702278614044, 0.9900135844945908, 2.2905810475349426, 0.4571004608296789, 1.2120925784111023, 2.885680675506592, 1.2613102197647095, 0.6581690907478333, 3.0120148062705994, 2.863319218158722, 2.0909981727600098, 1.5308949947357178, 1.694356083869934, 1.9248302578926086, 2.037512093782425, 0.16763997077941895, 2.313068985939026, 0.20004019141197205, 2.676566481590271, 1.5560699701309204, 2.6811817288398743, 2.6683337092399597, 0.7397091090679169, 0.6628532111644745, 3.093649685382843, 3.0954139828681946, 1.6091610789299011, -0.8415878489613533, 1.2594474256038666, 3.1203470826148987, 1.0595019459724426, 0.6205319166183472, 2.963775932788849, 0.620332658290863, 2.686708927154541, 0.3639736622571945, 2.836131751537323], "step": 20000}
|
reward_curve.png
CHANGED
|
|
spindleflow_model_latest.zip
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7aec515b0112638963c9c37fec3156be35df48402601b9a342c59b24557ae238
|
| 3 |
+
size 143819553
|
training_log.txt
CHANGED
|
@@ -265,3 +265,103 @@
|
|
| 265 |
[08:15:53] Ep 6000 | reward +3.170 | Phase 3/3 | Rolling mean: 1.835 / β | Episodes in phase: 3775
|
| 266 |
[08:16:08] Ep 6025 | reward +0.068 | Phase 3/3 | Rolling mean: 1.834 / β | Episodes in phase: 3800
|
| 267 |
[08:16:08] Periodic save at step 15,000 ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
[08:15:53] Ep 6000 | reward +3.170 | Phase 3/3 | Rolling mean: 1.835 / β | Episodes in phase: 3775
|
| 266 |
[08:16:08] Ep 6025 | reward +0.068 | Phase 3/3 | Rolling mean: 1.834 / β | Episodes in phase: 3800
|
| 267 |
[08:16:08] Periodic save at step 15,000 ...
|
| 268 |
+
[08:16:11] Periodic push done — 5 files at step 15,000
|
| 269 |
+
[08:16:12] Ep 6050 | reward +1.294 | Phase 3/3 | Rolling mean: 1.760 / β | Episodes in phase: 3825
|
| 270 |
+
[08:16:13] Ep 6075 | reward -0.963 | Phase 3/3 | Rolling mean: 1.749 / β | Episodes in phase: 3850
|
| 271 |
+
[08:16:15] Ep 6100 | reward +1.004 | Phase 3/3 | Rolling mean: 1.748 / β | Episodes in phase: 3875
|
| 272 |
+
[08:16:30] Ep 6125 | reward +1.414 | Phase 3/3 | Rolling mean: 1.696 / β | Episodes in phase: 3900
|
| 273 |
+
[08:16:32] Ep 6150 | reward +1.373 | Phase 3/3 | Rolling mean: 1.669 / β | Episodes in phase: 3925
|
| 274 |
+
[08:16:33] Ep 6175 | reward +0.593 | Phase 3/3 | Rolling mean: 1.626 / β | Episodes in phase: 3950
|
| 275 |
+
[08:16:34] Ep 6200 | reward +2.699 | Phase 3/3 | Rolling mean: 1.643 / β | Episodes in phase: 3975
|
| 276 |
+
[08:16:54] Ep 6225 | reward +2.586 | Phase 3/3 | Rolling mean: 1.619 / β | Episodes in phase: 4000
|
| 277 |
+
[08:16:55] Ep 6250 | reward +1.268 | Phase 3/3 | Rolling mean: 1.669 / β | Episodes in phase: 4025
|
| 278 |
+
[08:16:57] Ep 6275 | reward +3.113 | Phase 3/3 | Rolling mean: 1.792 / β | Episodes in phase: 4050
|
| 279 |
+
[08:16:58] Ep 6300 | reward +2.942 | Phase 3/3 | Rolling mean: 1.832 / β | Episodes in phase: 4075
|
| 280 |
+
[08:17:16] Ep 6325 | reward +2.941 | Phase 3/3 | Rolling mean: 1.865 / β | Episodes in phase: 4100
|
| 281 |
+
[08:17:17] Ep 6350 | reward +1.657 | Phase 3/3 | Rolling mean: 1.826 / β | Episodes in phase: 4125
|
| 282 |
+
[08:17:19] Ep 6375 | reward +1.085 | Phase 3/3 | Rolling mean: 1.824 / β | Episodes in phase: 4150
|
| 283 |
+
[08:17:20] Ep 6400 | reward +0.326 | Phase 3/3 | Rolling mean: 1.899 / β | Episodes in phase: 4175
|
| 284 |
+
[08:17:34] Ep 6425 | reward -0.289 | Phase 3/3 | Rolling mean: 1.936 / β | Episodes in phase: 4200
|
| 285 |
+
[08:17:35] Ep 6450 | reward +0.661 | Phase 3/3 | Rolling mean: 1.959 / β | Episodes in phase: 4225
|
| 286 |
+
[08:17:39] Ep 6475 | reward +3.628 | Phase 3/3 | Rolling mean: 1.923 / β | Episodes in phase: 4250
|
| 287 |
+
[08:17:41] Ep 6500 | reward +0.684 | Phase 3/3 | Rolling mean: 1.907 / β | Episodes in phase: 4275
|
| 288 |
+
[08:17:58] Ep 6525 | reward +2.853 | Phase 3/3 | Rolling mean: 1.924 / β | Episodes in phase: 4300
|
| 289 |
+
[08:17:59] Ep 6550 | reward +3.239 | Phase 3/3 | Rolling mean: 1.995 / β | Episodes in phase: 4325
|
| 290 |
+
[08:18:01] Ep 6575 | reward -1.630 | Phase 3/3 | Rolling mean: 2.006 / β | Episodes in phase: 4350
|
| 291 |
+
[08:18:02] Ep 6600 | reward +2.893 | Phase 3/3 | Rolling mean: 1.982 / β | Episodes in phase: 4375
|
| 292 |
+
[08:18:20] Ep 6625 | reward +1.059 | Phase 3/3 | Rolling mean: 1.976 / β | Episodes in phase: 4400
|
| 293 |
+
[08:18:21] Ep 6650 | reward +3.015 | Phase 3/3 | Rolling mean: 1.982 / β | Episodes in phase: 4425
|
| 294 |
+
[08:18:22] Ep 6675 | reward +1.059 | Phase 3/3 | Rolling mean: 1.907 / β | Episodes in phase: 4450
|
| 295 |
+
[08:18:23] Ep 6700 | reward +0.000 | Phase 3/3 | Rolling mean: 1.970 / β | Episodes in phase: 4475
|
| 296 |
+
[08:18:41] Ep 6725 | reward +2.527 | Phase 3/3 | Rolling mean: 1.933 / β | Episodes in phase: 4500
|
| 297 |
+
[08:18:43] Ep 6750 | reward +2.207 | Phase 3/3 | Rolling mean: 1.882 / β | Episodes in phase: 4525
|
| 298 |
+
[08:18:44] Ep 6775 | reward +0.991 | Phase 3/3 | Rolling mean: 1.876 / β | Episodes in phase: 4550
|
| 299 |
+
[08:18:45] Ep 6800 | reward +1.038 | Phase 3/3 | Rolling mean: 1.838 / β | Episodes in phase: 4575
|
| 300 |
+
[08:19:00] Ep 6825 | reward +1.530 | Phase 3/3 | Rolling mean: 1.793 / β | Episodes in phase: 4600
|
| 301 |
+
[08:19:01] Ep 6850 | reward -0.560 | Phase 3/3 | Rolling mean: 1.732 / β | Episodes in phase: 4625
|
| 302 |
+
[08:19:02] Ep 6875 | reward +2.855 | Phase 3/3 | Rolling mean: 1.745 / β | Episodes in phase: 4650
|
| 303 |
+
[08:19:04] Ep 6900 | reward +2.213 | Phase 3/3 | Rolling mean: 1.663 / β | Episodes in phase: 4675
|
| 304 |
+
[08:19:18] Ep 6925 | reward +2.214 | Phase 3/3 | Rolling mean: 1.676 / β | Episodes in phase: 4700
|
| 305 |
+
[08:19:20] Ep 6950 | reward +0.990 | Phase 3/3 | Rolling mean: 1.641 / β | Episodes in phase: 4725
|
| 306 |
+
[08:19:23] Ep 6975 | reward +0.355 | Phase 3/3 | Rolling mean: 1.665 / β | Episodes in phase: 4750
|
| 307 |
+
[08:19:24] Ep 7000 | reward +2.411 | Phase 3/3 | Rolling mean: 1.658 / β | Episodes in phase: 4775
|
| 308 |
+
[08:19:39] Ep 7025 | reward +0.505 | Phase 3/3 | Rolling mean: 1.714 / β | Episodes in phase: 4800
|
| 309 |
+
[08:19:40] Ep 7050 | reward +2.409 | Phase 3/3 | Rolling mean: 1.794 / β | Episodes in phase: 4825
|
| 310 |
+
[08:19:41] Ep 7075 | reward +2.886 | Phase 3/3 | Rolling mean: 1.792 / β | Episodes in phase: 4850
|
| 311 |
+
[08:19:43] Ep 7100 | reward +2.091 | Phase 3/3 | Rolling mean: 1.843 / β | Episodes in phase: 4875
|
| 312 |
+
[08:19:59] Ep 7125 | reward +0.874 | Phase 3/3 | Rolling mean: 1.878 / β | Episodes in phase: 4900
|
| 313 |
+
[08:20:00] Ep 7150 | reward +2.551 | Phase 3/3 | Rolling mean: 1.926 / β | Episodes in phase: 4925
|
| 314 |
+
[08:20:01] Ep 7175 | reward +2.947 | Phase 3/3 | Rolling mean: 1.910 / β | Episodes in phase: 4950
|
| 315 |
+
[08:20:02] Ep 7200 | reward +1.366 | Phase 3/3 | Rolling mean: 1.940 / β | Episodes in phase: 4975
|
| 316 |
+
[08:20:20] Ep 7225 | reward +0.658 | Phase 3/3 | Rolling mean: 1.939 / β | Episodes in phase: 5000
|
| 317 |
+
[08:20:21] Ep 7250 | reward +0.482 | Phase 3/3 | Rolling mean: 1.899 / β | Episodes in phase: 5025
|
| 318 |
+
[08:20:22] Ep 7275 | reward +1.392 | Phase 3/3 | Rolling mean: 1.878 / β | Episodes in phase: 5050
|
| 319 |
+
[08:20:24] Ep 7300 | reward +0.991 | Phase 3/3 | Rolling mean: 1.802 / β | Episodes in phase: 5075
|
| 320 |
+
[08:20:40] Ep 7325 | reward +1.144 | Phase 3/3 | Rolling mean: 1.774 / β | Episodes in phase: 5100
|
| 321 |
+
[08:20:41] Ep 7350 | reward +1.261 | Phase 3/3 | Rolling mean: 1.749 / β | Episodes in phase: 5125
|
| 322 |
+
[08:20:42] Ep 7375 | reward +2.112 | Phase 3/3 | Rolling mean: 1.724 / β | Episodes in phase: 5150
|
| 323 |
+
[08:20:43] Ep 7400 | reward +2.036 | Phase 3/3 | Rolling mean: 1.751 / β | Episodes in phase: 5175
|
| 324 |
+
[08:20:59] Ep 7425 | reward +2.308 | Phase 3/3 | Rolling mean: 1.775 / β | Episodes in phase: 5200
|
| 325 |
+
[08:21:02] Ep 7450 | reward +1.764 | Phase 3/3 | Rolling mean: 1.743 / β | Episodes in phase: 5225
|
| 326 |
+
[08:21:03] Ep 7475 | reward +1.355 | Phase 3/3 | Rolling mean: 1.790 / β | Episodes in phase: 5250
|
| 327 |
+
[08:21:04] Ep 7500 | reward +1.752 | Phase 3/3 | Rolling mean: 1.847 / β | Episodes in phase: 5275
|
| 328 |
+
[08:21:20] Ep 7525 | reward +1.202 | Phase 3/3 | Rolling mean: 1.866 / β | Episodes in phase: 5300
|
| 329 |
+
[08:21:21] Ep 7550 | reward +1.265 | Phase 3/3 | Rolling mean: 1.907 / β | Episodes in phase: 5325
|
| 330 |
+
[08:21:22] Ep 7575 | reward +2.410 | Phase 3/3 | Rolling mean: 1.918 / β | Episodes in phase: 5350
|
| 331 |
+
[08:21:24] Ep 7600 | reward +2.552 | Phase 3/3 | Rolling mean: 1.963 / β | Episodes in phase: 5375
|
| 332 |
+
[08:21:37] Ep 7625 | reward +3.110 | Phase 3/3 | Rolling mean: 1.961 / β | Episodes in phase: 5400
|
| 333 |
+
[08:21:38] Ep 7650 | reward +0.200 | Phase 3/3 | Rolling mean: 1.953 / β | Episodes in phase: 5425
|
| 334 |
+
[08:21:40] Ep 7675 | reward +2.411 | Phase 3/3 | Rolling mean: 1.982 / β | Episodes in phase: 5450
|
| 335 |
+
[08:21:41] Ep 7700 | reward +1.059 | Phase 3/3 | Rolling mean: 1.966 / β | Episodes in phase: 5475
|
| 336 |
+
[08:21:57] Ep 7725 | reward +3.306 | Phase 3/3 | Rolling mean: 1.974 / β | Episodes in phase: 5500
|
| 337 |
+
[08:21:58] Ep 7750 | reward +2.317 | Phase 3/3 | Rolling mean: 1.970 / β | Episodes in phase: 5525
|
| 338 |
+
[08:22:00] Ep 7775 | reward +2.925 | Phase 3/3 | Rolling mean: 2.017 / β | Episodes in phase: 5550
|
| 339 |
+
[08:22:01] Ep 7800 | reward +2.278 | Phase 3/3 | Rolling mean: 1.931 / β | Episodes in phase: 5575
|
| 340 |
+
[08:22:20] Ep 7825 | reward +3.443 | Phase 3/3 | Rolling mean: 1.886 / β | Episodes in phase: 5600
|
| 341 |
+
[08:22:21] Ep 7850 | reward +3.094 | Phase 3/3 | Rolling mean: 1.889 / β | Episodes in phase: 5625
|
| 342 |
+
[08:22:22] Ep 7875 | reward +2.675 | Phase 3/3 | Rolling mean: 1.833 / β | Episodes in phase: 5650
|
| 343 |
+
[08:22:23] Ep 7900 | reward +2.489 | Phase 3/3 | Rolling mean: 1.845 / β | Episodes in phase: 5675
|
| 344 |
+
[08:22:37] Ep 7925 | reward +2.072 | Phase 3/3 | Rolling mean: 1.816 / β | Episodes in phase: 5700
|
| 345 |
+
[08:22:39] Ep 7950 | reward +0.401 | Phase 3/3 | Rolling mean: 1.803 / β | Episodes in phase: 5725
|
| 346 |
+
[08:22:42] Ep 7975 | reward +0.619 | Phase 3/3 | Rolling mean: 1.770 / β | Episodes in phase: 5750
|
| 347 |
+
[08:22:43] Ep 8000 | reward +2.239 | Phase 3/3 | Rolling mean: 1.808 / β | Episodes in phase: 5775
|
| 348 |
+
[08:22:59] Ep 8025 | reward +1.391 | Phase 3/3 | Rolling mean: 1.785 / β | Episodes in phase: 5800
|
| 349 |
+
[08:23:00] Ep 8050 | reward +2.563 | Phase 3/3 | Rolling mean: 1.821 / β | Episodes in phase: 5825
|
| 350 |
+
[08:23:02] Ep 8075 | reward -0.242 | Phase 3/3 | Rolling mean: 1.838 / β | Episodes in phase: 5850
|
| 351 |
+
[08:23:03] Ep 8100 | reward +1.609 | Phase 3/3 | Rolling mean: 1.892 / β | Episodes in phase: 5875
|
| 352 |
+
[08:23:18] Ep 8125 | reward +1.544 | Phase 3/3 | Rolling mean: 1.920 / β | Episodes in phase: 5900
|
| 353 |
+
[08:23:20] Ep 8150 | reward +2.318 | Phase 3/3 | Rolling mean: 1.953 / β | Episodes in phase: 5925
|
| 354 |
+
[08:23:21] Ep 8175 | reward +3.053 | Phase 3/3 | Rolling mean: 1.952 / β | Episodes in phase: 5950
|
| 355 |
+
[08:23:22] Ep 8200 | reward +0.620 | Phase 3/3 | Rolling mean: 1.922 / β | Episodes in phase: 5975
|
| 356 |
+
[08:23:38] Ep 8225 | reward +2.690 | Phase 3/3 | Rolling mean: 1.960 / β | Episodes in phase: 6000
|
| 357 |
+
[08:23:40] Ep 8250 | reward +1.656 | Phase 3/3 | Rolling mean: 1.986 / β | Episodes in phase: 6025
|
| 358 |
+
[08:23:41] Ep 8275 | reward +2.964 | Phase 3/3 | Rolling mean: 1.998 / β | Episodes in phase: 6050
|
| 359 |
+
[08:23:42] Ep 8300 | reward +3.911 | Phase 3/3 | Rolling mean: 1.953 / β | Episodes in phase: 6075
|
| 360 |
+
[08:23:57] Ep 8325 | reward +1.108 | Phase 3/3 | Rolling mean: 1.979 / β | Episodes in phase: 6100
|
| 361 |
+
[08:23:58] Ep 8350 | reward +3.940 | Phase 3/3 | Rolling mean: 2.029 / β | Episodes in phase: 6125
|
| 362 |
+
[08:24:00] Ep 8375 | reward +2.965 | Phase 3/3 | Rolling mean: 2.016 / β | Episodes in phase: 6150
|
| 363 |
+
[08:24:01] Ep 8400 | reward +2.909 | Phase 3/3 | Rolling mean: 2.081 / β | Episodes in phase: 6175
|
| 364 |
+
[08:24:16] Ep 8425 | reward -0.087 | Phase 3/3 | Rolling mean: 2.094 / β | Episodes in phase: 6200
|
| 365 |
+
[08:24:17] Ep 8450 | reward +1.111 | Phase 3/3 | Rolling mean: 2.066 / β | Episodes in phase: 6225
|
| 366 |
+
[08:24:20] Ep 8475 | reward +3.093 | Phase 3/3 | Rolling mean: 2.109 / β | Episodes in phase: 6250
|
| 367 |
+
[08:24:20] Periodic save at step 20,000 ...
|
vec_normalize_latest.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 166596
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74da382d70b483573ff4ffbe3b76bb4cfd839b08d630dbc2b8a60df92c0c7f71
|
| 3 |
size 166596
|