Instructions to use garvitsachdeva/spindleflow-rl with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- stable-baselines3
How to use garvitsachdeva/spindleflow-rl with stable-baselines3:
from huggingface_sb3 import load_from_hub

checkpoint = load_from_hub(
    repo_id="garvitsachdeva/spindleflow-rl",
    filename="{MODEL FILENAME}.zip",
)
- Notebooks
- Google Colab
- Kaggle
Checkpoint at step 25,000
Browse files
- reward_curve.json +1 -1
- reward_curve.png +0 -0
- spindleflow_model_latest.zip +1 -1
- training_log.txt +100 -0
- vec_normalize_latest.pkl +1 -1
reward_curve.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"episodes": [0,
|
|
|
|
| 1 |
+
{"episodes": [0, 54, 108, 162, 216, 270, 324, 378, 432, 486, 540, 594, 648, 702, 756, 810, 864, 918, 972, 1026, 1080, 1134, 1188, 1242, 1296, 1350, 1404, 1458, 1512, 1566, 1620, 1674, 1728, 1782, 1836, 1890, 1944, 1998, 2052, 2106, 2160, 2214, 2268, 2322, 2376, 2430, 2484, 2538, 2592, 2646, 2700, 2754, 2808, 2862, 2916, 2970, 3024, 3078, 3132, 3186, 3240, 3294, 3348, 3402, 3456, 3510, 3564, 3618, 3672, 3726, 3780, 3834, 3888, 3942, 3996, 4050, 4104, 4158, 4212, 4266, 4320, 4374, 4428, 4482, 4536, 4590, 4644, 4698, 4752, 4806, 4860, 4914, 4968, 5022, 5076, 5130, 5184, 5238, 5292, 5346, 5400, 5454, 5508, 5562, 5616, 5670, 5724, 5778, 5832, 5886, 5940, 5994, 6048, 6102, 6156, 6210, 6264, 6318, 6372, 6426, 6480, 6534, 6588, 6642, 6696, 6750, 6804, 6858, 6912, 6966, 7020, 7074, 7128, 7182, 7236, 7290, 7344, 7398, 7452, 7506, 7560, 7614, 7668, 7722, 7776, 7830, 7884, 7938, 7992, 8046, 8100, 8154, 8208, 8262, 8316, 8370, 8424, 8478, 8532, 8586, 8640, 8694, 8748, 8802, 8856, 8910, 8964, 9018, 9072, 9126, 9180, 9234, 9288, 9342, 9396, 9450, 9504, 9558, 9612, 9666, 9720, 9774, 9828, 9882, 9936, 9990, 10044, 10098, 10152, 10206, 10260, 10314, 10368, 10422, 10476, 10530, 10584, 10638, 10692, 10746, 10800, 10854, 10908], "mean_rewards": [7.869385480880737, -0.41051984910260547, -0.5492027676365244, -0.6168348340923252, -0.5351173416861603, -0.4764093054678442, -0.47253693533918034, -0.47499917030069083, -0.4891404308417412, -0.5025442564148586, -0.48840301800794667, -0.508802940635636, -0.46954920547538487, -0.42533945731213657, -0.4368745926138775, -0.4193249794919669, -0.4092346332987675, -0.353060252322088, -0.2940995312962056, -0.21927801998549679, -0.20089613068780868, -0.12506907433271408, -0.10114177332402471, -0.008871172632967685, 0.09518664632584002, 0.1262925854581578, 0.16667680813332708, 0.22568784479262566, 0.28477957568396506, 0.296171207164489, 0.3757471558196049, 0.42450997700199256, 0.4808117801295491, 0.5391666152812779, 0.5689808041716409, 
0.5712222996449104, 0.5904879566519113, 0.6232858557695095, 0.6342131112949467, 0.6917316244849697, 0.7243372341715663, 0.7537346597256963, 0.789060527155507, 0.8554964856626531, 0.8602999568525496, 0.9521009895790326, 1.009716379080926, 1.051387386374726, 1.1155794020388206, 1.150166751366795, 1.1501598492774119, 1.1837558287689138, 1.213108567848341, 1.2131437046706581, 1.276654483436906, 1.3081771806341318, 1.3261926365713759, 1.2855303687261024, 1.3094462854481799, 1.3246912506211848, 1.3649099320547151, 1.3636714587647496, 1.4078118026861581, 1.4299958327098539, 1.448524320732639, 1.4219702863606902, 1.4664546345262006, 1.4783013143209591, 1.5082530105022551, 1.498775199950455, 1.4922605522485097, 1.5474505744811595, 1.5467187698928802, 1.5193020349385873, 1.5015674076244034, 1.5476209773602607, 1.5425808865028472, 1.5614539348704308, 1.5752295390951334, 1.601019313977224, 1.5956984089713384, 1.5783536112206618, 1.605170402980289, 1.609818503597577, 1.6114819045325042, 1.611166421031901, 1.626127223142819, 1.6224109640191466, 1.62811375268794, 1.6364221707344038, 1.6479085205436252, 1.68300795165462, 1.7199315566838804, 1.7624231781204547, 1.8120945872093999, 1.7900560836224428, 1.8056307126220696, 1.796357822860676, 1.794105003174606, 1.7866614152400977, 1.7825549535948721, 1.7799180618051378, 1.7159151644033828, 1.707240282961322, 1.7067030376529944, 1.7414429096118835, 1.7244301065825942, 1.7394534384313016, 1.7501649395389574, 1.768823225281977, 1.7936280370897266, 1.7753073548622782, 1.7985860085296523, 1.775856651936601, 1.761148449725007, 1.7298969913180418, 1.7499384344927467, 1.7930582820463181, 1.7775360784647882, 1.7903102684193166, 1.8150328708382464, 1.8257946563767191, 1.8465963355418513, 1.8954826608643507, 1.9210255016546431, 1.9286256749034234, 1.9029758592466757, 1.8584531546866923, 1.8594947501572603, 1.8344094237201358, 1.79941314168616, 1.8079466098409875, 1.8152153570509608, 1.8054590580533947, 1.7969712433690948, 1.7882612819430108, 
1.7704781028353924, 1.8000406818682146, 1.8211428545306847, 1.8399917912533732, 1.8737868167589338, 1.8774200505343348, 1.8721188200453092, 1.8767408794616383, 1.889315270822169, 1.8973016155200473, 1.9045764394563345, 1.9062862954065067, 1.8882204007109233, 1.885903197714556, 1.8929937056258974, 1.8889157436584143, 1.8742003176619646, 1.8987999117736072, 1.8982257889345777, 1.9233697905769422, 1.9702272060323833, 1.992930272779339, 1.9961104948722832, 2.028669857688547, 1.9892342082619068, 1.9922661232102634, 2.014080071243981, 2.003148631695794, 1.9819643528737727, 1.9620282770742927, 1.9306926963329112, 1.911854676284528, 1.9251795592454404, 1.9032559321120885, 1.940680162529937, 1.9206769538671118, 1.8939569022338225, 1.9223596293718832, 1.9247678831291053, 1.9413379225448395, 1.9664991747436398, 1.9917000998382746, 1.9840289586591762, 1.965516634339595, 1.9864252154801343, 1.9808506768425562, 2.002361669342974, 2.0258850431815967, 2.0045235502224465, 2.0037248894380215, 1.9576798827025401, 1.9127588423060757, 1.9126606692596557, 1.9263554555385196, 1.9400756921152666, 1.9400606237476807, 1.9311223548206493, 1.913246230365892, 1.954121733201586, 1.9481463303932254, 1.9877335088026717, 2.0078176827846352, 2.0425200287990246, 2.0322421691869046, 2.001448381682824, 1.987356471108531, 1.9812557704595468], "raw_rewards": [7.869385480880737, 0.4599737599492073, -1.4565927982330322, -1.4442170858383179, -0.7505795061588287, 0.12662386894226074, -1.4533054828643799, -1.4666680097579956, -1.248689442873001, -1.1987037062644958, -1.3451157063245773, -2.121876023709774, -2.401705786585808, 1.871029555797577, -2.000951513648033, 1.617654800415039, -1.5481922626495361, -0.1357777714729309, 0.5319417044520378, -1.5726486444473267, -0.5888408124446869, -0.5134999752044678, 1.1500522196292877, -1.2830363810062408, 2.7681850492954254, 1.2519995421171188, 1.9592264890670776, -1.6138243675231934, -1.6202170848846436, -1.1529327481985092, 3.0023685693740845, 0.11245410144329071, 
-1.0573668628931046, -1.6283231973648071, -0.4163784980773926, 0.6823182106018066, -2.69570055603981, 2.345923662185669, 1.8043333292007446, 1.250470757484436, -1.1206167042255402, 1.209741860628128, -0.8703119158744812, 1.5336683094501495, 1.1777045745402575, 3.212405025959015, -1.6204973459243774, 2.229508101940155, 3.6350157856941223, -0.4405495524406433, 1.1129309833049774, 0.40909240394830704, 1.5946876406669617, 2.9371981620788574, 1.0661866962909698, -1.5951862335205078, 1.6872042417526245, 1.1412177383899689, 2.9885196685791016, 1.5259829759597778, 1.2249691784381866, -0.17562654614448547, -0.060656994581222534, 2.018460303544998, 2.586593806743622, 1.5325765013694763, 2.585873991250992, 0.8348989896476269, 2.7364750802516937, 0.5851811468601227, -0.6234785914421082, -0.3203362226486206, -0.05612260103225708, -0.7573689520359039, -1.0328965410590172, 0.43692925572395325, 1.6856676638126373, 1.6831098198890686, 2.5903921127319336, 0.19393183290958405, 1.992238163948059, 1.4300525188446045, -0.3287913426756859, 2.126563787460327, 2.2346373796463013, -0.0103532075881958, 3.960599184036255, 1.9564669132232666, 3.540530562400818, 0.22767803072929382, 1.5510457158088684, 2.9248103499412537, 1.305261641740799, 2.897787034511566, 1.3643755316734314, 1.3663267493247986, 3.1297138929367065, 0.6142806708812714, 3.131833851337433, 2.492442339658737, 2.345623791217804, 2.467067837715149, 0.9856889843940735, -0.20317912101745605, 1.002382606267929, 1.7456613183021545, -1.5415513515472412, 1.7634225487709045, 3.887290835380554, 2.3161216974258423, 1.5524056553840637, 0.06543898582458496, 1.169892281293869, 2.116620749235153, 3.16776305437088, 2.9264339208602905, 3.236627459526062, 2.9213297367095947, 2.2149168252944946, 3.108502149581909, 0.660636343061924, 2.7253071069717407, 1.2900128662586212, 2.9700556993484497, 3.236673951148987, 3.241437554359436, 1.3386163115501404, 0.6383883208036423, 2.8580138087272644, 2.79445818066597, 3.4975720643997192, 2.8863253593444824, 
1.201033592224121, 1.2613102197647095, 2.8605822324752808, -0.2421724647283554, 1.5260725319385529, 2.30670964717865, 3.353128671646118, 0.9881656616926193, 0.16763997077941895, 2.20599102973938, 1.3687167763710022, 2.1226362586021423, -0.22471871972084045, 3.111227869987488, 3.1131207942962646, 3.093649685382843, 3.689613163471222, 3.1143359541893005, 1.050121396780014, 2.962377607822418, 0.5182003080844879, 2.6878902316093445, 0.620332658290863, 2.1800947785377502, -0.08711862564086914, 1.4942425191402435, 3.117049276828766, 1.7567228972911835, 3.5543485283851624, 1.0609619319438934, 3.5412749648094177, 1.003387212753296, -0.044568657875061035, 1.9571133255958557, 0.987617015838623, 1.6582836210727692, 2.9391005635261536, 0.6417786628007889, 2.9579904675483704, 3.2891563773155212, 3.2921172380447388, 1.1086584031581879, 3.292155683040619, 1.2546398341655731, 0.45421365124639124, 3.228146195411682, 1.0346056520938873, 1.0338847637176514, 0.9749908745288849, 0.7823807299137115, 0.0020377933979034424, 2.640851616859436, 0.45796782709658146, -0.08856308460235596, 2.266314923763275, 0.9856714010238647, 2.3894476294517517, 2.9557398557662964, 2.0732545256614685, 3.8009579181671143, 2.2961906790733337, 3.6587679386138916, 2.8332625031471252, 3.6581618189811707, 1.9579390287399292, 0.7483655419200659, 3.1734155416488647, 1.7330745458602905, 1.4089505076408386, 2.901742935180664, 2.012480288743973], "step": 25000}
|
reward_curve.png
CHANGED
|
|
spindleflow_model_latest.zip
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 143819553
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4af29b29400e31cb4f9980de6eafdead8e2bd0a405acfdeff3d3543f1d3eb877
|
| 3 |
size 143819553
|
training_log.txt
CHANGED
|
@@ -365,3 +365,103 @@
|
|
| 365 |
[08:24:17] Ep 8450 | reward +1.111 | Phase 3/3 | Rolling mean: 2.066 / β | Episodes in phase: 6225
|
| 366 |
[08:24:20] Ep 8475 | reward +3.093 | Phase 3/3 | Rolling mean: 2.109 / β | Episodes in phase: 6250
|
| 367 |
[08:24:20] Periodic save at step 20,000 ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
[08:24:17] Ep 8450 | reward +1.111 | Phase 3/3 | Rolling mean: 2.066 / β | Episodes in phase: 6225
|
| 366 |
[08:24:20] Ep 8475 | reward +3.093 | Phase 3/3 | Rolling mean: 2.109 / β | Episodes in phase: 6250
|
| 367 |
[08:24:20] Periodic save at step 20,000 ...
|
| 368 |
+
[08:24:24] Periodic push done β 5 files at step 20,000
|
| 369 |
+
[08:24:25] Ep 8500 | reward +3.116 | Phase 3/3 | Rolling mean: 2.121 / β | Episodes in phase: 6275
|
| 370 |
+
[08:24:39] Ep 8525 | reward +1.944 | Phase 3/3 | Rolling mean: 2.051 / β | Episodes in phase: 6300
|
| 371 |
+
[08:24:40] Ep 8550 | reward +2.910 | Phase 3/3 | Rolling mean: 1.994 / β | Episodes in phase: 6325
|
| 372 |
+
[08:24:42] Ep 8575 | reward +2.345 | Phase 3/3 | Rolling mean: 2.057 / β | Episodes in phase: 6350
|
| 373 |
+
[08:24:43] Ep 8600 | reward +1.379 | Phase 3/3 | Rolling mean: 1.980 / β | Episodes in phase: 6375
|
| 374 |
+
[08:24:57] Ep 8625 | reward +0.658 | Phase 3/3 | Rolling mean: 1.964 / β | Episodes in phase: 6400
|
| 375 |
+
[08:24:58] Ep 8650 | reward +1.380 | Phase 3/3 | Rolling mean: 1.911 / β | Episodes in phase: 6425
|
| 376 |
+
[08:25:00] Ep 8675 | reward +1.496 | Phase 3/3 | Rolling mean: 1.918 / β | Episodes in phase: 6450
|
| 377 |
+
[08:25:01] Ep 8700 | reward +1.379 | Phase 3/3 | Rolling mean: 1.919 / β | Episodes in phase: 6475
|
| 378 |
+
[08:25:17] Ep 8725 | reward +1.979 | Phase 3/3 | Rolling mean: 1.922 / β | Episodes in phase: 6500
|
| 379 |
+
[08:25:18] Ep 8750 | reward +1.654 | Phase 3/3 | Rolling mean: 1.968 / β | Episodes in phase: 6525
|
| 380 |
+
[08:25:19] Ep 8775 | reward +3.113 | Phase 3/3 | Rolling mean: 1.884 / β | Episodes in phase: 6550
|
| 381 |
+
[08:25:20] Ep 8800 | reward +2.273 | Phase 3/3 | Rolling mean: 1.943 / β | Episodes in phase: 6575
|
| 382 |
+
[08:25:34] Ep 8825 | reward +2.554 | Phase 3/3 | Rolling mean: 1.984 / β | Episodes in phase: 6600
|
| 383 |
+
[08:25:36] Ep 8850 | reward +2.646 | Phase 3/3 | Rolling mean: 1.994 / β | Episodes in phase: 6625
|
| 384 |
+
[08:25:37] Ep 8875 | reward +2.192 | Phase 3/3 | Rolling mean: 1.889 / β | Episodes in phase: 6650
|
| 385 |
+
[08:25:38] Ep 8900 | reward +1.617 | Phase 3/3 | Rolling mean: 1.906 / β | Episodes in phase: 6675
|
| 386 |
+
[08:25:51] Ep 8925 | reward +2.647 | Phase 3/3 | Rolling mean: 1.952 / β | Episodes in phase: 6700
|
| 387 |
+
[08:25:53] Ep 8950 | reward +2.496 | Phase 3/3 | Rolling mean: 1.884 / β | Episodes in phase: 6725
|
| 388 |
+
[08:25:56] Ep 8975 | reward +2.195 | Phase 3/3 | Rolling mean: 1.936 / β | Episodes in phase: 6750
|
| 389 |
+
[08:25:57] Ep 9000 | reward +2.345 | Phase 3/3 | Rolling mean: 1.899 / β | Episodes in phase: 6775
|
| 390 |
+
[08:26:10] Ep 9025 | reward +2.477 | Phase 3/3 | Rolling mean: 1.844 / β | Episodes in phase: 6800
|
| 391 |
+
[08:26:11] Ep 9050 | reward +2.954 | Phase 3/3 | Rolling mean: 1.885 / β | Episodes in phase: 6825
|
| 392 |
+
[08:26:13] Ep 9075 | reward +1.034 | Phase 3/3 | Rolling mean: 1.946 / β | Episodes in phase: 6850
|
| 393 |
+
[08:26:14] Ep 9100 | reward +2.091 | Phase 3/3 | Rolling mean: 1.933 / β | Episodes in phase: 6875
|
| 394 |
+
[08:26:26] Ep 9125 | reward +2.799 | Phase 3/3 | Rolling mean: 1.908 / β | Episodes in phase: 6900
|
| 395 |
+
[08:26:28] Ep 9150 | reward +1.280 | Phase 3/3 | Rolling mean: 1.920 / β | Episodes in phase: 6925
|
| 396 |
+
[08:26:29] Ep 9175 | reward +2.092 | Phase 3/3 | Rolling mean: 1.903 / β | Episodes in phase: 6950
|
| 397 |
+
[08:26:30] Ep 9200 | reward +2.657 | Phase 3/3 | Rolling mean: 1.945 / β | Episodes in phase: 6975
|
| 398 |
+
[08:26:45] Ep 9225 | reward -1.512 | Phase 3/3 | Rolling mean: 1.969 / β | Episodes in phase: 7000
|
| 399 |
+
[08:26:46] Ep 9250 | reward +1.509 | Phase 3/3 | Rolling mean: 1.948 / β | Episodes in phase: 7025
|
| 400 |
+
[08:26:48] Ep 9275 | reward +1.571 | Phase 3/3 | Rolling mean: 1.916 / β | Episodes in phase: 7050
|
| 401 |
+
[08:26:49] Ep 9300 | reward +2.625 | Phase 3/3 | Rolling mean: 1.896 / β | Episodes in phase: 7075
|
| 402 |
+
[08:27:03] Ep 9325 | reward +2.590 | Phase 3/3 | Rolling mean: 1.909 / β | Episodes in phase: 7100
|
| 403 |
+
[08:27:04] Ep 9350 | reward +1.605 | Phase 3/3 | Rolling mean: 1.943 / β | Episodes in phase: 7125
|
| 404 |
+
[08:27:05] Ep 9375 | reward +0.890 | Phase 3/3 | Rolling mean: 1.975 / β | Episodes in phase: 7150
|
| 405 |
+
[08:27:06] Ep 9400 | reward +2.478 | Phase 3/3 | Rolling mean: 1.925 / β | Episodes in phase: 7175
|
| 406 |
+
[08:27:20] Ep 9425 | reward +1.982 | Phase 3/3 | Rolling mean: 1.950 / β | Episodes in phase: 7200
|
| 407 |
+
[08:27:22] Ep 9450 | reward +1.982 | Phase 3/3 | Rolling mean: 1.954 / β | Episodes in phase: 7225
|
| 408 |
+
[08:27:24] Ep 9475 | reward +3.113 | Phase 3/3 | Rolling mean: 2.030 / β | Episodes in phase: 7250
|
| 409 |
+
[08:27:26] Ep 9500 | reward +1.654 | Phase 3/3 | Rolling mean: 2.060 / β | Episodes in phase: 7275
|
| 410 |
+
[08:27:40] Ep 9525 | reward +0.484 | Phase 3/3 | Rolling mean: 2.067 / β | Episodes in phase: 7300
|
| 411 |
+
[08:27:41] Ep 9550 | reward +3.141 | Phase 3/3 | Rolling mean: 2.053 / β | Episodes in phase: 7325
|
| 412 |
+
[08:27:42] Ep 9575 | reward +2.562 | Phase 3/3 | Rolling mean: 2.070 / β | Episodes in phase: 7350
|
| 413 |
+
[08:27:43] Ep 9600 | reward +2.478 | Phase 3/3 | Rolling mean: 2.065 / β | Episodes in phase: 7375
|
| 414 |
+
[08:27:58] Ep 9625 | reward +0.659 | Phase 3/3 | Rolling mean: 2.022 / β | Episodes in phase: 7400
|
| 415 |
+
[08:27:59] Ep 9650 | reward +3.233 | Phase 3/3 | Rolling mean: 2.038 / β | Episodes in phase: 7425
|
| 416 |
+
[08:28:00] Ep 9675 | reward +0.474 | Phase 3/3 | Rolling mean: 1.934 / β | Episodes in phase: 7450
|
| 417 |
+
[08:28:01] Ep 9700 | reward +0.866 | Phase 3/3 | Rolling mean: 1.898 / β | Episodes in phase: 7475
|
| 418 |
+
[08:28:20] Ep 9725 | reward +2.296 | Phase 3/3 | Rolling mean: 1.938 / β | Episodes in phase: 7500
|
| 419 |
+
[08:28:21] Ep 9750 | reward +1.855 | Phase 3/3 | Rolling mean: 1.926 / β | Episodes in phase: 7525
|
| 420 |
+
[08:28:22] Ep 9775 | reward +0.782 | Phase 3/3 | Rolling mean: 1.905 / β | Episodes in phase: 7550
|
| 421 |
+
[08:28:24] Ep 9800 | reward +2.960 | Phase 3/3 | Rolling mean: 1.954 / β | Episodes in phase: 7575
|
| 422 |
+
[08:28:37] Ep 9825 | reward +2.961 | Phase 3/3 | Rolling mean: 1.974 / β | Episodes in phase: 7600
|
| 423 |
+
[08:28:38] Ep 9850 | reward +2.306 | Phase 3/3 | Rolling mean: 1.992 / β | Episodes in phase: 7625
|
| 424 |
+
[08:28:39] Ep 9875 | reward +2.611 | Phase 3/3 | Rolling mean: 2.085 / β | Episodes in phase: 7650
|
| 425 |
+
[08:28:40] Ep 9900 | reward +0.457 | Phase 3/3 | Rolling mean: 2.099 / β | Episodes in phase: 7675
|
| 426 |
+
[08:28:53] Ep 9925 | reward +0.458 | Phase 3/3 | Rolling mean: 2.034 / β | Episodes in phase: 7700
|
| 427 |
+
[08:28:54] Ep 9950 | reward +0.803 | Phase 3/3 | Rolling mean: 2.032 / β | Episodes in phase: 7725
|
| 428 |
+
[08:28:57] Ep 9975 | reward +1.350 | Phase 3/3 | Rolling mean: 2.003 / β | Episodes in phase: 7750
|
| 429 |
+
[08:28:58] Ep 10000 | reward +3.037 | Phase 3/3 | Rolling mean: 1.991 / β | Episodes in phase: 7775
|
| 430 |
+
[08:29:12] Ep 10025 | reward +0.658 | Phase 3/3 | Rolling mean: 1.935 / β | Episodes in phase: 7800
|
| 431 |
+
[08:29:13] Ep 10050 | reward +0.985 | Phase 3/3 | Rolling mean: 1.886 / β | Episodes in phase: 7825
|
| 432 |
+
[08:29:14] Ep 10075 | reward +1.687 | Phase 3/3 | Rolling mean: 1.832 / β | Episodes in phase: 7850
|
| 433 |
+
[08:29:15] Ep 10100 | reward +1.688 | Phase 3/3 | Rolling mean: 1.776 / β | Episodes in phase: 7875
|
| 434 |
+
[08:29:29] Ep 10125 | reward +0.801 | Phase 3/3 | Rolling mean: 1.762 / β | Episodes in phase: 7900
|
| 435 |
+
[08:29:31] Ep 10150 | reward +0.985 | Phase 3/3 | Rolling mean: 1.787 / β | Episodes in phase: 7925
|
| 436 |
+
[08:29:32] Ep 10175 | reward +0.660 | Phase 3/3 | Rolling mean: 1.784 / β | Episodes in phase: 7950
|
| 437 |
+
[08:29:33] Ep 10200 | reward +0.632 | Phase 3/3 | Rolling mean: 1.787 / β | Episodes in phase: 7975
|
| 438 |
+
[08:29:47] Ep 10225 | reward +1.526 | Phase 3/3 | Rolling mean: 1.852 / β | Episodes in phase: 8000
|
| 439 |
+
[08:29:50] Ep 10250 | reward +1.526 | Phase 3/3 | Rolling mean: 1.900 / β | Episodes in phase: 8025
|
| 440 |
+
[08:29:51] Ep 10275 | reward +3.283 | Phase 3/3 | Rolling mean: 1.906 / β | Episodes in phase: 8050
|
| 441 |
+
[08:29:52] Ep 10300 | reward +2.186 | Phase 3/3 | Rolling mean: 1.991 / β | Episodes in phase: 8075
|
| 442 |
+
[08:30:05] Ep 10325 | reward +1.118 | Phase 3/3 | Rolling mean: 2.017 / β | Episodes in phase: 8100
|
| 443 |
+
[08:30:06] Ep 10350 | reward +3.127 | Phase 3/3 | Rolling mean: 2.023 / β | Episodes in phase: 8125
|
| 444 |
+
[08:30:07] Ep 10375 | reward +2.992 | Phase 3/3 | Rolling mean: 2.033 / β | Episodes in phase: 8150
|
| 445 |
+
[08:30:09] Ep 10400 | reward +3.099 | Phase 3/3 | Rolling mean: 2.033 / β | Episodes in phase: 8175
|
| 446 |
+
[08:30:22] Ep 10425 | reward +1.935 | Phase 3/3 | Rolling mean: 2.025 / β | Episodes in phase: 8200
|
| 447 |
+
[08:30:23] Ep 10450 | reward +0.764 | Phase 3/3 | Rolling mean: 2.032 / β | Episodes in phase: 8225
|
| 448 |
+
[08:30:24] Ep 10475 | reward +1.935 | Phase 3/3 | Rolling mean: 2.103 / β | Episodes in phase: 8250
|
| 449 |
+
[08:30:27] Ep 10500 | reward +2.951 | Phase 3/3 | Rolling mean: 2.061 / β | Episodes in phase: 8275
|
| 450 |
+
[08:30:40] Ep 10525 | reward +1.709 | Phase 3/3 | Rolling mean: 2.034 / β | Episodes in phase: 8300
|
| 451 |
+
[08:30:41] Ep 10550 | reward +2.188 | Phase 3/3 | Rolling mean: 1.996 / β | Episodes in phase: 8325
|
| 452 |
+
[08:30:42] Ep 10575 | reward +1.201 | Phase 3/3 | Rolling mean: 2.052 / β | Episodes in phase: 8350
|
| 453 |
+
[08:30:44] Ep 10600 | reward +0.480 | Phase 3/3 | Rolling mean: 2.033 / β | Episodes in phase: 8375
|
| 454 |
+
[08:30:58] Ep 10625 | reward +2.980 | Phase 3/3 | Rolling mean: 2.070 / β | Episodes in phase: 8400
|
| 455 |
+
[08:30:59] Ep 10650 | reward +1.055 | Phase 3/3 | Rolling mean: 2.034 / β | Episodes in phase: 8425
|
| 456 |
+
[08:31:00] Ep 10675 | reward +1.155 | Phase 3/3 | Rolling mean: 1.964 / β | Episodes in phase: 8450
|
| 457 |
+
[08:31:01] Ep 10700 | reward +0.480 | Phase 3/3 | Rolling mean: 2.021 / β | Episodes in phase: 8475
|
| 458 |
+
[08:31:15] Ep 10725 | reward +2.531 | Phase 3/3 | Rolling mean: 2.078 / β | Episodes in phase: 8500
|
| 459 |
+
[08:31:18] Ep 10750 | reward +2.950 | Phase 3/3 | Rolling mean: 2.079 / β | Episodes in phase: 8525
|
| 460 |
+
[08:31:19] Ep 10775 | reward +1.978 | Phase 3/3 | Rolling mean: 1.999 / β | Episodes in phase: 8550
|
| 461 |
+
[08:31:20] Ep 10800 | reward +2.013 | Phase 3/3 | Rolling mean: 1.963 / β | Episodes in phase: 8575
|
| 462 |
+
[08:31:33] Ep 10825 | reward +1.748 | Phase 3/3 | Rolling mean: 1.908 / β | Episodes in phase: 8600
|
| 463 |
+
[08:31:34] Ep 10850 | reward +2.468 | Phase 3/3 | Rolling mean: 1.910 / β | Episodes in phase: 8625
|
| 464 |
+
[08:31:35] Ep 10875 | reward +2.013 | Phase 3/3 | Rolling mean: 1.912 / β | Episodes in phase: 8650
|
| 465 |
+
[08:31:36] Ep 10900 | reward +2.468 | Phase 3/3 | Rolling mean: 1.867 / β | Episodes in phase: 8675
|
| 466 |
+
[08:31:50] Ep 10925 | reward +1.010 | Phase 3/3 | Rolling mean: 1.803 / β | Episodes in phase: 8700
|
| 467 |
+
[08:31:50] Periodic save at step 25,000 ...
|
vec_normalize_latest.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 166596
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a19434ad2950b7e456a15d2a529b3534fa1a4ef989b9d6bb52ad9da04c5b2a9
|
| 3 |
size 166596
|