Instructions to use garvitsachdeva/spindleflow-rl with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- stable-baselines3
How to use garvitsachdeva/spindleflow-rl with stable-baselines3:
```python
from huggingface_sb3 import load_from_hub

checkpoint = load_from_hub(
    repo_id="garvitsachdeva/spindleflow-rl",
    filename="{MODEL FILENAME}.zip",
)
```
- Notebooks
- Google Colab
- Kaggle
Checkpoint at step 15,000
Browse files
- reward_curve.json +1 -1
- reward_curve.png +0 -0
- spindleflow_model_latest.zip +1 -1
- training_log.txt +98 -0
- vec_normalize_latest.pkl +1 -1
reward_curve.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"episodes": [0,
|
|
|
|
| 1 |
+
{"episodes": [0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330, 360, 390, 420, 450, 480, 510, 540, 570, 600, 630, 660, 690, 720, 750, 780, 810, 840, 870, 900, 930, 960, 990, 1020, 1050, 1080, 1110, 1140, 1170, 1200, 1230, 1260, 1290, 1320, 1350, 1380, 1410, 1440, 1470, 1500, 1530, 1560, 1590, 1620, 1650, 1680, 1710, 1740, 1770, 1800, 1830, 1860, 1890, 1920, 1950, 1980, 2010, 2040, 2070, 2100, 2130, 2160, 2190, 2220, 2250, 2280, 2310, 2340, 2370, 2400, 2430, 2460, 2490, 2520, 2550, 2580, 2610, 2640, 2670, 2700, 2730, 2760, 2790, 2820, 2850, 2880, 2910, 2940, 2970, 3000, 3030, 3060, 3090, 3120, 3150, 3180, 3210, 3240, 3270, 3300, 3330, 3360, 3390, 3420, 3450, 3480, 3510, 3540, 3570, 3600, 3630, 3660, 3690, 3720, 3750, 3780, 3810, 3840, 3870, 3900, 3930, 3960, 3990, 4020, 4050, 4080, 4110, 4140, 4170, 4200, 4230, 4260, 4290, 4320, 4350, 4380, 4410, 4440, 4470, 4500, 4530, 4560, 4590, 4620, 4650, 4680, 4710, 4740, 4770, 4800, 4830, 4860, 4890, 4920, 4950, 4980, 5010, 5040, 5070, 5100, 5130, 5160, 5190, 5220, 5250, 5280, 5310, 5340, 5370, 5400, 5430, 5460, 5490, 5520, 5550, 5580, 5610, 5640, 5670, 5700, 5730, 5760, 5790, 5820, 5850, 5880, 5910, 5940, 5970, 6000], "mean_rewards": [7.869385480880737, -0.03784379158769884, -0.38398340008542187, -0.5210831629951577, -0.5376839611586953, -0.5634360476859556, -0.5763718811332027, -0.5373458652470208, -0.5397185524229996, -0.4764093054678442, -0.4615140618583144, -0.5137614059286648, -0.4864953896740069, -0.45432056753359584, -0.4838050582557177, -0.4608095791283378, -0.45296298416134045, -0.4769556150426286, -0.4439206124924814, -0.46383931326204975, -0.5280678830547443, -0.5144892747803832, -0.5157356494235874, -0.4909607601900961, -0.42363169100999043, -0.46855141274285633, -0.38041776629621227, -0.3448548663086844, -0.35473247120983353, -0.3207630833906942, -0.23483040785133247, -0.20471441963477838, -0.158047115078677, -0.13889030838964989, -0.07396525167397514, 0.01805690512529864, -0.0565502941423398, 
0.002252017573410312, 0.04511761825704417, 0.04892878985612203, 0.017880844519726487, 0.07235489055415653, 0.136860854966899, 0.18752923034538596, 0.18795315243784858, 0.21790850257180286, 0.2798241166527451, 0.29991839786942137, 0.34795478758840076, 0.35555866552479815, 0.470945018639677, 0.46686866794348947, 0.42320820814405646, 0.4334799908564136, 0.47369504578423915, 0.5062765226389485, 0.5452343198395218, 0.5701588542495133, 0.5511569689877566, 0.5913673671022588, 0.6101769053628545, 0.6990016415824695, 0.7033187948030093, 0.6567048065584856, 0.6408570082110371, 0.6604031588664796, 0.6721762023058208, 0.6225126298323324, 0.6809567648467194, 0.7293495094705842, 0.6778633743740421, 0.6709112041037751, 0.7319613369706006, 0.8407363991117388, 0.8891737701153805, 0.8738852649425918, 0.9014333227411008, 1.0337030563943628, 1.0823944018347962, 1.033293042744158, 1.0458343673274535, 1.1098001648381175, 1.095692048926091, 1.07855425682897, 1.161847744206821, 1.1869417882993947, 1.208030396056306, 1.2077794038699878, 1.2062692112113589, 1.25349467685477, 1.235462390262609, 1.2114898104356318, 1.2211218770124659, 1.2378501558478996, 1.2069873789522723, 1.2299577432024658, 1.2741849172848048, 1.2766783515435478, 1.2896925689134475, 1.3053895339943369, 1.371006360144928, 1.4421150034805452, 1.3858411352686733, 1.3896705019873696, 1.3836635586910493, 1.4107440632724801, 1.3759163208120804, 1.378403567067588, 1.369893674923736, 1.3730929282629578, 1.3362735824085872, 1.353932322443938, 1.4317909455518947, 1.474903305566913, 1.4992357752538774, 1.5263299180614138, 1.495371489162674, 1.4704664318717473, 1.524534152993402, 1.538413893641127, 1.5681221992122811, 1.5714308077605157, 1.5370448222584308, 1.5366823457918495, 1.5262220435489018, 1.4881265710542806, 1.5132084826103085, 1.5588943709462066, 1.5223572991698955, 1.5288328876123696, 1.5251142936685456, 1.5322279708898652, 1.4718089767778755, 1.5043148173066319, 1.532659297278968, 1.5412766329642686, 1.554760075595603, 
1.5617171381958699, 1.5802682615536734, 1.5707963926178736, 1.5742931580278923, 1.618845480722406, 1.681138369700384, 1.6638504557052491, 1.6770966182345686, 1.63650145752522, 1.624646826235528, 1.6389080214187726, 1.620934071169726, 1.6273822806358431, 1.613626319843429, 1.5480119745938379, 1.5782707857344804, 1.5856853888808533, 1.6018862150357147, 1.6425269661083213, 1.6230434969893033, 1.606394495180898, 1.6375761047233424, 1.6313965761703895, 1.6889079141102705, 1.7454538686861691, 1.7183754010737369, 1.744705453254853, 1.7503461068891613, 1.7638859317706597, 1.8400702719381603, 1.8697715372607322, 1.8458195232380712, 1.922130432215038, 1.8721037799579596, 1.8412629770024815, 1.8464906375091183, 1.8437538819941877, 1.807503633808255, 1.7754378892041494, 1.732152500254409, 1.7182342989005177, 1.7024477059964094, 1.6869846862283635, 1.6826654165518309, 1.6873143049668589, 1.718196635267509, 1.6883231143891928, 1.6928668349194156, 1.7060435906028304, 1.697957367866974, 1.726183009851417, 1.7559919002404611, 1.7728966967794821, 1.768641453597152, 1.7635819557344923, 1.7206717061452468, 1.7514717591439208, 1.7769463395064586, 1.8193774796029343, 1.8263204891869909, 1.7906362776835727, 1.796637931567253, 1.802112196942187, 1.7855615070187418], "raw_rewards": [7.869385480880737, 0.5789080858230591, -0.22971093654632568, -0.23191767930984497, -1.4611610174179077, -1.462523341178894, -0.16410590708255768, 0.9507952705025673, 0.7632522517815232, 0.12662386894226074, 0.6558951139450073, -1.9698248095810413, -1.4647120237350464, -1.4707489013671875, -1.7659942060709, -1.7310243248939514, -0.9493927657604218, -0.7137753665447235, -1.3451157063245773, -0.3434333801269531, -0.1224970817565918, -0.2025727927684784, 0.7919805943965912, -1.5900378823280334, -0.02213919162750244, 0.23868058621883392, 0.609815925359726, 1.617654800415039, -1.3175745010375977, -0.23402023315429688, -0.7919640690088272, 0.3521580994129181, 0.26505059003829956, 0.3243334889411926, 
1.7180908620357513, -0.08261801302433014, -0.5888408124446869, 0.6898269057273865, 1.597887396812439, -3.4195436611771584, -1.036895513534546, 1.735375165939331, 1.1141240745782852, 1.0031541883945465, 3.58181095123291, 1.2519995421171188, -0.905962198972702, 2.344604343175888, -0.16510164737701416, -1.6151320934295654, 1.225896954536438, 2.166758894920349, -0.4428107738494873, -1.1809425950050354, 3.0023685693740845, -1.1184721887111664, 0.20370317995548248, -1.6265513896942139, 0.06449981778860092, 0.9455031752586365, -1.4633913040161133, 2.364822745323181, 0.4447557330131531, 0.6823182106018066, -1.8564150631427765, 2.234480082988739, -0.5784454345703125, 1.0068292915821075, 3.9354124665260315, 2.1249231696128845, -1.3315133452415466, 3.106228768825531, -1.1206167042255402, -1.624510645866394, 1.3102963268756866, -1.6248805522918701, -0.7630165815353394, 2.588159680366516, 0.18629544973373413, 1.871050775051117, 1.9939693808555603, 3.212405025959015, 1.0923259556293488, 1.4676193594932556, 1.850459337234497, -1.854575514793396, -0.6370886564254761, -0.3667806386947632, 0.5894009470939636, -1.4472685158252716, 1.1129309833049774, 2.8762494027614594, -0.3795955777168274, 3.6179640889167786, 1.412075400352478, 1.5585983395576477, 1.5607191920280457, 1.1473413854837418, 0.051368117332458496, -1.5951862335205078, 1.1134981513023376, 3.602319121360779, 2.647982209920883, 2.231625735759735, 0.09437358379364014, 2.98825940489769, 2.06882107257843, 3.894202470779419, 1.2249691784381866, 0.6109913885593414, 1.2600931525230408, 3.888312876224518, 2.653698205947876, 3.7928194403648376, 3.78766006231308, 2.497298628091812, 3.244860827922821, 1.5325765013694763, 2.1558673977851868, 1.0946676433086395, -0.5991351902484894, 0.9993070363998413, 1.0076514780521393, -0.9858678132295609, 3.0596781373023987, 0.9859656542539597, -0.6234785914421082, 2.0233528912067413, 3.1986913979053497, 1.2487848103046417, -1.1012918949127197, 1.9054069519042969, 0.5774856805801392, 
2.873240500688553, 2.0751985609531403, 0.43692925572395325, 0.44107480347156525, 3.7445985674858093, 3.2738695442676544, 1.6449948251247406, 2.79997581243515, -0.5944533348083496, 0.9953901767730713, 1.9119168519973755, 1.992238163948059, 1.6357325911521912, 1.5611605048179626, 0.9630968570709229, -0.2885543704032898, 2.3379001021385193, 1.0430959463119507, 3.019285798072815, 2.56219819188118, -0.0103532075881958, 3.545325219631195, 1.6280039548873901, 2.413131445646286, 2.3907550573349, 0.48759880661964417, 3.15597265958786, 1.919751524925232, 0.7646693168208003, 1.5510457158088684, 1.2842804789543152, 2.391243815422058, 2.117287963628769, 1.5539557337760925, 0.17066586017608643, 1.6957352757453918, 2.897974669933319, 1.5834221541881561, 1.3663267493247986, 1.5662124156951904, 1.8789651691913605, -0.6412868201732635, 2.6010509729385376, -0.36556345224380493, 1.1735109984874725, 2.2970699667930603, 3.421109616756439, 2.345623791217804, 3.973730683326721, -0.2049075961112976, 0.8851002007722855, 3.5538004636764526, 1.331779658794403, 3.5597912073135376, 2.337675094604492, 2.5098856687545776, 1.7456613183021545, 2.1533316373825073, 2.513371169567108, 1.563796043395996, 0.9926588535308838, 2.324680268764496, 1.5514620542526245, 0.8437174558639526, 0.4093678444623947, 1.5524056553840637, 0.4966275990009308, -1.2454921007156372], "step": 15000}
|
reward_curve.png
CHANGED
|
|
spindleflow_model_latest.zip
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 143819552
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74272b5c8f16c62fe53d9f29d8b79657c5dfeb342105db48c5b948fbef00681b
|
| 3 |
size 143819552
|
training_log.txt
CHANGED
|
@@ -167,3 +167,101 @@
|
|
| 167 |
[08:07:27] Ep 3600 | reward -0.091 | Phase 3/3 | Rolling mean: 1.554 / β | Episodes in phase: 1375
|
| 168 |
[08:07:43] Ep 3625 | reward +0.999 | Phase 3/3 | Rolling mean: 1.488 / β | Episodes in phase: 1400
|
| 169 |
[08:07:44] Periodic save at step 10,000 ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
[08:07:27] Ep 3600 | reward -0.091 | Phase 3/3 | Rolling mean: 1.554 / β | Episodes in phase: 1375
|
| 168 |
[08:07:43] Ep 3625 | reward +0.999 | Phase 3/3 | Rolling mean: 1.488 / β | Episodes in phase: 1400
|
| 169 |
[08:07:44] Periodic save at step 10,000 ...
|
| 170 |
+
[08:07:47] Periodic push done — 5 files at step 10,000
|
| 171 |
+
[08:07:48] Ep 3650 | reward +3.507 | Phase 3/3 | Rolling mean: 1.491 / β | Episodes in phase: 1425
|
| 172 |
+
[08:07:50] Ep 3675 | reward +1.569 | Phase 3/3 | Rolling mean: 1.523 / β | Episodes in phase: 1450
|
| 173 |
+
[08:07:51] Ep 3700 | reward +1.832 | Phase 3/3 | Rolling mean: 1.531 / β | Episodes in phase: 1475
|
| 174 |
+
[08:08:05] Ep 3725 | reward +1.000 | Phase 3/3 | Rolling mean: 1.487 / β | Episodes in phase: 1500
|
| 175 |
+
[08:08:06] Ep 3750 | reward +0.162 | Phase 3/3 | Rolling mean: 1.503 / β | Episodes in phase: 1525
|
| 176 |
+
[08:08:09] Ep 3775 | reward +0.485 | Phase 3/3 | Rolling mean: 1.498 / β | Episodes in phase: 1550
|
| 177 |
+
[08:08:11] Ep 3800 | reward +2.840 | Phase 3/3 | Rolling mean: 1.526 / β | Episodes in phase: 1575
|
| 178 |
+
[08:08:25] Ep 3825 | reward +0.482 | Phase 3/3 | Rolling mean: 1.608 / β | Episodes in phase: 1600
|
| 179 |
+
[08:08:26] Ep 3850 | reward +2.716 | Phase 3/3 | Rolling mean: 1.512 / β | Episodes in phase: 1625
|
| 180 |
+
[08:08:28] Ep 3875 | reward +2.714 | Phase 3/3 | Rolling mean: 1.549 / β | Episodes in phase: 1650
|
| 181 |
+
[08:08:29] Ep 3900 | reward +1.553 | Phase 3/3 | Rolling mean: 1.577 / β | Episodes in phase: 1675
|
| 182 |
+
[08:08:45] Ep 3925 | reward -0.296 | Phase 3/3 | Rolling mean: 1.540 / β | Episodes in phase: 1700
|
| 183 |
+
[08:08:46] Ep 3950 | reward +1.561 | Phase 3/3 | Rolling mean: 1.466 / β | Episodes in phase: 1725
|
| 184 |
+
[08:08:48] Ep 3975 | reward +4.074 | Phase 3/3 | Rolling mean: 1.487 / β | Episodes in phase: 1750
|
| 185 |
+
[08:08:51] Ep 4000 | reward +1.566 | Phase 3/3 | Rolling mean: 1.503 / β | Episodes in phase: 1775
|
| 186 |
+
[08:09:06] Ep 4025 | reward +3.457 | Phase 3/3 | Rolling mean: 1.455 / β | Episodes in phase: 1800
|
| 187 |
+
[08:09:08] Ep 4050 | reward +2.916 | Phase 3/3 | Rolling mean: 1.559 / β | Episodes in phase: 1825
|
| 188 |
+
[08:09:09] Ep 4075 | reward -1.564 | Phase 3/3 | Rolling mean: 1.541 / β | Episodes in phase: 1850
|
| 189 |
+
[08:09:10] Ep 4100 | reward +3.629 | Phase 3/3 | Rolling mean: 1.564 / β | Episodes in phase: 1875
|
| 190 |
+
[08:09:31] Ep 4125 | reward -0.041 | Phase 3/3 | Rolling mean: 1.616 / β | Episodes in phase: 1900
|
| 191 |
+
[08:09:33] Ep 4150 | reward +2.066 | Phase 3/3 | Rolling mean: 1.656 / β | Episodes in phase: 1925
|
| 192 |
+
[08:09:34] Ep 4175 | reward +2.140 | Phase 3/3 | Rolling mean: 1.644 / β | Episodes in phase: 1950
|
| 193 |
+
[08:09:35] Ep 4200 | reward +3.126 | Phase 3/3 | Rolling mean: 1.636 / β | Episodes in phase: 1975
|
| 194 |
+
[08:09:51] Ep 4225 | reward +1.709 | Phase 3/3 | Rolling mean: 1.699 / β | Episodes in phase: 2000
|
| 195 |
+
[08:09:54] Ep 4250 | reward -0.055 | Phase 3/3 | Rolling mean: 1.668 / β | Episodes in phase: 2025
|
| 196 |
+
[08:09:56] Ep 4275 | reward +0.543 | Phase 3/3 | Rolling mean: 1.624 / β | Episodes in phase: 2050
|
| 197 |
+
[08:09:57] Ep 4300 | reward +2.236 | Phase 3/3 | Rolling mean: 1.643 / β | Episodes in phase: 2075
|
| 198 |
+
[08:10:12] Ep 4325 | reward +0.196 | Phase 3/3 | Rolling mean: 1.628 / β | Episodes in phase: 2100
|
| 199 |
+
[08:10:13] Ep 4350 | reward +1.938 | Phase 3/3 | Rolling mean: 1.650 / β | Episodes in phase: 2125
|
| 200 |
+
[08:10:14] Ep 4375 | reward +1.430 | Phase 3/3 | Rolling mean: 1.648 / β | Episodes in phase: 2150
|
| 201 |
+
[08:10:16] Ep 4400 | reward +1.555 | Phase 3/3 | Rolling mean: 1.652 / β | Episodes in phase: 2175
|
| 202 |
+
[08:10:32] Ep 4425 | reward +2.752 | Phase 3/3 | Rolling mean: 1.617 / β | Episodes in phase: 2200
|
| 203 |
+
[08:10:34] Ep 4450 | reward +2.292 | Phase 3/3 | Rolling mean: 1.567 / β | Episodes in phase: 2225
|
| 204 |
+
[08:10:38] Ep 4475 | reward -0.754 | Phase 3/3 | Rolling mean: 1.611 / β | Episodes in phase: 2250
|
| 205 |
+
[08:10:39] Ep 4500 | reward +2.128 | Phase 3/3 | Rolling mean: 1.563 / β | Episodes in phase: 2275
|
| 206 |
+
[08:10:55] Ep 4525 | reward +0.792 | Phase 3/3 | Rolling mean: 1.522 / β | Episodes in phase: 2300
|
| 207 |
+
[08:10:57] Ep 4550 | reward +2.848 | Phase 3/3 | Rolling mean: 1.561 / β | Episodes in phase: 2325
|
| 208 |
+
[08:10:58] Ep 4575 | reward +1.605 | Phase 3/3 | Rolling mean: 1.600 / β | Episodes in phase: 2350
|
| 209 |
+
[08:10:59] Ep 4600 | reward +3.546 | Phase 3/3 | Rolling mean: 1.579 / β | Episodes in phase: 2375
|
| 210 |
+
[08:11:16] Ep 4625 | reward +0.029 | Phase 3/3 | Rolling mean: 1.570 / β | Episodes in phase: 2400
|
| 211 |
+
[08:11:18] Ep 4650 | reward +2.394 | Phase 3/3 | Rolling mean: 1.676 / β | Episodes in phase: 2425
|
| 212 |
+
[08:11:19] Ep 4675 | reward +2.261 | Phase 3/3 | Rolling mean: 1.612 / β | Episodes in phase: 2450
|
| 213 |
+
[08:11:21] Ep 4700 | reward +0.827 | Phase 3/3 | Rolling mean: 1.650 / β | Episodes in phase: 2475
|
| 214 |
+
[08:11:39] Ep 4725 | reward +3.394 | Phase 3/3 | Rolling mean: 1.728 / β | Episodes in phase: 2500
|
| 215 |
+
[08:11:40] Ep 4750 | reward +1.881 | Phase 3/3 | Rolling mean: 1.688 / β | Episodes in phase: 2525
|
| 216 |
+
[08:11:42] Ep 4775 | reward +2.768 | Phase 3/3 | Rolling mean: 1.665 / β | Episodes in phase: 2550
|
| 217 |
+
[08:11:43] Ep 4800 | reward +2.454 | Phase 3/3 | Rolling mean: 1.714 / β | Episodes in phase: 2575
|
| 218 |
+
[08:12:01] Ep 4825 | reward +2.767 | Phase 3/3 | Rolling mean: 1.745 / β | Episodes in phase: 2600
|
| 219 |
+
[08:12:02] Ep 4850 | reward -0.418 | Phase 3/3 | Rolling mean: 1.665 / β | Episodes in phase: 2625
|
| 220 |
+
[08:12:04] Ep 4875 | reward +1.551 | Phase 3/3 | Rolling mean: 1.779 / β | Episodes in phase: 2650
|
| 221 |
+
[08:12:06] Ep 4900 | reward +3.347 | Phase 3/3 | Rolling mean: 1.793 / β | Episodes in phase: 2675
|
| 222 |
+
[08:12:22] Ep 4925 | reward +1.148 | Phase 3/3 | Rolling mean: 1.783 / β | Episodes in phase: 2700
|
| 223 |
+
[08:12:23] Ep 4950 | reward +2.564 | Phase 3/3 | Rolling mean: 1.854 / β | Episodes in phase: 2725
|
| 224 |
+
[08:12:27] Ep 4975 | reward +1.749 | Phase 3/3 | Rolling mean: 1.908 / β | Episodes in phase: 2750
|
| 225 |
+
[08:12:28] Ep 5000 | reward +3.049 | Phase 3/3 | Rolling mean: 1.911 / β | Episodes in phase: 2775
|
| 226 |
+
[08:12:44] Ep 5025 | reward +3.009 | Phase 3/3 | Rolling mean: 1.889 / β | Episodes in phase: 2800
|
| 227 |
+
[08:12:45] Ep 5050 | reward +2.506 | Phase 3/3 | Rolling mean: 1.943 / β | Episodes in phase: 2825
|
| 228 |
+
[08:12:47] Ep 5075 | reward +1.562 | Phase 3/3 | Rolling mean: 1.932 / β | Episodes in phase: 2850
|
| 229 |
+
[08:12:48] Ep 5100 | reward +1.220 | Phase 3/3 | Rolling mean: 1.894 / β | Episodes in phase: 2875
|
| 230 |
+
[08:13:06] Ep 5125 | reward +2.966 | Phase 3/3 | Rolling mean: 1.891 / β | Episodes in phase: 2900
|
| 231 |
+
[08:13:07] Ep 5150 | reward +2.490 | Phase 3/3 | Rolling mean: 1.806 / β | Episodes in phase: 2925
|
| 232 |
+
[08:13:08] Ep 5175 | reward +3.160 | Phase 3/3 | Rolling mean: 1.779 / β | Episodes in phase: 2950
|
| 233 |
+
[08:13:11] Ep 5200 | reward +2.764 | Phase 3/3 | Rolling mean: 1.741 / β | Episodes in phase: 2975
|
| 234 |
+
[08:13:28] Ep 5225 | reward -1.548 | Phase 3/3 | Rolling mean: 1.686 / β | Episodes in phase: 3000
|
| 235 |
+
[08:13:29] Ep 5250 | reward +3.422 | Phase 3/3 | Rolling mean: 1.688 / β | Episodes in phase: 3025
|
| 236 |
+
[08:13:30] Ep 5275 | reward +1.713 | Phase 3/3 | Rolling mean: 1.656 / β | Episodes in phase: 3050
|
| 237 |
+
[08:13:32] Ep 5300 | reward +1.742 | Phase 3/3 | Rolling mean: 1.694 / β | Episodes in phase: 3075
|
| 238 |
+
[08:13:46] Ep 5325 | reward +1.142 | Phase 3/3 | Rolling mean: 1.640 / β | Episodes in phase: 3100
|
| 239 |
+
[08:13:48] Ep 5350 | reward +0.820 | Phase 3/3 | Rolling mean: 1.695 / β | Episodes in phase: 3125
|
| 240 |
+
[08:13:49] Ep 5375 | reward +1.741 | Phase 3/3 | Rolling mean: 1.693 / β | Episodes in phase: 3150
|
| 241 |
+
[08:13:50] Ep 5400 | reward +0.858 | Phase 3/3 | Rolling mean: 1.678 / β | Episodes in phase: 3175
|
| 242 |
+
[08:14:06] Ep 5425 | reward +0.858 | Phase 3/3 | Rolling mean: 1.710 / β | Episodes in phase: 3200
|
| 243 |
+
[08:14:09] Ep 5450 | reward +0.983 | Phase 3/3 | Rolling mean: 1.734 / β | Episodes in phase: 3225
|
| 244 |
+
[08:14:11] Ep 5475 | reward +0.324 | Phase 3/3 | Rolling mean: 1.743 / β | Episodes in phase: 3250
|
| 245 |
+
[08:14:12] Ep 5500 | reward +0.666 | Phase 3/3 | Rolling mean: 1.652 / β | Episodes in phase: 3275
|
| 246 |
+
[08:14:28] Ep 5525 | reward +0.981 | Phase 3/3 | Rolling mean: 1.719 / β | Episodes in phase: 3300
|
| 247 |
+
[08:14:29] Ep 5550 | reward +2.169 | Phase 3/3 | Rolling mean: 1.706 / β | Episodes in phase: 3325
|
| 248 |
+
[08:14:30] Ep 5575 | reward +0.982 | Phase 3/3 | Rolling mean: 1.679 / β | Episodes in phase: 3350
|
| 249 |
+
[08:14:32] Ep 5600 | reward +1.002 | Phase 3/3 | Rolling mean: 1.733 / β | Episodes in phase: 3375
|
| 250 |
+
[08:14:47] Ep 5625 | reward +3.085 | Phase 3/3 | Rolling mean: 1.797 / β | Episodes in phase: 3400
|
| 251 |
+
[08:14:48] Ep 5650 | reward +0.114 | Phase 3/3 | Rolling mean: 1.749 / β | Episodes in phase: 3425
|
| 252 |
+
[08:14:49] Ep 5675 | reward +2.483 | Phase 3/3 | Rolling mean: 1.784 / β | Episodes in phase: 3450
|
| 253 |
+
[08:14:53] Ep 5700 | reward +0.842 | Phase 3/3 | Rolling mean: 1.832 / β | Episodes in phase: 3475
|
| 254 |
+
[08:15:09] Ep 5725 | reward -1.542 | Phase 3/3 | Rolling mean: 1.815 / β | Episodes in phase: 3500
|
| 255 |
+
[08:15:11] Ep 5750 | reward +1.748 | Phase 3/3 | Rolling mean: 1.753 / β | Episodes in phase: 3525
|
| 256 |
+
[08:15:12] Ep 5775 | reward +1.872 | Phase 3/3 | Rolling mean: 1.796 / β | Episodes in phase: 3550
|
| 257 |
+
[08:15:13] Ep 5800 | reward +2.027 | Phase 3/3 | Rolling mean: 1.785 / β | Episodes in phase: 3575
|
| 258 |
+
[08:15:28] Ep 5825 | reward +2.915 | Phase 3/3 | Rolling mean: 1.741 / β | Episodes in phase: 3600
|
| 259 |
+
[08:15:30] Ep 5850 | reward +3.336 | Phase 3/3 | Rolling mean: 1.799 / β | Episodes in phase: 3625
|
| 260 |
+
[08:15:31] Ep 5875 | reward +0.619 | Phase 3/3 | Rolling mean: 1.748 / β | Episodes in phase: 3650
|
| 261 |
+
[08:15:32] Ep 5900 | reward +0.004 | Phase 3/3 | Rolling mean: 1.753 / β | Episodes in phase: 3675
|
| 262 |
+
[08:15:47] Ep 5925 | reward +2.854 | Phase 3/3 | Rolling mean: 1.796 / β | Episodes in phase: 3700
|
| 263 |
+
[08:15:49] Ep 5950 | reward +2.325 | Phase 3/3 | Rolling mean: 1.873 / β | Episodes in phase: 3725
|
| 264 |
+
[08:15:52] Ep 5975 | reward +2.218 | Phase 3/3 | Rolling mean: 1.895 / β | Episodes in phase: 3750
|
| 265 |
+
[08:15:53] Ep 6000 | reward +3.170 | Phase 3/3 | Rolling mean: 1.835 / β | Episodes in phase: 3775
|
| 266 |
+
[08:16:08] Ep 6025 | reward +0.068 | Phase 3/3 | Rolling mean: 1.834 / β | Episodes in phase: 3800
|
| 267 |
+
[08:16:08] Periodic save at step 15,000 ...
|
vec_normalize_latest.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 166596
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0641d426a429ea3f6c247160b1575b38235caa910f82f5fc207532589b97759f
|
| 3 |
size 166596
|