Instructions to use garvitsachdeva/spindleflow-rl with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- stable-baselines3
How to use garvitsachdeva/spindleflow-rl with stable-baselines3:
from huggingface_sb3 import load_from_hub

checkpoint = load_from_hub(
    repo_id="garvitsachdeva/spindleflow-rl",
    filename="{MODEL FILENAME}.zip",
)
- Notebooks
- Google Colab
- Kaggle
Checkpoint at step 5,000
Browse files
- reward_curve.json +1 -0
- reward_curve.png +0 -0
- spindleflow_model_latest.zip +3 -0
- training_log.txt +81 -0
- vec_normalize_latest.pkl +3 -0
reward_curve.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"episodes": [0, 7, 14, 21, 28, 35, 42, 49, 56, 63, 70, 77, 84, 91, 98, 105, 112, 119, 126, 133, 140, 147, 154, 161, 168, 175, 182, 189, 196, 203, 210, 217, 224, 231, 238, 245, 252, 259, 266, 273, 280, 287, 294, 301, 308, 315, 322, 329, 336, 343, 350, 357, 364, 371, 378, 385, 392, 399, 406, 413, 420, 427, 434, 441, 448, 455, 462, 469, 476, 483, 490, 497, 504, 511, 518, 525, 532, 539, 546, 553, 560, 567, 574, 581, 588, 595, 602, 609, 616, 623, 630, 637, 644, 651, 658, 665, 672, 679, 686, 693, 700, 707, 714, 721, 728, 735, 742, 749, 756, 763, 770, 777, 784, 791, 798, 805, 812, 819, 826, 833, 840, 847, 854, 861, 868, 875, 882, 889, 896, 903, 910, 917, 924, 931, 938, 945, 952, 959, 966, 973, 980, 987, 994, 1001, 1008, 1015, 1022, 1029, 1036, 1043, 1050, 1057, 1064, 1071, 1078, 1085, 1092, 1099, 1106, 1113, 1120, 1127, 1134, 1141, 1148, 1155, 1162, 1169, 1176, 1183, 1190, 1197, 1204, 1211, 1218, 1225, 1232, 1239, 1246, 1253, 1260, 1267, 1274, 1281, 1288, 1295, 1302, 1309, 1316, 1323, 1330, 1337, 1344, 1351, 1358, 1365, 1372, 1379, 1386, 1393, 1400, 1407, 1414, 1421, 1428, 1435, 1442, 1449, 1456, 1463, 1470, 1477, 1484, 1491, 1498, 1505], "mean_rewards": [7.869385480880737, 0.39447228563949466, 0.46736883049209915, 0.3199193167754195, -0.010892340720727527, -0.12946453556004497, -0.2949352840524773, -0.373719582632184, -0.38903877655403657, -0.394298461091239, -0.4755417896093617, -0.6254103597939799, -0.631615869112705, -0.7221846164841401, -0.7328726649284363, -0.7261277403878538, -0.7111742453355538, -0.6811343130508536, -0.6172156007470269, -0.5741234169782776, -0.5649614407386827, -0.6304719744887399, -0.6643430014925176, -0.7462935703316409, -0.7303286298472238, -0.6525270720992825, -0.6220860190346444, -0.5855647556140626, -0.5600520932409716, -0.6385312578640878, -0.6217203504209848, -0.6404820328186217, -0.5646297308665357, -0.46785074545952837, -0.38970960279632555, -0.3499533957924302, -0.30017317354826156, -0.2824550400316519, -0.3004617769344661, 
-0.3535477963006614, -0.33393376740921094, -0.3313333828271808, -0.2409426725690106, -0.16390284169220218, -0.21640204097830543, -0.2319642876789562, -0.31141620639123413, -0.353557417757417, -0.3985643586339919, -0.4311660311527942, -0.3594949227806769, -0.34175623384745496, -0.4613509353642401, -0.47967430919800935, -0.5492527224123478, -0.49916473776102066, -0.4918099214371882, -0.47077294831213196, -0.4269245275737424, -0.4952932044275497, -0.6369963715245065, -0.6453018104657531, -0.6300839686178064, -0.49564921125573547, -0.5465395265798035, -0.5404178698694235, -0.6208385011965507, -0.6157085209790814, -0.6108908611594847, -0.7157817421186912, -0.611315862471728, -0.5023467261834365, -0.5686648415499612, -0.5695220507485302, -0.6284753986095127, -0.5987623235897014, -0.5179943298234752, -0.47517906001916055, -0.4324410670485936, -0.4277130573693859, -0.33923015503310844, -0.31555696287633556, -0.2900881542285022, -0.34110071137547493, -0.3634238948947505, -0.46017439952610356, -0.46005683769717026, -0.5069957694626952, -0.5368034732282946, -0.5363630219975388, -0.5321896889394051, -0.513642537456594, -0.4158981905171746, -0.48374814049978004, -0.47483629596076515, -0.44893436712261875, -0.34121398338557857, -0.37074862562708166, -0.35986225605991323, -0.2888309538462444, -0.24531601732106587, -0.139288459659407, -0.26363536735114296, -0.45459090594790486, -0.4021470984934192, -0.3751047815343267, -0.42528639194604595, -0.512994073527424, -0.33741763302762257, -0.3813054520440729, -0.3467753083494149, -0.38562066176612125, -0.4449031753465533, -0.4120248835043688, -0.32635698609642294, -0.23468282377641453, -0.14027568080315464, -0.10482709658773322, -0.04141475936692012, -0.21747780304500147, -0.19404257870720407, -0.18385122959060887, -0.190495409046937, -0.1766425924816806, -0.11231868486165215, -0.07688000227177613, -0.11049901294571005, -0.11022839932947566, -0.05706106148366081, 0.00837513066730217, 0.00798436445429137, 0.036163482373874435, 
0.03783564666580213, -0.00424236260158451, 0.027116403375801287, 0.043803773428264414, 0.03858699110385619, 0.033069006020301266, 0.01624414589452116, -0.1366923186731966, -0.041730524187809544, 0.00771083506314378, 0.034296022335949694, 0.09421771705934875, 0.15127575436705037, 0.09569966989128213, 0.10003364811602392, 0.029222117050697927, 0.07921872876192394, 0.15258988159659662, 0.21950287509121394, 0.14954511282083235, 0.11745882475454557, 0.011103616831334014, -0.07632065397736273, -0.15541876722617368, -0.016381304000357266, 0.026465766058352432, 0.033593990717475354, -0.05189018341173467, -0.035155581481950846, -0.06485033640637994, 0.07285434382624532, 0.11660055379922453, 0.20391239691525698, 0.25393885869140687, 0.2059961920359025, 0.1650057145100283, -0.10431696060358693, -0.007403379869892409, 0.06139961681573799, -0.008295599640788217, 0.024523470379216105, 0.00638242489903381, 0.03186144836639103, 0.0253872059800319, 0.10695318372822121, 0.21488577240195714, 0.2510290814955768, 0.4169209191918765, 0.447288881619706, 0.46890390509935587, 0.5537877085030471, 0.5642890175559411, 0.5325108316702474, 0.4965915335955, 0.5290486808057482, 0.4647692522695778, 0.34476327095916004, 0.4440733800568667, 0.4059782953271152, 0.3710075085124883, 0.39774463871052784, 0.3539025296922773, 0.3479153207797361, 0.3787729506576924, 0.3558028209709415, 0.31861691088660765, 0.3099606362612624, 0.3599474479101206, 0.3267678932139748, 0.3568433095750056, 0.4187891443112963, 0.4246233755624608, 0.3433752029545997, 0.31683328071315037, 0.39499579752354247, 0.4693222563517721, 0.5369696448508062, 0.5532687793632871, 0.4783048149271819, 0.5151937603827959, 0.5181547475758156, 0.5387902116574543, 0.5961638863909205, 0.6189852633734086], "raw_rewards": [7.869385480880737, -2.0270893275737762, -1.5078903436660767, -1.6289145946502686, -1.4581109285354614, -1.4582953453063965, 0.30212289094924927, 0.20895814895629883, 0.420966237783432, -1.42896568775177, -1.7208606004714966, 
-1.4811574220657349, -0.1653035283088684, 0.04737560451030731, 1.0387153327465057, -1.4575903415679932, -1.4570224285125732, 1.0627342760562897, -0.8742947578430176, 1.6949703097343445, -1.0036369562149048, -1.4761041402816772, -1.4624449014663696, -1.4472192525863647, -1.4332927465438843, 0.7904170751571655, 0.653762698173523, 1.2795192301273346, -0.3608699440956116, 0.35677075386047363, 0.9507952705025673, -3.4470927715301514, -1.4728577136993408, -1.483479380607605, 0.21373587846755981, 1.4765518307685852, -1.6128928735852242, -0.1485140323638916, 2.6798701882362366, -1.4776854515075684, -1.4701982736587524, -1.46907639503479, -1.4756847620010376, 2.111427068710327, -1.472286581993103, -1.4616392850875854, 1.861622393131256, 0.5909617990255356, 0.12714916467666626, -0.45931126549839973, 0.5657570883631706, -2.696280002593994, -1.1088446974754333, -0.629875659942627, -1.4666680097579956, -1.4689733982086182, -0.5987190008163452, -0.6024218797683716, 0.8062635287642479, -1.4665796756744385, -1.7659942060709, 0.5284237861633301, 0.3446391820907593, -1.4728001356124878, 0.9817864894866943, -1.473576545715332, 0.0034030526876449585, -1.4753345251083374, 0.03397753834724426, -0.8355124965310097, 2.2451577186584473, -1.2429626286029816, -1.0772457867860794, 0.44872643053531647, -1.1378175616264343, -0.1028159111738205, 0.3221398591995239, 1.5046327412128448, -0.939087450504303, -0.6979364454746246, 0.9164094515144825, -0.20006322860717773, -1.2905930280685425, -1.5479609966278076, -0.09268271923065186, -2.0991574972867966, 0.033834993839263916, 0.0028892159461975098, 0.6096921563148499, 0.23623988032341003, -0.2025727927684784, 2.117010712623596, 0.809642419219017, -1.8340483605861664, -1.55027636885643, 2.208156943321228, 1.2409679293632507, 0.3747459650039673, 0.5356261730194092, -1.5522199869155884, 1.7077340483665466, -1.8154371231794357, -1.549471378326416, -1.5465556383132935, -1.599133551120758, -2.312764525413513, -1.5398250818252563, 3.3785969018936157, 
-2.000951513648033, -2.503466285765171, -1.2611201405525208, -0.4323340952396393, -1.5388507843017578, -0.8869258761405945, 1.5722772777080536, -1.6581648588180542, -0.0460590124130249, 0.687589555978775, -0.3689194992184639, -2.1063080430030823, -1.3175745010375977, -1.5440726280212402, 1.2180553078651428, -1.6553943455219269, -0.4625515341758728, 0.7008185386657715, 2.127745598554611, 1.7467806935310364, -0.035655677318573, 0.8889943957328796, 0.4433015286922455, 0.16804933547973633, 3.3618789315223694, 1.5720088481903076, -0.3283916711807251, 1.053344488143921, 2.339919328689575, -0.939920961856842, -1.929141715168953, -1.4939937591552734, 0.46812979876995087, 1.9332732260227203, -0.05799245834350586, 1.641272246837616, 0.09446060657501221, -3.6827295124530792, -1.9294183105230331, -1.9493356943130493, -0.6192144155502319, 1.261986494064331, -0.08261801302433014, -2.1036276817321777, -0.6942150890827179, -1.5749655961990356, -0.6196750402450562, 0.3630285579711199, 0.2618691325187683, -0.291328564286232, -0.33801981806755066, -3.120486468076706, 1.8896211087703705, -2.12729275226593, -0.5134999752044678, 1.873863399028778, -1.3715928196907043, -0.5881721675395966, -1.2663371562957764, -2.325909271836281, -2.073127508163452, -1.5085158348083496, -0.21068474650382996, 1.8182676434516907, -1.391431838274002, 2.4527386128902435, 0.27165186405181885, 0.6319058239459991, 2.7988539934158325, 1.4977125525474548, -0.4476410448551178, 2.2035112977027893, 1.1141240745782852, 1.6052564978599548, 0.8674910366535187, -0.9870564341545105, 0.2687806189060211, -0.06043100357055664, 0.22413843870162964, -1.415934532880783, 0.5874640643596649, -0.2128455936908722, -0.5651094913482666, -0.07148459553718567, 0.6378590166568756, 3.1044903993606567, -1.9472792744636536, 1.4779390394687653, 2.53517609834671, -1.3368602693080902, 0.7410027906298637, -1.6118918657302856, 1.1316379755735397, 1.6181618869304657, 1.731214463710785, -0.7870284914970398, -1.7340275347232819, 
-0.04152199625968933, -1.6137357950210571, -0.5440936982631683, 2.3059643507003784, 2.786042958498001, -1.6151320934295654, 0.21432161331176758, 1.6381785869598389, 2.781656265258789, 1.1450567245483398, 0.05866968631744385], "step": 5000}
|
reward_curve.png
ADDED
|
spindleflow_model_latest.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60ea120815a053b10ff44d8005a7b91a831f86917864c69263254b4a5f5fa2c5
|
| 3 |
+
size 143819538
|
training_log.txt
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[07:52:57] OpenAI key found — finetuner + spawn self-learning enabled.
|
| 2 |
+
[07:52:58] Model will be pushed to: https://huggingface.co/garvitsachdeva/spindleflow-rl
|
| 3 |
+
[07:52:58] Working directory: /home/user/app
|
| 4 |
+
[07:52:58] Patching SentenceTransformer to CUDA...
|
| 5 |
+
[07:53:04] SentenceTransformer → cuda ✓
|
| 6 |
+
[07:53:04] Loading environment...
|
| 7 |
+
[07:53:04] TieredRewardScorer — Tier-1 only (LLM judge disabled for speed) ✓
|
| 8 |
+
[07:53:04] Generalist baseline — static simulation (0 API calls per episode) ✓
|
| 9 |
+
[07:53:04] Running smoke test...
|
| 10 |
+
[07:53:07] Smoke test OK — obs shape (5490,)
|
| 11 |
+
[07:53:07] Benchmarking SentenceTransformer encode speed...
|
| 12 |
+
[07:53:07] Encode speed : 8.0 ms/call [CUDA — fast]
|
| 13 |
+
[07:53:07] Benchmarking full env.step() speed...
|
| 14 |
+
[07:53:09] Step speed : 33.4 ms/step [fast ✓]
|
| 15 |
+
[07:53:09] Projected 100k steps: 56 min
|
| 16 |
+
[07:53:10] Training on : cuda
|
| 17 |
+
[07:53:10] Curriculum : Phase 1 — Phase 1/3 | Rolling mean: 0.000 / 0.6 | Episodes in phase: 0
|
| 18 |
+
[07:53:10] Total steps : 30,000
|
| 19 |
+
[07:53:10] Training started...
|
| 20 |
+
|
| 21 |
+
[07:53:12] Ep 25 | reward +2.018 | Phase 1/3 | Rolling mean: 0.237 / 0.6 | Episodes in phase: 25
|
| 22 |
+
[07:53:14] Ep 50 | reward +0.209 | Phase 1/3 | Rolling mean: -0.374 / 0.6 | Episodes in phase: 50
|
| 23 |
+
[07:53:16] Ep 75 | reward -1.476 | Phase 1/3 | Rolling mean: -0.471 / 0.6 | Episodes in phase: 75
|
| 24 |
+
[07:53:17] Ep 100 | reward -1.467 | Phase 1/3 | Rolling mean: -0.517 / 0.6 | Episodes in phase: 100
|
| 25 |
+
[07:53:32] Ep 125 | reward +0.543 | Phase 1/3 | Rolling mean: -0.513 / 0.6 | Episodes in phase: 125
|
| 26 |
+
[07:53:35] Ep 150 | reward -2.948 | Phase 1/3 | Rolling mean: -0.557 / 0.6 | Episodes in phase: 150
|
| 27 |
+
[07:53:36] Ep 175 | reward +1.288 | Phase 1/3 | Rolling mean: -0.583 / 0.6 | Episodes in phase: 175
|
| 28 |
+
[07:53:41] Ep 200 | reward +0.220 | Phase 1/3 | Rolling mean: -0.545 / 0.6 | Episodes in phase: 200
|
| 29 |
+
[07:53:57] Ep 225 | reward -1.473 | Phase 1/3 | Rolling mean: -0.648 / 0.6 | Episodes in phase: 225
|
| 30 |
+
[07:54:00] Ep 250 | reward -0.720 | Phase 1/3 | Rolling mean: -0.548 / 0.6 | Episodes in phase: 250
|
| 31 |
+
[07:54:02] Ep 275 | reward -3.039 | Phase 1/3 | Rolling mean: -0.512 / 0.6 | Episodes in phase: 275
|
| 32 |
+
[07:54:04] Ep 300 | reward -0.902 | Phase 1/3 | Rolling mean: -0.439 / 0.6 | Episodes in phase: 300
|
| 33 |
+
[07:54:21] Ep 325 | reward -1.453 | Phase 1/3 | Rolling mean: -0.447 / 0.6 | Episodes in phase: 325
|
| 34 |
+
[07:54:25] Ep 350 | reward +1.876 | Phase 1/3 | Rolling mean: -0.410 / 0.6 | Episodes in phase: 350
|
| 35 |
+
[07:54:27] Ep 375 | reward +0.403 | Phase 1/3 | Rolling mean: -0.379 / 0.6 | Episodes in phase: 375
|
| 36 |
+
[07:54:29] Ep 400 | reward -0.602 | Phase 1/3 | Rolling mean: -0.394 / 0.6 | Episodes in phase: 400
|
| 37 |
+
[07:54:44] Ep 425 | reward -2.030 | Phase 1/3 | Rolling mean: -0.444 / 0.6 | Episodes in phase: 425
|
| 38 |
+
[07:54:46] Ep 450 | reward +0.240 | Phase 1/3 | Rolling mean: -0.449 / 0.6 | Episodes in phase: 450
|
| 39 |
+
[07:54:49] Ep 475 | reward -1.267 | Phase 1/3 | Rolling mean: -0.487 / 0.6 | Episodes in phase: 475
|
| 40 |
+
[07:54:54] Ep 500 | reward -2.088 | Phase 1/3 | Rolling mean: -0.564 / 0.6 | Episodes in phase: 500
|
| 41 |
+
[07:55:11] Ep 525 | reward -2.098 | Phase 1/3 | Rolling mean: -0.550 / 0.6 | Episodes in phase: 525
|
| 42 |
+
[07:55:14] Ep 550 | reward +0.230 | Phase 1/3 | Rolling mean: -0.511 / 0.6 | Episodes in phase: 550
|
| 43 |
+
[07:55:17] Ep 575 | reward -1.291 | Phase 1/3 | Rolling mean: -0.472 / 0.6 | Episodes in phase: 575
|
| 44 |
+
[07:55:19] Ep 600 | reward -2.534 | Phase 1/3 | Rolling mean: -0.551 / 0.6 | Episodes in phase: 600
|
| 45 |
+
[07:55:36] Ep 625 | reward +0.415 | Phase 1/3 | Rolling mean: -0.478 / 0.6 | Episodes in phase: 625
|
| 46 |
+
[07:55:38] Ep 650 | reward -1.307 | Phase 1/3 | Rolling mean: -0.453 / 0.6 | Episodes in phase: 650
|
| 47 |
+
[07:55:41] Ep 675 | reward +1.887 | Phase 1/3 | Rolling mean: -0.420 / 0.6 | Episodes in phase: 675
|
| 48 |
+
[07:55:43] Ep 700 | reward -0.268 | Phase 1/3 | Rolling mean: -0.380 / 0.6 | Episodes in phase: 700
|
| 49 |
+
[07:55:58] Ep 725 | reward -1.548 | Phase 1/3 | Rolling mean: -0.372 / 0.6 | Episodes in phase: 725
|
| 50 |
+
[07:56:02] Ep 750 | reward +3.379 | Phase 1/3 | Rolling mean: -0.462 / 0.6 | Episodes in phase: 750
|
| 51 |
+
[07:56:04] Ep 775 | reward -0.134 | Phase 1/3 | Rolling mean: -0.422 / 0.6 | Episodes in phase: 775
|
| 52 |
+
[07:56:07] Ep 800 | reward +0.350 | Phase 1/3 | Rolling mean: -0.320 / 0.6 | Episodes in phase: 800
|
| 53 |
+
[07:56:23] Ep 825 | reward -0.756 | Phase 1/3 | Rolling mean: -0.284 / 0.6 | Episodes in phase: 825
|
| 54 |
+
[07:56:25] Ep 850 | reward +0.532 | Phase 1/3 | Rolling mean: -0.308 / 0.6 | Episodes in phase: 850
|
| 55 |
+
[07:56:28] Ep 875 | reward +0.814 | Phase 1/3 | Rolling mean: -0.246 / 0.6 | Episodes in phase: 875
|
| 56 |
+
[07:56:32] Ep 900 | reward -0.429 | Phase 1/3 | Rolling mean: -0.187 / 0.6 | Episodes in phase: 900
|
| 57 |
+
[07:56:46] Ep 925 | reward +3.362 | Phase 1/3 | Rolling mean: -0.150 / 0.6 | Episodes in phase: 925
|
| 58 |
+
[07:56:49] Ep 950 | reward -1.139 | Phase 1/3 | Rolling mean: -0.037 / 0.6 | Episodes in phase: 950
|
| 59 |
+
[07:56:52] Ep 975 | reward +0.267 | Phase 1/3 | Rolling mean: -0.090 / 0.6 | Episodes in phase: 975
|
| 60 |
+
[07:56:55] Ep 1000 | reward +0.900 | Phase 1/3 | Rolling mean: -0.015 / 0.6 | Episodes in phase: 1000
|
| 61 |
+
[07:57:10] Ep 1025 | reward -0.576 | Phase 1/3 | Rolling mean: 0.031 / 0.6 | Episodes in phase: 1025
|
| 62 |
+
[07:57:13] Ep 1050 | reward -1.113 | Phase 1/3 | Rolling mean: 0.062 / 0.6 | Episodes in phase: 1050
|
| 63 |
+
[07:57:17] Ep 1075 | reward -1.574 | Phase 1/3 | Rolling mean: -0.009 / 0.6 | Episodes in phase: 1075
|
| 64 |
+
[07:57:19] Ep 1100 | reward -0.291 | Phase 1/3 | Rolling mean: 0.049 / 0.6 | Episodes in phase: 1100
|
| 65 |
+
[07:57:36] Ep 1125 | reward +1.694 | Phase 1/3 | Rolling mean: 0.050 / 0.6 | Episodes in phase: 1125
|
| 66 |
+
[07:57:38] Ep 1150 | reward +0.781 | Phase 1/3 | Rolling mean: 0.078 / 0.6 | Episodes in phase: 1150
|
| 67 |
+
[07:57:40] Ep 1175 | reward -0.938 | Phase 1/3 | Rolling mean: 0.077 / 0.6 | Episodes in phase: 1175
|
| 68 |
+
[07:57:42] Ep 1200 | reward -0.997 | Phase 1/3 | Rolling mean: 0.031 / 0.6 | Episodes in phase: 1200
|
| 69 |
+
[07:58:00] Ep 1225 | reward +0.129 | Phase 1/3 | Rolling mean: 0.039 / 0.6 | Episodes in phase: 1225
|
| 70 |
+
[07:58:02] Ep 1250 | reward +0.765 | Phase 1/3 | Rolling mean: 0.134 / 0.6 | Episodes in phase: 1250
|
| 71 |
+
[07:58:04] Ep 1275 | reward +0.867 | Phase 1/3 | Rolling mean: 0.273 / 0.6 | Episodes in phase: 1275
|
| 72 |
+
[07:58:07] Ep 1300 | reward -0.775 | Phase 1/3 | Rolling mean: 0.221 / 0.6 | Episodes in phase: 1300
|
| 73 |
+
[07:58:23] Ep 1325 | reward -1.674 | Phase 1/3 | Rolling mean: 0.282 / 0.6 | Episodes in phase: 1325
|
| 74 |
+
[07:58:26] Ep 1350 | reward -0.136 | Phase 1/3 | Rolling mean: 0.298 / 0.6 | Episodes in phase: 1350
|
| 75 |
+
[07:58:28] Ep 1375 | reward -0.152 | Phase 1/3 | Rolling mean: 0.378 / 0.6 | Episodes in phase: 1375
|
| 76 |
+
[07:58:32] Ep 1400 | reward -0.021 | Phase 1/3 | Rolling mean: 0.404 / 0.6 | Episodes in phase: 1400
|
| 77 |
+
[07:58:47] Ep 1425 | reward +0.736 | Phase 1/3 | Rolling mean: 0.461 / 0.6 | Episodes in phase: 1425
|
| 78 |
+
[07:58:49] Ep 1450 | reward -0.544 | Phase 1/3 | Rolling mean: 0.417 / 0.6 | Episodes in phase: 1450
|
| 79 |
+
[07:58:51] Ep 1475 | reward +0.547 | Phase 1/3 | Rolling mean: 0.386 / 0.6 | Episodes in phase: 1475
|
| 80 |
+
[07:58:53] Ep 1500 | reward +0.621 | Phase 1/3 | Rolling mean: 0.475 / 0.6 | Episodes in phase: 1500
|
| 81 |
+
[07:59:08] Periodic save at step 5,000 ...
|
vec_normalize_latest.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b640eeaa4179d9f1d9ac1ad4c9d141bb36be1d0e9d5cbc6eebb60b429f1e5856
|
| 3 |
+
size 166596
|