Instructions to use garvitsachdeva/spindleflow-rl with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- stable-baselines3
How to use garvitsachdeva/spindleflow-rl with stable-baselines3:
from huggingface_sb3 import load_from_hub

checkpoint = load_from_hub(
    repo_id="garvitsachdeva/spindleflow-rl",
    filename="{MODEL FILENAME}.zip",
)
- Notebooks
- Google Colab
- Kaggle
Checkpoint at step 10,000
Browse files
- reward_curve.json +1 -1
- reward_curve.png +0 -0
- spindleflow_model_latest.zip +2 -2
- training_log.txt +88 -0
- vec_normalize_latest.pkl +1 -1
reward_curve.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"episodes": [0, 7, 14, 21, 28, 35, 42, 49, 56, 63, 70, 77, 84, 91, 98, 105, 112, 119, 126, 133, 140, 147, 154, 161, 168, 175, 182, 189, 196, 203, 210, 217, 224, 231, 238, 245, 252, 259, 266, 273, 280, 287, 294, 301, 308, 315, 322, 329, 336, 343, 350, 357, 364, 371, 378, 385, 392, 399, 406, 413, 420, 427, 434, 441, 448, 455, 462, 469, 476, 483, 490, 497, 504, 511, 518, 525, 532, 539, 546, 553, 560, 567, 574, 581, 588, 595, 602, 609, 616, 623, 630, 637, 644, 651, 658, 665, 672, 679, 686, 693, 700, 707, 714, 721, 728, 735, 742, 749, 756, 763, 770, 777, 784, 791, 798, 805, 812, 819, 826, 833, 840, 847, 854, 861, 868, 875, 882, 889, 896, 903, 910, 917, 924, 931, 938, 945, 952, 959, 966, 973, 980, 987, 994, 1001, 1008, 1015, 1022, 1029, 1036, 1043, 1050, 1057, 1064, 1071, 1078, 1085, 1092, 1099, 1106, 1113, 1120, 1127, 1134, 1141, 1148, 1155, 1162, 1169, 1176, 1183, 1190, 1197, 1204, 1211, 1218, 1225, 1232, 1239, 1246, 1253, 1260, 1267, 1274, 1281, 1288, 1295, 1302, 1309, 1316, 1323, 1330, 1337, 1344, 1351, 1358, 1365, 1372, 1379, 1386, 1393, 1400, 1407, 1414, 1421, 1428, 1435, 1442, 1449, 1456, 1463, 1470, 1477, 1484, 1491, 1498, 1505], "mean_rewards": [7.869385480880737, 0.39447228563949466, 0.46736883049209915, 0.3199193167754195, -0.010892340720727527, -0.12946453556004497, -0.2949352840524773, -0.373719582632184, -0.38903877655403657, -0.394298461091239, -0.4755417896093617, -0.6254103597939799, -0.631615869112705, -0.7221846164841401, -0.7328726649284363, -0.7261277403878538, -0.7111742453355538, -0.6811343130508536, -0.6172156007470269, -0.5741234169782776, -0.5649614407386827, -0.6304719744887399, -0.6643430014925176, -0.7462935703316409, -0.7303286298472238, -0.6525270720992825, -0.6220860190346444, -0.5855647556140626, -0.5600520932409716, -0.6385312578640878, -0.6217203504209848, -0.6404820328186217, -0.5646297308665357, -0.46785074545952837, -0.38970960279632555, -0.3499533957924302, -0.30017317354826156, -0.2824550400316519, -0.3004617769344661, 
-0.3535477963006614, -0.33393376740921094, -0.3313333828271808, -0.2409426725690106, -0.16390284169220218, -0.21640204097830543, -0.2319642876789562, -0.31141620639123413, -0.353557417757417, -0.3985643586339919, -0.4311660311527942, -0.3594949227806769, -0.34175623384745496, -0.4613509353642401, -0.47967430919800935, -0.5492527224123478, -0.49916473776102066, -0.4918099214371882, -0.47077294831213196, -0.4269245275737424, -0.4952932044275497, -0.6369963715245065, -0.6453018104657531, -0.6300839686178064, -0.49564921125573547, -0.5465395265798035, -0.5404178698694235, -0.6208385011965507, -0.6157085209790814, -0.6108908611594847, -0.7157817421186912, -0.611315862471728, -0.5023467261834365, -0.5686648415499612, -0.5695220507485302, -0.6284753986095127, -0.5987623235897014, -0.5179943298234752, -0.47517906001916055, -0.4324410670485936, -0.4277130573693859, -0.33923015503310844, -0.31555696287633556, -0.2900881542285022, -0.34110071137547493, -0.3634238948947505, -0.46017439952610356, -0.46005683769717026, -0.5069957694626952, -0.5368034732282946, -0.5363630219975388, -0.5321896889394051, -0.513642537456594, -0.4158981905171746, -0.48374814049978004, -0.47483629596076515, -0.44893436712261875, -0.34121398338557857, -0.37074862562708166, -0.35986225605991323, -0.2888309538462444, -0.24531601732106587, -0.139288459659407, -0.26363536735114296, -0.45459090594790486, -0.4021470984934192, -0.3751047815343267, -0.42528639194604595, -0.512994073527424, -0.33741763302762257, -0.3813054520440729, -0.3467753083494149, -0.38562066176612125, -0.4449031753465533, -0.4120248835043688, -0.32635698609642294, -0.23468282377641453, -0.14027568080315464, -0.10482709658773322, -0.04141475936692012, -0.21747780304500147, -0.19404257870720407, -0.18385122959060887, -0.190495409046937, -0.1766425924816806, -0.11231868486165215, -0.07688000227177613, -0.11049901294571005, -0.11022839932947566, -0.05706106148366081, 0.00837513066730217, 0.00798436445429137, 0.036163482373874435, 
0.03783564666580213, -0.00424236260158451, 0.027116403375801287, 0.043803773428264414, 0.03858699110385619, 0.033069006020301266, 0.01624414589452116, -0.1366923186731966, -0.041730524187809544, 0.00771083506314378, 0.034296022335949694, 0.09421771705934875, 0.15127575436705037, 0.09569966989128213, 0.10003364811602392, 0.029222117050697927, 0.07921872876192394, 0.15258988159659662, 0.21950287509121394, 0.14954511282083235, 0.11745882475454557, 0.011103616831334014, -0.07632065397736273, -0.15541876722617368, -0.016381304000357266, 0.026465766058352432, 0.033593990717475354, -0.05189018341173467, -0.035155581481950846, -0.06485033640637994, 0.07285434382624532, 0.11660055379922453, 0.20391239691525698, 0.25393885869140687, 0.2059961920359025, 0.1650057145100283, -0.10431696060358693, -0.007403379869892409, 0.06139961681573799, -0.008295599640788217, 0.024523470379216105, 0.00638242489903381, 0.03186144836639103, 0.0253872059800319, 0.10695318372822121, 0.21488577240195714, 0.2510290814955768, 0.4169209191918765, 0.447288881619706, 0.46890390509935587, 0.5537877085030471, 0.5642890175559411, 0.5325108316702474, 0.4965915335955, 0.5290486808057482, 0.4647692522695778, 0.34476327095916004, 0.4440733800568667, 0.4059782953271152, 0.3710075085124883, 0.39774463871052784, 0.3539025296922773, 0.3479153207797361, 0.3787729506576924, 0.3558028209709415, 0.31861691088660765, 0.3099606362612624, 0.3599474479101206, 0.3267678932139748, 0.3568433095750056, 0.4187891443112963, 0.4246233755624608, 0.3433752029545997, 0.31683328071315037, 0.39499579752354247, 0.4693222563517721, 0.5369696448508062, 0.5532687793632871, 0.4783048149271819, 0.5151937603827959, 0.5181547475758156, 0.5387902116574543, 0.5961638863909205, 0.6189852633734086], "raw_rewards": [7.869385480880737, -2.0270893275737762, -1.5078903436660767, -1.6289145946502686, -1.4581109285354614, -1.4582953453063965, 0.30212289094924927, 0.20895814895629883, 0.420966237783432, -1.42896568775177, -1.7208606004714966, 
-1.4811574220657349, -0.1653035283088684, 0.04737560451030731, 1.0387153327465057, -1.4575903415679932, -1.4570224285125732, 1.0627342760562897, -0.8742947578430176, 1.6949703097343445, -1.0036369562149048, -1.4761041402816772, -1.4624449014663696, -1.4472192525863647, -1.4332927465438843, 0.7904170751571655, 0.653762698173523, 1.2795192301273346, -0.3608699440956116, 0.35677075386047363, 0.9507952705025673, -3.4470927715301514, -1.4728577136993408, -1.483479380607605, 0.21373587846755981, 1.4765518307685852, -1.6128928735852242, -0.1485140323638916, 2.6798701882362366, -1.4776854515075684, -1.4701982736587524, -1.46907639503479, -1.4756847620010376, 2.111427068710327, -1.472286581993103, -1.4616392850875854, 1.861622393131256, 0.5909617990255356, 0.12714916467666626, -0.45931126549839973, 0.5657570883631706, -2.696280002593994, -1.1088446974754333, -0.629875659942627, -1.4666680097579956, -1.4689733982086182, -0.5987190008163452, -0.6024218797683716, 0.8062635287642479, -1.4665796756744385, -1.7659942060709, 0.5284237861633301, 0.3446391820907593, -1.4728001356124878, 0.9817864894866943, -1.473576545715332, 0.0034030526876449585, -1.4753345251083374, 0.03397753834724426, -0.8355124965310097, 2.2451577186584473, -1.2429626286029816, -1.0772457867860794, 0.44872643053531647, -1.1378175616264343, -0.1028159111738205, 0.3221398591995239, 1.5046327412128448, -0.939087450504303, -0.6979364454746246, 0.9164094515144825, -0.20006322860717773, -1.2905930280685425, -1.5479609966278076, -0.09268271923065186, -2.0991574972867966, 0.033834993839263916, 0.0028892159461975098, 0.6096921563148499, 0.23623988032341003, -0.2025727927684784, 2.117010712623596, 0.809642419219017, -1.8340483605861664, -1.55027636885643, 2.208156943321228, 1.2409679293632507, 0.3747459650039673, 0.5356261730194092, -1.5522199869155884, 1.7077340483665466, -1.8154371231794357, -1.549471378326416, -1.5465556383132935, -1.599133551120758, -2.312764525413513, -1.5398250818252563, 3.3785969018936157, 
-2.000951513648033, -2.503466285765171, -1.2611201405525208, -0.4323340952396393, -1.5388507843017578, -0.8869258761405945, 1.5722772777080536, -1.6581648588180542, -0.0460590124130249, 0.687589555978775, -0.3689194992184639, -2.1063080430030823, -1.3175745010375977, -1.5440726280212402, 1.2180553078651428, -1.6553943455219269, -0.4625515341758728, 0.7008185386657715, 2.127745598554611, 1.7467806935310364, -0.035655677318573, 0.8889943957328796, 0.4433015286922455, 0.16804933547973633, 3.3618789315223694, 1.5720088481903076, -0.3283916711807251, 1.053344488143921, 2.339919328689575, -0.939920961856842, -1.929141715168953, -1.4939937591552734, 0.46812979876995087, 1.9332732260227203, -0.05799245834350586, 1.641272246837616, 0.09446060657501221, -3.6827295124530792, -1.9294183105230331, -1.9493356943130493, -0.6192144155502319, 1.261986494064331, -0.08261801302433014, -2.1036276817321777, -0.6942150890827179, -1.5749655961990356, -0.6196750402450562, 0.3630285579711199, 0.2618691325187683, -0.291328564286232, -0.33801981806755066, -3.120486468076706, 1.8896211087703705, -2.12729275226593, -0.5134999752044678, 1.873863399028778, -1.3715928196907043, -0.5881721675395966, -1.2663371562957764, -2.325909271836281, -2.073127508163452, -1.5085158348083496, -0.21068474650382996, 1.8182676434516907, -1.391431838274002, 2.4527386128902435, 0.27165186405181885, 0.6319058239459991, 2.7988539934158325, 1.4977125525474548, -0.4476410448551178, 2.2035112977027893, 1.1141240745782852, 1.6052564978599548, 0.8674910366535187, -0.9870564341545105, 0.2687806189060211, -0.06043100357055664, 0.22413843870162964, -1.415934532880783, 0.5874640643596649, -0.2128455936908722, -0.5651094913482666, -0.07148459553718567, 0.6378590166568756, 3.1044903993606567, -1.9472792744636536, 1.4779390394687653, 2.53517609834671, -1.3368602693080902, 0.7410027906298637, -1.6118918657302856, 1.1316379755735397, 1.6181618869304657, 1.731214463710785, -0.7870284914970398, -1.7340275347232819, 
-0.04152199625968933, -1.6137357950210571, -0.5440936982631683, 2.3059643507003784, 2.786042958498001, -1.6151320934295654, 0.21432161331176758, 1.6381785869598389, 2.781656265258789, 1.1450567245483398, 0.05866968631744385], "step": 5000}
|
|
|
|
| 1 |
+
{"episodes": [0, 18, 36, 54, 72, 90, 108, 126, 144, 162, 180, 198, 216, 234, 252, 270, 288, 306, 324, 342, 360, 378, 396, 414, 432, 450, 468, 486, 504, 522, 540, 558, 576, 594, 612, 630, 648, 666, 684, 702, 720, 738, 756, 774, 792, 810, 828, 846, 864, 882, 900, 918, 936, 954, 972, 990, 1008, 1026, 1044, 1062, 1080, 1098, 1116, 1134, 1152, 1170, 1188, 1206, 1224, 1242, 1260, 1278, 1296, 1314, 1332, 1350, 1368, 1386, 1404, 1422, 1440, 1458, 1476, 1494, 1512, 1530, 1548, 1566, 1584, 1602, 1620, 1638, 1656, 1674, 1692, 1710, 1728, 1746, 1764, 1782, 1800, 1818, 1836, 1854, 1872, 1890, 1908, 1926, 1944, 1962, 1980, 1998, 2016, 2034, 2052, 2070, 2088, 2106, 2124, 2142, 2160, 2178, 2196, 2214, 2232, 2250, 2268, 2286, 2304, 2322, 2340, 2358, 2376, 2394, 2412, 2430, 2448, 2466, 2484, 2502, 2520, 2538, 2556, 2574, 2592, 2610, 2628, 2646, 2664, 2682, 2700, 2718, 2736, 2754, 2772, 2790, 2808, 2826, 2844, 2862, 2880, 2898, 2916, 2934, 2952, 2970, 2988, 3006, 3024, 3042, 3060, 3078, 3096, 3114, 3132, 3150, 3168, 3186, 3204, 3222, 3240, 3258, 3276, 3294, 3312, 3330, 3348, 3366, 3384, 3402, 3420, 3438, 3456, 3474, 3492, 3510, 3528, 3546, 3564, 3582, 3600, 3618, 3636], "mean_rewards": [7.869385480880737, 0.35832654429893745, -0.16492195256255768, -0.41051984910260547, -0.4666600593250908, -0.5210831629951577, -0.5492027676365244, -0.5199973956043795, -0.5274246354169887, -0.6168348340923252, -0.5763718811332027, -0.6426244833414535, -0.6204287648958328, -0.57561621586741, -0.5043334544247405, -0.4581533375318971, -0.4431843029406781, -0.4299387016517857, -0.44611268262839415, -0.35823154031391163, -0.35052841665878237, -0.3978075118540981, -0.37302712366946944, -0.4092835626094142, -0.46812825426868687, -0.4861689556062549, -0.5141371829783196, -0.5750634879037574, -0.5776504724194388, -0.5405701848749931, -0.5450267998276503, -0.5152033388942152, -0.49353750375519084, -0.541939804944527, -0.5020202658865806, -0.5006938357274611, -0.4278534385938566, -0.42015426341894563, 
-0.40840607932717593, -0.327925127531801, -0.390656028597892, -0.4459366598831756, -0.44185569296990124, -0.3529528458244525, -0.3244507045454376, -0.29189814684482723, -0.3297747009239354, -0.24400284398183392, -0.23462226946971246, -0.2303248851938726, -0.1785312213187853, -0.10729554165564067, -0.08863365000298062, -0.044396064285147974, -0.0756548218899867, -0.035530524932175546, 0.0331043691814437, 0.05104965120963343, 0.07906889477437669, 0.049794286489486694, -0.0333541377887621, 0.08013261274362986, 0.02404522315527384, 0.04678324418613217, 0.11633681839557139, 0.09018559919690693, -0.011656419200065372, -0.002868405103192225, 0.043362459781896935, 0.1609243609748044, 0.2448525605927948, 0.25207074520761497, 0.29158232284832625, 0.2784405831339677, 0.2859187344764615, 0.3154568649204141, 0.4207567130829525, 0.41447765322320246, 0.41456545753772933, 0.39034133770370055, 0.4166013927833474, 0.3762397216172418, 0.40843041296945987, 0.478122525546536, 0.4872877030162612, 0.44964427149414526, 0.4389239299970751, 0.4775110198706798, 0.5237314501767057, 0.4781374816148245, 0.49800853094004177, 0.5347077473844555, 0.5750919471502529, 0.5338655400875403, 0.5849348806026443, 0.6334119247884111, 0.6058271814298888, 0.6376417535037355, 0.6136350722939984, 0.6905025819273011, 0.7251743287561911, 0.7001435885776428, 0.7300886690800825, 0.7274164289010914, 0.637157942802942, 0.6127062377386859, 0.647124655713624, 0.6327053736844152, 0.643251203507144, 0.6397692677360233, 0.6305316047139138, 0.6138729520582153, 0.539113004860765, 0.6314716713431363, 0.7084281146475165, 0.7664123414196893, 0.7908425120698227, 0.7909288813288395, 0.7565632448173486, 0.8062136572386537, 0.8095599105683985, 0.910840266210201, 0.9882220188974515, 0.9823392286392506, 0.9880800652294516, 0.9232531002863242, 0.9731405620787972, 1.0696704696570773, 1.137984300649755, 1.1157669601689322, 1.120751329056345, 1.091754916163304, 1.0543542655838423, 1.0874224616000991, 1.1050260064664175, 
1.1508460028620262, 1.1218418643156138, 1.1050172184547922, 1.1149552422857096, 1.1210666878256375, 1.1432936652640269, 1.212356471494966, 1.2528533238283062, 1.2775489754442657, 1.2711966389196587, 1.3054996868493138, 1.3098191296026767, 1.303010802235004, 1.3167688116165144, 1.332003332336771, 1.2072641095760597, 1.1659913170611957, 1.1617221327007308, 1.1973635774863127, 1.19029163322565, 1.1377832197819602, 1.1733945103280328, 1.200469083787239, 1.1737295676759638, 1.2198803689903939, 1.3437078056222462, 1.4073380057669276, 1.4434562324044795, 1.3765805392925228, 1.4333498724582758, 1.4617803602397523, 1.4339119075838436, 1.4578246967201787, 1.4940050998651473, 1.4790018203381736, 1.3881733044572584, 1.2966387915005395, 1.2835384612227534, 1.2808164652068537, 1.3310863320807835, 1.3513264275299242, 1.343058421517016, 1.3600143823046034, 1.3582245039876428, 1.3453988715811827, 1.402938225777349, 1.4644667300405902, 1.4406623108476238, 1.3766494840485872, 1.3924313055883561, 1.3975671993896037, 1.433126266708004, 1.4459480587802418, 1.4466661614580796, 1.4509608922628583, 1.5078899698270547, 1.5502726379986649, 1.591167314530729, 1.6082314410052456, 1.5954601630404754, 1.560145628914892, 1.5600862531912523, 1.573801012259427, 1.5904485320283488, 1.5825545474845957, 1.5387725430939878, 1.4717690143324844, 1.4518324316653248], "raw_rewards": [7.869385480880737, 0.6967869400978088, -1.4413889646530151, 0.4599737599492073, -0.6520639061927795, -0.23191767930984497, -1.4565927982330322, -0.8742947578430176, -2.6075824797153473, -1.4442170858383179, -0.16410590708255768, -1.4615715742111206, -0.7505795061588287, 0.3043771982192993, -1.6128928735852242, 0.12662386894226074, 0.1923147737979889, -0.8761289119720459, -1.4533054828643799, -0.27511686831712723, -1.4647120237350464, -1.4666680097579956, 2.151063919067383, -2.312368720769882, -1.248689442873001, -1.7310243248939514, -0.6287155747413635, -1.1987037062644958, -1.0772457867860794, 0.216892309486866, 
-1.3451157063245773, 0.383902370929718, -2.4344645142555237, -2.121876023709774, -1.5537163019180298, -0.2025727927684784, -2.401705786585808, 0.6549385488033295, -2.55817112326622, 1.871029555797577, -0.02213919162750244, -1.5415691137313843, -2.000951513648033, -0.13388577103614807, 0.17777174711227417, 1.617654800415039, 1.0062615424394608, -1.5448787212371826, -1.5481922626495361, 2.127745598554611, -0.7919640690088272, -0.1357777714729309, -0.35106261074543, -0.9314782917499542, 0.5319417044520378, 0.3243334889411926, 0.09446060657501221, -1.5726486444473267, 0.9032813012599945, 2.5962595641613007, -0.5888408124446869, 1.5929381847381592, -0.09314501285552979, -0.5134999752044678, 0.7780119786038995, -3.4195436611771584, 1.1500522196292877, -1.9147660434246063, 0.12893807888031006, -1.2830363810062408, 1.1141240745782852, 0.34193265438079834, 2.7681850492954254, 1.138201892375946, 0.4452551603317261, 1.2519995421171188, -0.39680755138397217, 0.7410027906298637, 1.9592264890670776, -2.23688642680645, -0.16510164737701416, -1.6138243675231934, -0.8476248979568481, 1.1157943941652775, -1.6202170848846436, 2.166758894920349, -1.5663868188858032, -1.1529327481985092, -0.12494632601737976, 0.9080524891614914, 3.0023685693740845, 2.2008373141288757, 0.9773052334785461, 0.11245410144329071, 0.6097117960453033, -1.6265513896942139, -1.0573668628931046, 2.8179323077201843, 0.7834433019161224, -1.6283231973648071, -1.4633913040161133, -1.1272140443325043, -0.4163784980773926, 3.6425812244415283, 0.4373038485646248, 0.6823182106018066, 1.5891563892364502, -1.5813692808151245, -2.69570055603981, -0.34255164861679077, -0.5784454345703125, 2.345923662185669, 0.44461843371391296, 2.922288239002228, 1.8043333292007446, 2.1249231696128845, 2.92220076918602, 1.250470757484436, -1.804740458726883, 1.0650655627250671, -1.1206167042255402, 2.8100045323371887, 2.840016543865204, 1.209741860628128, 1.085776150226593, -1.6248805522918701, -0.8703119158744812, 0.7544630169868469, 
1.263941377401352, 1.5336683094501495, 0.18629544973373413, 1.9573948979377747, 1.1777045745402575, 0.3519379496574402, 2.2451024651527405, 3.212405025959015, 2.4137960374355316, -0.8849076330661774, -1.6204973459243774, -0.4745387136936188, 1.850459337234497, 2.229508101940155, 1.3959138691425323, 2.329979747533798, 3.6350157856941223, -0.3667806386947632, 0.8960558772087097, -0.4405495524406433, 0.9980701804161072, 2.5332993268966675, 1.1129309833049774, 0.6006221175193787, 3.297209322452545, 0.40909240394830704, 0.05018448829650879, 3.6179640889167786, 1.5946876406669617, 3.062887966632843, 2.337017297744751, 2.9371981620788574, 1.5607191920280457, 0.27119821310043335, 1.0661866962909698, -1.5963889360427856, 1.907113641500473, -1.5951862335205078, -0.0360943078994751, 0.7266952320933342, 1.6872042417526245, 0.20690703392028809, 2.647982209920883, 1.1412177383899689, 0.3479418158531189, 1.6862413585186005, 2.9885196685791016, 2.98825940489769, 0.7678718436509371, 1.5259829759597778, 1.1916424036026, -0.06244182586669922, 1.2249691784381866, 2.6612743139266968, 1.537642002105713, -0.17562654614448547, -1.7242172360420227, 3.888312876224518, -0.060656994581222534, 2.1636223196983337, -0.9748473465442657, 2.018460303544998, 3.78766006231308, 2.910611093044281, 2.586593806743622, 0.2345079481601715, -0.8356946706771851, 1.5325765013694763, 1.5288212597370148, 2.1431926488876343, 2.585873991250992, 0.7807489335536957, -0.5991351902484894, 0.8348989896476269, 0.7598983645439148], "step": 10000}
|
reward_curve.png
CHANGED
|
|
spindleflow_model_latest.zip
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f011377570fe13fa9e2851657c415886f83f9ffebfad0f8182cafdd1ba4fd55
|
| 3 |
+
size 143819552
|
training_log.txt
CHANGED
|
@@ -79,3 +79,91 @@
|
|
| 79 |
[07:58:51] Ep 1475 | reward +0.547 | Phase 1/3 | Rolling mean: 0.386 / 0.6 | Episodes in phase: 1475
|
| 80 |
[07:58:53] Ep 1500 | reward +0.621 | Phase 1/3 | Rolling mean: 0.475 / 0.6 | Episodes in phase: 1500
|
| 81 |
[07:59:08] Periodic save at step 5,000 ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
[07:58:51] Ep 1475 | reward +0.547 | Phase 1/3 | Rolling mean: 0.386 / 0.6 | Episodes in phase: 1475
|
| 80 |
[07:58:53] Ep 1500 | reward +0.621 | Phase 1/3 | Rolling mean: 0.475 / 0.6 | Episodes in phase: 1500
|
| 81 |
[07:59:08] Periodic save at step 5,000 ...
|
| 82 |
+
[07:59:12] Periodic push done — 5 files at step 5,000
|
| 83 |
+
[07:59:13] Ep 1525 | reward +0.139 | Phase 1/3 | Rolling mean: 0.434 / 0.6 | Episodes in phase: 1525
|
| 84 |
+
[07:59:18] Ep 1550 | reward +1.843 | Phase 1/3 | Rolling mean: 0.449 / 0.6 | Episodes in phase: 1550
|
| 85 |
+
[07:59:21] Ep 1575 | reward +1.560 | Phase 1/3 | Rolling mean: 0.460 / 0.6 | Episodes in phase: 1575
|
| 86 |
+
[07:59:23] Ep 1600 | reward +0.935 | Phase 1/3 | Rolling mean: 0.513 / 0.6 | Episodes in phase: 1600
|
| 87 |
+
[07:59:40] Ep 1625 | reward -2.150 | Phase 1/3 | Rolling mean: 0.468 / 0.6 | Episodes in phase: 1625
|
| 88 |
+
[07:59:42] Ep 1650 | reward +1.466 | Phase 1/3 | Rolling mean: 0.548 / 0.6 | Episodes in phase: 1650
|
| 89 |
+
[07:59:44] Ep 1675 | reward +0.112 | Phase 1/3 | Rolling mean: 0.557 / 0.6 | Episodes in phase: 1675
|
| 90 |
+
[07:59:44] Ep 1680 | reward +1.979 | Phase 2/3 | Rolling mean: 0.000 / 1.0 | Episodes in phase: 0
|
| 91 |
+
[07:59:46] Ep 1700 | reward +2.247 | Phase 2/3 | Rolling mean: 0.741 / 1.0 | Episodes in phase: 20
|
| 92 |
+
[08:00:01] Ep 1725 | reward -0.777 | Phase 2/3 | Rolling mean: 0.624 / 1.0 | Episodes in phase: 45
|
| 93 |
+
[08:00:06] Ep 1750 | reward -1.629 | Phase 2/3 | Rolling mean: 0.511 / 1.0 | Episodes in phase: 70
|
| 94 |
+
[08:00:08] Ep 1775 | reward +1.412 | Phase 2/3 | Rolling mean: 0.674 / 1.0 | Episodes in phase: 95
|
| 95 |
+
[08:00:10] Ep 1800 | reward +0.696 | Phase 2/3 | Rolling mean: 0.725 / 1.0 | Episodes in phase: 120
|
| 96 |
+
[08:00:32] Ep 1825 | reward +3.791 | Phase 2/3 | Rolling mean: 0.763 / 1.0 | Episodes in phase: 145
|
| 97 |
+
[08:00:35] Ep 1850 | reward +0.643 | Phase 2/3 | Rolling mean: 0.697 / 1.0 | Episodes in phase: 170
|
| 98 |
+
[08:00:37] Ep 1875 | reward -0.231 | Phase 2/3 | Rolling mean: 0.663 / 1.0 | Episodes in phase: 195
|
| 99 |
+
[08:00:39] Ep 1900 | reward +0.922 | Phase 2/3 | Rolling mean: 0.645 / 1.0 | Episodes in phase: 220
|
| 100 |
+
[08:00:58] Ep 1925 | reward +1.702 | Phase 2/3 | Rolling mean: 0.613 / 1.0 | Episodes in phase: 245
|
| 101 |
+
[08:01:03] Ep 1950 | reward +0.046 | Phase 2/3 | Rolling mean: 0.702 / 1.0 | Episodes in phase: 270
|
| 102 |
+
[08:01:05] Ep 1975 | reward +4.085 | Phase 2/3 | Rolling mean: 0.690 / 1.0 | Episodes in phase: 295
|
| 103 |
+
[08:01:07] Ep 2000 | reward +0.513 | Phase 2/3 | Rolling mean: 0.630 / 1.0 | Episodes in phase: 320
|
| 104 |
+
[08:01:25] Ep 2025 | reward +1.310 | Phase 2/3 | Rolling mean: 0.559 / 1.0 | Episodes in phase: 345
|
| 105 |
+
[08:01:27] Ep 2050 | reward +1.680 | Phase 2/3 | Rolling mean: 0.656 / 1.0 | Episodes in phase: 370
|
| 106 |
+
[08:01:28] Ep 2075 | reward -1.329 | Phase 2/3 | Rolling mean: 0.732 / 1.0 | Episodes in phase: 395
|
| 107 |
+
[08:01:30] Ep 2100 | reward -0.745 | Phase 2/3 | Rolling mean: 0.743 / 1.0 | Episodes in phase: 420
|
| 108 |
+
[08:01:48] Ep 2125 | reward -1.805 | Phase 2/3 | Rolling mean: 0.776 / 1.0 | Episodes in phase: 445
|
| 109 |
+
[08:01:52] Ep 2150 | reward -0.513 | Phase 2/3 | Rolling mean: 0.838 / 1.0 | Episodes in phase: 470
|
| 110 |
+
[08:01:53] Ep 2175 | reward +1.576 | Phase 2/3 | Rolling mean: 0.832 / 1.0 | Episodes in phase: 495
|
| 111 |
+
[08:01:55] Ep 2200 | reward -0.518 | Phase 2/3 | Rolling mean: 0.924 / 1.0 | Episodes in phase: 520
|
| 112 |
+
[08:02:13] Ep 2225 | reward +2.084 | Phase 3/3 | Rolling mean: 0.000 / β | Episodes in phase: 0
|
| 113 |
+
[08:02:14] Ep 2250 | reward +2.102 | Phase 3/3 | Rolling mean: 0.763 / β | Episodes in phase: 25
|
| 114 |
+
[08:02:16] Ep 2275 | reward +2.070 | Phase 3/3 | Rolling mean: 0.972 / β | Episodes in phase: 50
|
| 115 |
+
[08:02:18] Ep 2300 | reward +1.526 | Phase 3/3 | Rolling mean: 1.075 / β | Episodes in phase: 75
|
| 116 |
+
[08:02:41] Ep 2325 | reward -0.972 | Phase 3/3 | Rolling mean: 1.124 / β | Episodes in phase: 100
|
| 117 |
+
[08:02:42] Ep 2350 | reward +2.646 | Phase 3/3 | Rolling mean: 1.119 / β | Episodes in phase: 125
|
| 118 |
+
[08:02:46] Ep 2375 | reward +0.804 | Phase 3/3 | Rolling mean: 1.073 / β | Episodes in phase: 150
|
| 119 |
+
[08:02:47] Ep 2400 | reward +0.805 | Phase 3/3 | Rolling mean: 1.053 / β | Episodes in phase: 175
|
| 120 |
+
[08:03:03] Ep 2425 | reward +2.599 | Phase 3/3 | Rolling mean: 1.063 / β | Episodes in phase: 200
|
| 121 |
+
[08:03:05] Ep 2450 | reward +1.555 | Phase 3/3 | Rolling mean: 1.132 / β | Episodes in phase: 225
|
| 122 |
+
[08:03:07] Ep 2475 | reward +1.189 | Phase 3/3 | Rolling mean: 1.136 / β | Episodes in phase: 250
|
| 123 |
+
[08:03:08] Ep 2500 | reward -0.174 | Phase 3/3 | Rolling mean: 1.126 / β | Episodes in phase: 275
|
| 124 |
+
[08:03:26] Ep 2525 | reward +0.991 | Phase 3/3 | Rolling mean: 1.184 / β | Episodes in phase: 300
|
| 125 |
+
[08:03:28] Ep 2550 | reward +0.487 | Phase 3/3 | Rolling mean: 1.187 / β | Episodes in phase: 325
|
| 126 |
+
[08:03:29] Ep 2575 | reward +2.330 | Phase 3/3 | Rolling mean: 1.248 / β | Episodes in phase: 350
|
| 127 |
+
[08:03:32] Ep 2600 | reward +0.458 | Phase 3/3 | Rolling mean: 1.285 / β | Episodes in phase: 375
|
| 128 |
+
[08:03:53] Ep 2625 | reward +2.538 | Phase 3/3 | Rolling mean: 1.341 / β | Episodes in phase: 400
|
| 129 |
+
[08:03:55] Ep 2650 | reward +0.220 | Phase 3/3 | Rolling mean: 1.298 / β | Episodes in phase: 425
|
| 130 |
+
[08:03:56] Ep 2675 | reward +0.329 | Phase 3/3 | Rolling mean: 1.297 / β | Episodes in phase: 450
|
| 131 |
+
[08:03:58] Ep 2700 | reward -0.493 | Phase 3/3 | Rolling mean: 1.248 / β | Episodes in phase: 475
|
| 132 |
+
[08:04:14] Ep 2725 | reward -1.109 | Phase 3/3 | Rolling mean: 1.134 / β | Episodes in phase: 500
|
| 133 |
+
[08:04:16] Ep 2750 | reward +1.337 | Phase 3/3 | Rolling mean: 1.200 / β | Episodes in phase: 525
|
| 134 |
+
[08:04:17] Ep 2775 | reward +2.999 | Phase 3/3 | Rolling mean: 1.185 / β | Episodes in phase: 550
|
| 135 |
+
[08:04:18] Ep 2800 | reward +0.851 | Phase 3/3 | Rolling mean: 1.198 / β | Episodes in phase: 575
|
| 136 |
+
[08:04:36] Ep 2825 | reward +2.942 | Phase 3/3 | Rolling mean: 1.153 / β | Episodes in phase: 600
|
| 137 |
+
[08:04:38] Ep 2850 | reward +1.594 | Phase 3/3 | Rolling mean: 1.208 / β | Episodes in phase: 625
|
| 138 |
+
[08:04:39] Ep 2875 | reward +1.437 | Phase 3/3 | Rolling mean: 1.266 / β | Episodes in phase: 650
|
| 139 |
+
[08:04:41] Ep 2900 | reward +1.378 | Phase 3/3 | Rolling mean: 1.376 / β | Episodes in phase: 675
|
| 140 |
+
[08:05:01] Ep 2925 | reward +1.841 | Phase 3/3 | Rolling mean: 1.466 / β | Episodes in phase: 700
|
| 141 |
+
[08:05:03] Ep 2950 | reward +1.378 | Phase 3/3 | Rolling mean: 1.383 / β | Episodes in phase: 725
|
| 142 |
+
[08:05:04] Ep 2975 | reward +2.754 | Phase 3/3 | Rolling mean: 1.417 / β | Episodes in phase: 750
|
| 143 |
+
[08:05:06] Ep 3000 | reward +1.674 | Phase 3/3 | Rolling mean: 1.441 / β | Episodes in phase: 775
|
| 144 |
+
[08:05:24] Ep 3025 | reward +1.687 | Phase 3/3 | Rolling mean: 1.461 / β | Episodes in phase: 800
|
| 145 |
+
[08:05:27] Ep 3050 | reward +3.082 | Phase 3/3 | Rolling mean: 1.481 / β | Episodes in phase: 825
|
| 146 |
+
[08:05:29] Ep 3075 | reward +1.691 | Phase 3/3 | Rolling mean: 1.365 / β | Episodes in phase: 850
|
| 147 |
+
[08:05:31] Ep 3100 | reward +1.570 | Phase 3/3 | Rolling mean: 1.296 / β | Episodes in phase: 875
|
| 148 |
+
[08:05:46] Ep 3125 | reward +2.934 | Phase 3/3 | Rolling mean: 1.288 / β | Episodes in phase: 900
|
| 149 |
+
[08:05:47] Ep 3150 | reward +2.934 | Phase 3/3 | Rolling mean: 1.372 / β | Episodes in phase: 925
|
| 150 |
+
[08:05:48] Ep 3175 | reward +1.372 | Phase 3/3 | Rolling mean: 1.368 / β | Episodes in phase: 950
|
| 151 |
+
[08:05:50] Ep 3200 | reward +0.083 | Phase 3/3 | Rolling mean: 1.343 / β | Episodes in phase: 975
|
| 152 |
+
[08:06:11] Ep 3225 | reward -0.211 | Phase 3/3 | Rolling mean: 1.375 / β | Episodes in phase: 1000
|
| 153 |
+
[08:06:12] Ep 3250 | reward +2.469 | Phase 3/3 | Rolling mean: 1.360 / β | Episodes in phase: 1025
|
| 154 |
+
[08:06:16] Ep 3275 | reward -0.052 | Phase 3/3 | Rolling mean: 1.423 / β | Episodes in phase: 1050
|
| 155 |
+
[08:06:17] Ep 3300 | reward +0.193 | Phase 3/3 | Rolling mean: 1.398 / β | Episodes in phase: 1075
|
| 156 |
+
[08:06:35] Ep 3325 | reward +1.664 | Phase 3/3 | Rolling mean: 1.411 / β | Episodes in phase: 1100
|
| 157 |
+
[08:06:36] Ep 3350 | reward +2.164 | Phase 3/3 | Rolling mean: 1.429 / β | Episodes in phase: 1125
|
| 158 |
+
[08:06:37] Ep 3375 | reward +0.881 | Phase 3/3 | Rolling mean: 1.460 / β | Episodes in phase: 1150
|
| 159 |
+
[08:06:39] Ep 3400 | reward -1.587 | Phase 3/3 | Rolling mean: 1.476 / β | Episodes in phase: 1175
|
| 160 |
+
[08:07:00] Ep 3425 | reward +0.563 | Phase 3/3 | Rolling mean: 1.499 / β | Episodes in phase: 1200
|
| 161 |
+
[08:07:01] Ep 3450 | reward +0.873 | Phase 3/3 | Rolling mean: 1.544 / β | Episodes in phase: 1225
|
| 162 |
+
[08:07:03] Ep 3475 | reward +0.235 | Phase 3/3 | Rolling mean: 1.536 / β | Episodes in phase: 1250
|
| 163 |
+
[08:07:04] Ep 3500 | reward +1.512 | Phase 3/3 | Rolling mean: 1.570 / β | Episodes in phase: 1275
|
| 164 |
+
[08:07:23] Ep 3525 | reward +2.749 | Phase 3/3 | Rolling mean: 1.605 / β | Episodes in phase: 1300
|
| 165 |
+
[08:07:25] Ep 3550 | reward -0.013 | Phase 3/3 | Rolling mean: 1.583 / β | Episodes in phase: 1325
|
| 166 |
+
[08:07:26] Ep 3575 | reward +3.445 | Phase 3/3 | Rolling mean: 1.559 / β | Episodes in phase: 1350
|
| 167 |
+
[08:07:27] Ep 3600 | reward -0.091 | Phase 3/3 | Rolling mean: 1.554 / β | Episodes in phase: 1375
|
| 168 |
+
[08:07:43] Ep 3625 | reward +0.999 | Phase 3/3 | Rolling mean: 1.488 / β | Episodes in phase: 1400
|
| 169 |
+
[08:07:44] Periodic save at step 10,000 ...
|
vec_normalize_latest.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 166596
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00edcbf12ded6ddfd9bff8c0669ffbc0a2e4e0de95378321be3fb17b45f7fcf1
|
| 3 |
size 166596
|