garvitsachdeva commited on
Commit
2c51a53
Β·
verified Β·
1 Parent(s): 67a81e8

Checkpoint at step 15,000

Browse files
reward_curve.json CHANGED
@@ -1 +1 @@
1
- {"episodes": [0, 18, 36, 54, 72, 90, 108, 126, 144, 162, 180, 198, 216, 234, 252, 270, 288, 306, 324, 342, 360, 378, 396, 414, 432, 450, 468, 486, 504, 522, 540, 558, 576, 594, 612, 630, 648, 666, 684, 702, 720, 738, 756, 774, 792, 810, 828, 846, 864, 882, 900, 918, 936, 954, 972, 990, 1008, 1026, 1044, 1062, 1080, 1098, 1116, 1134, 1152, 1170, 1188, 1206, 1224, 1242, 1260, 1278, 1296, 1314, 1332, 1350, 1368, 1386, 1404, 1422, 1440, 1458, 1476, 1494, 1512, 1530, 1548, 1566, 1584, 1602, 1620, 1638, 1656, 1674, 1692, 1710, 1728, 1746, 1764, 1782, 1800, 1818, 1836, 1854, 1872, 1890, 1908, 1926, 1944, 1962, 1980, 1998, 2016, 2034, 2052, 2070, 2088, 2106, 2124, 2142, 2160, 2178, 2196, 2214, 2232, 2250, 2268, 2286, 2304, 2322, 2340, 2358, 2376, 2394, 2412, 2430, 2448, 2466, 2484, 2502, 2520, 2538, 2556, 2574, 2592, 2610, 2628, 2646, 2664, 2682, 2700, 2718, 2736, 2754, 2772, 2790, 2808, 2826, 2844, 2862, 2880, 2898, 2916, 2934, 2952, 2970, 2988, 3006, 3024, 3042, 3060, 3078, 3096, 3114, 3132, 3150, 3168, 3186, 3204, 3222, 3240, 3258, 3276, 3294, 3312, 3330, 3348, 3366, 3384, 3402, 3420, 3438, 3456, 3474, 3492, 3510, 3528, 3546, 3564, 3582, 3600, 3618, 3636], "mean_rewards": [7.869385480880737, 0.35832654429893745, -0.16492195256255768, -0.41051984910260547, -0.4666600593250908, -0.5210831629951577, -0.5492027676365244, -0.5199973956043795, -0.5274246354169887, -0.6168348340923252, -0.5763718811332027, -0.6426244833414535, -0.6204287648958328, -0.57561621586741, -0.5043334544247405, -0.4581533375318971, -0.4431843029406781, -0.4299387016517857, -0.44611268262839415, -0.35823154031391163, -0.35052841665878237, -0.3978075118540981, -0.37302712366946944, -0.4092835626094142, -0.46812825426868687, -0.4861689556062549, -0.5141371829783196, -0.5750634879037574, -0.5776504724194388, -0.5405701848749931, -0.5450267998276503, -0.5152033388942152, -0.49353750375519084, -0.541939804944527, -0.5020202658865806, -0.5006938357274611, -0.4278534385938566, -0.42015426341894563, -0.40840607932717593, -0.327925127531801, -0.390656028597892, -0.4459366598831756, -0.44185569296990124, -0.3529528458244525, -0.3244507045454376, -0.29189814684482723, -0.3297747009239354, -0.24400284398183392, -0.23462226946971246, -0.2303248851938726, -0.1785312213187853, -0.10729554165564067, -0.08863365000298062, -0.044396064285147974, -0.0756548218899867, -0.035530524932175546, 0.0331043691814437, 0.05104965120963343, 0.07906889477437669, 0.049794286489486694, -0.0333541377887621, 0.08013261274362986, 0.02404522315527384, 0.04678324418613217, 0.11633681839557139, 0.09018559919690693, -0.011656419200065372, -0.002868405103192225, 0.043362459781896935, 0.1609243609748044, 0.2448525605927948, 0.25207074520761497, 0.29158232284832625, 0.2784405831339677, 0.2859187344764615, 0.3154568649204141, 0.4207567130829525, 0.41447765322320246, 0.41456545753772933, 0.39034133770370055, 0.4166013927833474, 0.3762397216172418, 0.40843041296945987, 0.478122525546536, 0.4872877030162612, 0.44964427149414526, 0.4389239299970751, 0.4775110198706798, 0.5237314501767057, 0.4781374816148245, 0.49800853094004177, 0.5347077473844555, 0.5750919471502529, 0.5338655400875403, 0.5849348806026443, 0.6334119247884111, 0.6058271814298888, 0.6376417535037355, 0.6136350722939984, 0.6905025819273011, 0.7251743287561911, 0.7001435885776428, 0.7300886690800825, 0.7274164289010914, 0.637157942802942, 0.6127062377386859, 0.647124655713624, 0.6327053736844152, 0.643251203507144, 0.6397692677360233, 0.6305316047139138, 0.6138729520582153, 0.539113004860765, 0.6314716713431363, 0.7084281146475165, 0.7664123414196893, 0.7908425120698227, 0.7909288813288395, 0.7565632448173486, 0.8062136572386537, 0.8095599105683985, 0.910840266210201, 0.9882220188974515, 0.9823392286392506, 0.9880800652294516, 0.9232531002863242, 0.9731405620787972, 1.0696704696570773, 1.137984300649755, 1.1157669601689322, 1.120751329056345, 1.091754916163304, 1.0543542655838423, 1.0874224616000991, 1.1050260064664175, 1.1508460028620262, 1.1218418643156138, 1.1050172184547922, 1.1149552422857096, 1.1210666878256375, 1.1432936652640269, 1.212356471494966, 1.2528533238283062, 1.2775489754442657, 1.2711966389196587, 1.3054996868493138, 1.3098191296026767, 1.303010802235004, 1.3167688116165144, 1.332003332336771, 1.2072641095760597, 1.1659913170611957, 1.1617221327007308, 1.1973635774863127, 1.19029163322565, 1.1377832197819602, 1.1733945103280328, 1.200469083787239, 1.1737295676759638, 1.2198803689903939, 1.3437078056222462, 1.4073380057669276, 1.4434562324044795, 1.3765805392925228, 1.4333498724582758, 1.4617803602397523, 1.4339119075838436, 1.4578246967201787, 1.4940050998651473, 1.4790018203381736, 1.3881733044572584, 1.2966387915005395, 1.2835384612227534, 1.2808164652068537, 1.3310863320807835, 1.3513264275299242, 1.343058421517016, 1.3600143823046034, 1.3582245039876428, 1.3453988715811827, 1.402938225777349, 1.4644667300405902, 1.4406623108476238, 1.3766494840485872, 1.3924313055883561, 1.3975671993896037, 1.433126266708004, 1.4459480587802418, 1.4466661614580796, 1.4509608922628583, 1.5078899698270547, 1.5502726379986649, 1.591167314530729, 1.6082314410052456, 1.5954601630404754, 1.560145628914892, 1.5600862531912523, 1.573801012259427, 1.5904485320283488, 1.5825545474845957, 1.5387725430939878, 1.4717690143324844, 1.4518324316653248], "raw_rewards": [7.869385480880737, 0.6967869400978088, -1.4413889646530151, 0.4599737599492073, -0.6520639061927795, -0.23191767930984497, -1.4565927982330322, -0.8742947578430176, -2.6075824797153473, -1.4442170858383179, -0.16410590708255768, -1.4615715742111206, -0.7505795061588287, 0.3043771982192993, -1.6128928735852242, 0.12662386894226074, 0.1923147737979889, -0.8761289119720459, -1.4533054828643799, -0.27511686831712723, -1.4647120237350464, -1.4666680097579956, 2.151063919067383, -2.312368720769882, -1.248689442873001, -1.7310243248939514, -0.6287155747413635, -1.1987037062644958, -1.0772457867860794, 0.216892309486866, -1.3451157063245773, 0.383902370929718, -2.4344645142555237, -2.121876023709774, -1.5537163019180298, -0.2025727927684784, -2.401705786585808, 0.6549385488033295, -2.55817112326622, 1.871029555797577, -0.02213919162750244, -1.5415691137313843, -2.000951513648033, -0.13388577103614807, 0.17777174711227417, 1.617654800415039, 1.0062615424394608, -1.5448787212371826, -1.5481922626495361, 2.127745598554611, -0.7919640690088272, -0.1357777714729309, -0.35106261074543, -0.9314782917499542, 0.5319417044520378, 0.3243334889411926, 0.09446060657501221, -1.5726486444473267, 0.9032813012599945, 2.5962595641613007, -0.5888408124446869, 1.5929381847381592, -0.09314501285552979, -0.5134999752044678, 0.7780119786038995, -3.4195436611771584, 1.1500522196292877, -1.9147660434246063, 0.12893807888031006, -1.2830363810062408, 1.1141240745782852, 0.34193265438079834, 2.7681850492954254, 1.138201892375946, 0.4452551603317261, 1.2519995421171188, -0.39680755138397217, 0.7410027906298637, 1.9592264890670776, -2.23688642680645, -0.16510164737701416, -1.6138243675231934, -0.8476248979568481, 1.1157943941652775, -1.6202170848846436, 2.166758894920349, -1.5663868188858032, -1.1529327481985092, -0.12494632601737976, 0.9080524891614914, 3.0023685693740845, 2.2008373141288757, 0.9773052334785461, 0.11245410144329071, 0.6097117960453033, -1.6265513896942139, -1.0573668628931046, 2.8179323077201843, 0.7834433019161224, -1.6283231973648071, -1.4633913040161133, -1.1272140443325043, -0.4163784980773926, 3.6425812244415283, 0.4373038485646248, 0.6823182106018066, 1.5891563892364502, -1.5813692808151245, -2.69570055603981, -0.34255164861679077, -0.5784454345703125, 2.345923662185669, 0.44461843371391296, 2.922288239002228, 1.8043333292007446, 2.1249231696128845, 2.92220076918602, 1.250470757484436, -1.804740458726883, 1.0650655627250671, -1.1206167042255402, 2.8100045323371887, 2.840016543865204, 1.209741860628128, 1.085776150226593, -1.6248805522918701, -0.8703119158744812, 0.7544630169868469, 1.263941377401352, 1.5336683094501495, 0.18629544973373413, 1.9573948979377747, 1.1777045745402575, 0.3519379496574402, 2.2451024651527405, 3.212405025959015, 2.4137960374355316, -0.8849076330661774, -1.6204973459243774, -0.4745387136936188, 1.850459337234497, 2.229508101940155, 1.3959138691425323, 2.329979747533798, 3.6350157856941223, -0.3667806386947632, 0.8960558772087097, -0.4405495524406433, 0.9980701804161072, 2.5332993268966675, 1.1129309833049774, 0.6006221175193787, 3.297209322452545, 0.40909240394830704, 0.05018448829650879, 3.6179640889167786, 1.5946876406669617, 3.062887966632843, 2.337017297744751, 2.9371981620788574, 1.5607191920280457, 0.27119821310043335, 1.0661866962909698, -1.5963889360427856, 1.907113641500473, -1.5951862335205078, -0.0360943078994751, 0.7266952320933342, 1.6872042417526245, 0.20690703392028809, 2.647982209920883, 1.1412177383899689, 0.3479418158531189, 1.6862413585186005, 2.9885196685791016, 2.98825940489769, 0.7678718436509371, 1.5259829759597778, 1.1916424036026, -0.06244182586669922, 1.2249691784381866, 2.6612743139266968, 1.537642002105713, -0.17562654614448547, -1.7242172360420227, 3.888312876224518, -0.060656994581222534, 2.1636223196983337, -0.9748473465442657, 2.018460303544998, 3.78766006231308, 2.910611093044281, 2.586593806743622, 0.2345079481601715, -0.8356946706771851, 1.5325765013694763, 1.5288212597370148, 2.1431926488876343, 2.585873991250992, 0.7807489335536957, -0.5991351902484894, 0.8348989896476269, 0.7598983645439148], "step": 10000}
 
1
+ {"episodes": [0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330, 360, 390, 420, 450, 480, 510, 540, 570, 600, 630, 660, 690, 720, 750, 780, 810, 840, 870, 900, 930, 960, 990, 1020, 1050, 1080, 1110, 1140, 1170, 1200, 1230, 1260, 1290, 1320, 1350, 1380, 1410, 1440, 1470, 1500, 1530, 1560, 1590, 1620, 1650, 1680, 1710, 1740, 1770, 1800, 1830, 1860, 1890, 1920, 1950, 1980, 2010, 2040, 2070, 2100, 2130, 2160, 2190, 2220, 2250, 2280, 2310, 2340, 2370, 2400, 2430, 2460, 2490, 2520, 2550, 2580, 2610, 2640, 2670, 2700, 2730, 2760, 2790, 2820, 2850, 2880, 2910, 2940, 2970, 3000, 3030, 3060, 3090, 3120, 3150, 3180, 3210, 3240, 3270, 3300, 3330, 3360, 3390, 3420, 3450, 3480, 3510, 3540, 3570, 3600, 3630, 3660, 3690, 3720, 3750, 3780, 3810, 3840, 3870, 3900, 3930, 3960, 3990, 4020, 4050, 4080, 4110, 4140, 4170, 4200, 4230, 4260, 4290, 4320, 4350, 4380, 4410, 4440, 4470, 4500, 4530, 4560, 4590, 4620, 4650, 4680, 4710, 4740, 4770, 4800, 4830, 4860, 4890, 4920, 4950, 4980, 5010, 5040, 5070, 5100, 5130, 5160, 5190, 5220, 5250, 5280, 5310, 5340, 5370, 5400, 5430, 5460, 5490, 5520, 5550, 5580, 5610, 5640, 5670, 5700, 5730, 5760, 5790, 5820, 5850, 5880, 5910, 5940, 5970, 6000], "mean_rewards": [7.869385480880737, -0.03784379158769884, -0.38398340008542187, -0.5210831629951577, -0.5376839611586953, -0.5634360476859556, -0.5763718811332027, -0.5373458652470208, -0.5397185524229996, -0.4764093054678442, -0.4615140618583144, -0.5137614059286648, -0.4864953896740069, -0.45432056753359584, -0.4838050582557177, -0.4608095791283378, -0.45296298416134045, -0.4769556150426286, -0.4439206124924814, -0.46383931326204975, -0.5280678830547443, -0.5144892747803832, -0.5157356494235874, -0.4909607601900961, -0.42363169100999043, -0.46855141274285633, -0.38041776629621227, -0.3448548663086844, -0.35473247120983353, -0.3207630833906942, -0.23483040785133247, -0.20471441963477838, -0.158047115078677, -0.13889030838964989, -0.07396525167397514, 0.01805690512529864, -0.0565502941423398, 0.002252017573410312, 0.04511761825704417, 0.04892878985612203, 0.017880844519726487, 0.07235489055415653, 0.136860854966899, 0.18752923034538596, 0.18795315243784858, 0.21790850257180286, 0.2798241166527451, 0.29991839786942137, 0.34795478758840076, 0.35555866552479815, 0.470945018639677, 0.46686866794348947, 0.42320820814405646, 0.4334799908564136, 0.47369504578423915, 0.5062765226389485, 0.5452343198395218, 0.5701588542495133, 0.5511569689877566, 0.5913673671022588, 0.6101769053628545, 0.6990016415824695, 0.7033187948030093, 0.6567048065584856, 0.6408570082110371, 0.6604031588664796, 0.6721762023058208, 0.6225126298323324, 0.6809567648467194, 0.7293495094705842, 0.6778633743740421, 0.6709112041037751, 0.7319613369706006, 0.8407363991117388, 0.8891737701153805, 0.8738852649425918, 0.9014333227411008, 1.0337030563943628, 1.0823944018347962, 1.033293042744158, 1.0458343673274535, 1.1098001648381175, 1.095692048926091, 1.07855425682897, 1.161847744206821, 1.1869417882993947, 1.208030396056306, 1.2077794038699878, 1.2062692112113589, 1.25349467685477, 1.235462390262609, 1.2114898104356318, 1.2211218770124659, 1.2378501558478996, 1.2069873789522723, 1.2299577432024658, 1.2741849172848048, 1.2766783515435478, 1.2896925689134475, 1.3053895339943369, 1.371006360144928, 1.4421150034805452, 1.3858411352686733, 1.3896705019873696, 1.3836635586910493, 1.4107440632724801, 1.3759163208120804, 1.378403567067588, 1.369893674923736, 1.3730929282629578, 1.3362735824085872, 1.353932322443938, 1.4317909455518947, 1.474903305566913, 1.4992357752538774, 1.5263299180614138, 1.495371489162674, 1.4704664318717473, 1.524534152993402, 1.538413893641127, 1.5681221992122811, 1.5714308077605157, 1.5370448222584308, 1.5366823457918495, 1.5262220435489018, 1.4881265710542806, 1.5132084826103085, 1.5588943709462066, 1.5223572991698955, 1.5288328876123696, 1.5251142936685456, 1.5322279708898652, 1.4718089767778755, 1.5043148173066319, 1.532659297278968, 1.5412766329642686, 1.554760075595603, 1.5617171381958699, 1.5802682615536734, 1.5707963926178736, 1.5742931580278923, 1.618845480722406, 1.681138369700384, 1.6638504557052491, 1.6770966182345686, 1.63650145752522, 1.624646826235528, 1.6389080214187726, 1.620934071169726, 1.6273822806358431, 1.613626319843429, 1.5480119745938379, 1.5782707857344804, 1.5856853888808533, 1.6018862150357147, 1.6425269661083213, 1.6230434969893033, 1.606394495180898, 1.6375761047233424, 1.6313965761703895, 1.6889079141102705, 1.7454538686861691, 1.7183754010737369, 1.744705453254853, 1.7503461068891613, 1.7638859317706597, 1.8400702719381603, 1.8697715372607322, 1.8458195232380712, 1.922130432215038, 1.8721037799579596, 1.8412629770024815, 1.8464906375091183, 1.8437538819941877, 1.807503633808255, 1.7754378892041494, 1.732152500254409, 1.7182342989005177, 1.7024477059964094, 1.6869846862283635, 1.6826654165518309, 1.6873143049668589, 1.718196635267509, 1.6883231143891928, 1.6928668349194156, 1.7060435906028304, 1.697957367866974, 1.726183009851417, 1.7559919002404611, 1.7728966967794821, 1.768641453597152, 1.7635819557344923, 1.7206717061452468, 1.7514717591439208, 1.7769463395064586, 1.8193774796029343, 1.8263204891869909, 1.7906362776835727, 1.796637931567253, 1.802112196942187, 1.7855615070187418], "raw_rewards": [7.869385480880737, 0.5789080858230591, -0.22971093654632568, -0.23191767930984497, -1.4611610174179077, -1.462523341178894, -0.16410590708255768, 0.9507952705025673, 0.7632522517815232, 0.12662386894226074, 0.6558951139450073, -1.9698248095810413, -1.4647120237350464, -1.4707489013671875, -1.7659942060709, -1.7310243248939514, -0.9493927657604218, -0.7137753665447235, -1.3451157063245773, -0.3434333801269531, -0.1224970817565918, -0.2025727927684784, 0.7919805943965912, -1.5900378823280334, -0.02213919162750244, 0.23868058621883392, 0.609815925359726, 1.617654800415039, -1.3175745010375977, -0.23402023315429688, -0.7919640690088272, 0.3521580994129181, 0.26505059003829956, 0.3243334889411926, 1.7180908620357513, -0.08261801302433014, -0.5888408124446869, 0.6898269057273865, 1.597887396812439, -3.4195436611771584, -1.036895513534546, 1.735375165939331, 1.1141240745782852, 1.0031541883945465, 3.58181095123291, 1.2519995421171188, -0.905962198972702, 2.344604343175888, -0.16510164737701416, -1.6151320934295654, 1.225896954536438, 2.166758894920349, -0.4428107738494873, -1.1809425950050354, 3.0023685693740845, -1.1184721887111664, 0.20370317995548248, -1.6265513896942139, 0.06449981778860092, 0.9455031752586365, -1.4633913040161133, 2.364822745323181, 0.4447557330131531, 0.6823182106018066, -1.8564150631427765, 2.234480082988739, -0.5784454345703125, 1.0068292915821075, 3.9354124665260315, 2.1249231696128845, -1.3315133452415466, 3.106228768825531, -1.1206167042255402, -1.624510645866394, 1.3102963268756866, -1.6248805522918701, -0.7630165815353394, 2.588159680366516, 0.18629544973373413, 1.871050775051117, 1.9939693808555603, 3.212405025959015, 1.0923259556293488, 1.4676193594932556, 1.850459337234497, -1.854575514793396, -0.6370886564254761, -0.3667806386947632, 0.5894009470939636, -1.4472685158252716, 1.1129309833049774, 2.8762494027614594, -0.3795955777168274, 3.6179640889167786, 1.412075400352478, 1.5585983395576477, 1.5607191920280457, 1.1473413854837418, 0.051368117332458496, -1.5951862335205078, 1.1134981513023376, 3.602319121360779, 2.647982209920883, 2.231625735759735, 0.09437358379364014, 2.98825940489769, 2.06882107257843, 3.894202470779419, 1.2249691784381866, 0.6109913885593414, 1.2600931525230408, 3.888312876224518, 2.653698205947876, 3.7928194403648376, 3.78766006231308, 2.497298628091812, 3.244860827922821, 1.5325765013694763, 2.1558673977851868, 1.0946676433086395, -0.5991351902484894, 0.9993070363998413, 1.0076514780521393, -0.9858678132295609, 3.0596781373023987, 0.9859656542539597, -0.6234785914421082, 2.0233528912067413, 3.1986913979053497, 1.2487848103046417, -1.1012918949127197, 1.9054069519042969, 0.5774856805801392, 2.873240500688553, 2.0751985609531403, 0.43692925572395325, 0.44107480347156525, 3.7445985674858093, 3.2738695442676544, 1.6449948251247406, 2.79997581243515, -0.5944533348083496, 0.9953901767730713, 1.9119168519973755, 1.992238163948059, 1.6357325911521912, 1.5611605048179626, 0.9630968570709229, -0.2885543704032898, 2.3379001021385193, 1.0430959463119507, 3.019285798072815, 2.56219819188118, -0.0103532075881958, 3.545325219631195, 1.6280039548873901, 2.413131445646286, 2.3907550573349, 0.48759880661964417, 3.15597265958786, 1.919751524925232, 0.7646693168208003, 1.5510457158088684, 1.2842804789543152, 2.391243815422058, 2.117287963628769, 1.5539557337760925, 0.17066586017608643, 1.6957352757453918, 2.897974669933319, 1.5834221541881561, 1.3663267493247986, 1.5662124156951904, 1.8789651691913605, -0.6412868201732635, 2.6010509729385376, -0.36556345224380493, 1.1735109984874725, 2.2970699667930603, 3.421109616756439, 2.345623791217804, 3.973730683326721, -0.2049075961112976, 0.8851002007722855, 3.5538004636764526, 1.331779658794403, 3.5597912073135376, 2.337675094604492, 2.5098856687545776, 1.7456613183021545, 2.1533316373825073, 2.513371169567108, 1.563796043395996, 0.9926588535308838, 2.324680268764496, 1.5514620542526245, 0.8437174558639526, 0.4093678444623947, 1.5524056553840637, 0.4966275990009308, -1.2454921007156372], "step": 15000}
reward_curve.png CHANGED
spindleflow_model_latest.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f011377570fe13fa9e2851657c415886f83f9ffebfad0f8182cafdd1ba4fd55
3
  size 143819552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74272b5c8f16c62fe53d9f29d8b79657c5dfeb342105db48c5b948fbef00681b
3
  size 143819552
training_log.txt CHANGED
@@ -167,3 +167,101 @@
167
  [08:07:27] Ep 3600 | reward -0.091 | Phase 3/3 | Rolling mean: 1.554 / β€” | Episodes in phase: 1375
168
  [08:07:43] Ep 3625 | reward +0.999 | Phase 3/3 | Rolling mean: 1.488 / β€” | Episodes in phase: 1400
169
  [08:07:44] Periodic save at step 10,000 ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  [08:07:27] Ep 3600 | reward -0.091 | Phase 3/3 | Rolling mean: 1.554 / β€” | Episodes in phase: 1375
168
  [08:07:43] Ep 3625 | reward +0.999 | Phase 3/3 | Rolling mean: 1.488 / β€” | Episodes in phase: 1400
169
  [08:07:44] Periodic save at step 10,000 ...
170
+ [08:07:47] Periodic push done β€” 5 files at step 10,000
171
+ [08:07:48] Ep 3650 | reward +3.507 | Phase 3/3 | Rolling mean: 1.491 / β€” | Episodes in phase: 1425
172
+ [08:07:50] Ep 3675 | reward +1.569 | Phase 3/3 | Rolling mean: 1.523 / β€” | Episodes in phase: 1450
173
+ [08:07:51] Ep 3700 | reward +1.832 | Phase 3/3 | Rolling mean: 1.531 / β€” | Episodes in phase: 1475
174
+ [08:08:05] Ep 3725 | reward +1.000 | Phase 3/3 | Rolling mean: 1.487 / β€” | Episodes in phase: 1500
175
+ [08:08:06] Ep 3750 | reward +0.162 | Phase 3/3 | Rolling mean: 1.503 / β€” | Episodes in phase: 1525
176
+ [08:08:09] Ep 3775 | reward +0.485 | Phase 3/3 | Rolling mean: 1.498 / β€” | Episodes in phase: 1550
177
+ [08:08:11] Ep 3800 | reward +2.840 | Phase 3/3 | Rolling mean: 1.526 / β€” | Episodes in phase: 1575
178
+ [08:08:25] Ep 3825 | reward +0.482 | Phase 3/3 | Rolling mean: 1.608 / β€” | Episodes in phase: 1600
179
+ [08:08:26] Ep 3850 | reward +2.716 | Phase 3/3 | Rolling mean: 1.512 / β€” | Episodes in phase: 1625
180
+ [08:08:28] Ep 3875 | reward +2.714 | Phase 3/3 | Rolling mean: 1.549 / β€” | Episodes in phase: 1650
181
+ [08:08:29] Ep 3900 | reward +1.553 | Phase 3/3 | Rolling mean: 1.577 / β€” | Episodes in phase: 1675
182
+ [08:08:45] Ep 3925 | reward -0.296 | Phase 3/3 | Rolling mean: 1.540 / β€” | Episodes in phase: 1700
183
+ [08:08:46] Ep 3950 | reward +1.561 | Phase 3/3 | Rolling mean: 1.466 / β€” | Episodes in phase: 1725
184
+ [08:08:48] Ep 3975 | reward +4.074 | Phase 3/3 | Rolling mean: 1.487 / β€” | Episodes in phase: 1750
185
+ [08:08:51] Ep 4000 | reward +1.566 | Phase 3/3 | Rolling mean: 1.503 / β€” | Episodes in phase: 1775
186
+ [08:09:06] Ep 4025 | reward +3.457 | Phase 3/3 | Rolling mean: 1.455 / β€” | Episodes in phase: 1800
187
+ [08:09:08] Ep 4050 | reward +2.916 | Phase 3/3 | Rolling mean: 1.559 / β€” | Episodes in phase: 1825
188
+ [08:09:09] Ep 4075 | reward -1.564 | Phase 3/3 | Rolling mean: 1.541 / β€” | Episodes in phase: 1850
189
+ [08:09:10] Ep 4100 | reward +3.629 | Phase 3/3 | Rolling mean: 1.564 / β€” | Episodes in phase: 1875
190
+ [08:09:31] Ep 4125 | reward -0.041 | Phase 3/3 | Rolling mean: 1.616 / β€” | Episodes in phase: 1900
191
+ [08:09:33] Ep 4150 | reward +2.066 | Phase 3/3 | Rolling mean: 1.656 / β€” | Episodes in phase: 1925
192
+ [08:09:34] Ep 4175 | reward +2.140 | Phase 3/3 | Rolling mean: 1.644 / β€” | Episodes in phase: 1950
193
+ [08:09:35] Ep 4200 | reward +3.126 | Phase 3/3 | Rolling mean: 1.636 / β€” | Episodes in phase: 1975
194
+ [08:09:51] Ep 4225 | reward +1.709 | Phase 3/3 | Rolling mean: 1.699 / β€” | Episodes in phase: 2000
195
+ [08:09:54] Ep 4250 | reward -0.055 | Phase 3/3 | Rolling mean: 1.668 / β€” | Episodes in phase: 2025
196
+ [08:09:56] Ep 4275 | reward +0.543 | Phase 3/3 | Rolling mean: 1.624 / β€” | Episodes in phase: 2050
197
+ [08:09:57] Ep 4300 | reward +2.236 | Phase 3/3 | Rolling mean: 1.643 / β€” | Episodes in phase: 2075
198
+ [08:10:12] Ep 4325 | reward +0.196 | Phase 3/3 | Rolling mean: 1.628 / β€” | Episodes in phase: 2100
199
+ [08:10:13] Ep 4350 | reward +1.938 | Phase 3/3 | Rolling mean: 1.650 / β€” | Episodes in phase: 2125
200
+ [08:10:14] Ep 4375 | reward +1.430 | Phase 3/3 | Rolling mean: 1.648 / β€” | Episodes in phase: 2150
201
+ [08:10:16] Ep 4400 | reward +1.555 | Phase 3/3 | Rolling mean: 1.652 / β€” | Episodes in phase: 2175
202
+ [08:10:32] Ep 4425 | reward +2.752 | Phase 3/3 | Rolling mean: 1.617 / β€” | Episodes in phase: 2200
203
+ [08:10:34] Ep 4450 | reward +2.292 | Phase 3/3 | Rolling mean: 1.567 / β€” | Episodes in phase: 2225
204
+ [08:10:38] Ep 4475 | reward -0.754 | Phase 3/3 | Rolling mean: 1.611 / β€” | Episodes in phase: 2250
205
+ [08:10:39] Ep 4500 | reward +2.128 | Phase 3/3 | Rolling mean: 1.563 / β€” | Episodes in phase: 2275
206
+ [08:10:55] Ep 4525 | reward +0.792 | Phase 3/3 | Rolling mean: 1.522 / β€” | Episodes in phase: 2300
207
+ [08:10:57] Ep 4550 | reward +2.848 | Phase 3/3 | Rolling mean: 1.561 / β€” | Episodes in phase: 2325
208
+ [08:10:58] Ep 4575 | reward +1.605 | Phase 3/3 | Rolling mean: 1.600 / β€” | Episodes in phase: 2350
209
+ [08:10:59] Ep 4600 | reward +3.546 | Phase 3/3 | Rolling mean: 1.579 / β€” | Episodes in phase: 2375
210
+ [08:11:16] Ep 4625 | reward +0.029 | Phase 3/3 | Rolling mean: 1.570 / β€” | Episodes in phase: 2400
211
+ [08:11:18] Ep 4650 | reward +2.394 | Phase 3/3 | Rolling mean: 1.676 / β€” | Episodes in phase: 2425
212
+ [08:11:19] Ep 4675 | reward +2.261 | Phase 3/3 | Rolling mean: 1.612 / β€” | Episodes in phase: 2450
213
+ [08:11:21] Ep 4700 | reward +0.827 | Phase 3/3 | Rolling mean: 1.650 / β€” | Episodes in phase: 2475
214
+ [08:11:39] Ep 4725 | reward +3.394 | Phase 3/3 | Rolling mean: 1.728 / β€” | Episodes in phase: 2500
215
+ [08:11:40] Ep 4750 | reward +1.881 | Phase 3/3 | Rolling mean: 1.688 / β€” | Episodes in phase: 2525
216
+ [08:11:42] Ep 4775 | reward +2.768 | Phase 3/3 | Rolling mean: 1.665 / β€” | Episodes in phase: 2550
217
+ [08:11:43] Ep 4800 | reward +2.454 | Phase 3/3 | Rolling mean: 1.714 / β€” | Episodes in phase: 2575
218
+ [08:12:01] Ep 4825 | reward +2.767 | Phase 3/3 | Rolling mean: 1.745 / β€” | Episodes in phase: 2600
219
+ [08:12:02] Ep 4850 | reward -0.418 | Phase 3/3 | Rolling mean: 1.665 / β€” | Episodes in phase: 2625
220
+ [08:12:04] Ep 4875 | reward +1.551 | Phase 3/3 | Rolling mean: 1.779 / β€” | Episodes in phase: 2650
221
+ [08:12:06] Ep 4900 | reward +3.347 | Phase 3/3 | Rolling mean: 1.793 / β€” | Episodes in phase: 2675
222
+ [08:12:22] Ep 4925 | reward +1.148 | Phase 3/3 | Rolling mean: 1.783 / β€” | Episodes in phase: 2700
223
+ [08:12:23] Ep 4950 | reward +2.564 | Phase 3/3 | Rolling mean: 1.854 / β€” | Episodes in phase: 2725
224
+ [08:12:27] Ep 4975 | reward +1.749 | Phase 3/3 | Rolling mean: 1.908 / β€” | Episodes in phase: 2750
225
+ [08:12:28] Ep 5000 | reward +3.049 | Phase 3/3 | Rolling mean: 1.911 / β€” | Episodes in phase: 2775
226
+ [08:12:44] Ep 5025 | reward +3.009 | Phase 3/3 | Rolling mean: 1.889 / β€” | Episodes in phase: 2800
227
+ [08:12:45] Ep 5050 | reward +2.506 | Phase 3/3 | Rolling mean: 1.943 / β€” | Episodes in phase: 2825
228
+ [08:12:47] Ep 5075 | reward +1.562 | Phase 3/3 | Rolling mean: 1.932 / β€” | Episodes in phase: 2850
229
+ [08:12:48] Ep 5100 | reward +1.220 | Phase 3/3 | Rolling mean: 1.894 / β€” | Episodes in phase: 2875
230
+ [08:13:06] Ep 5125 | reward +2.966 | Phase 3/3 | Rolling mean: 1.891 / β€” | Episodes in phase: 2900
231
+ [08:13:07] Ep 5150 | reward +2.490 | Phase 3/3 | Rolling mean: 1.806 / β€” | Episodes in phase: 2925
232
+ [08:13:08] Ep 5175 | reward +3.160 | Phase 3/3 | Rolling mean: 1.779 / β€” | Episodes in phase: 2950
233
+ [08:13:11] Ep 5200 | reward +2.764 | Phase 3/3 | Rolling mean: 1.741 / β€” | Episodes in phase: 2975
234
+ [08:13:28] Ep 5225 | reward -1.548 | Phase 3/3 | Rolling mean: 1.686 / β€” | Episodes in phase: 3000
235
+ [08:13:29] Ep 5250 | reward +3.422 | Phase 3/3 | Rolling mean: 1.688 / β€” | Episodes in phase: 3025
236
+ [08:13:30] Ep 5275 | reward +1.713 | Phase 3/3 | Rolling mean: 1.656 / β€” | Episodes in phase: 3050
237
+ [08:13:32] Ep 5300 | reward +1.742 | Phase 3/3 | Rolling mean: 1.694 / β€” | Episodes in phase: 3075
238
+ [08:13:46] Ep 5325 | reward +1.142 | Phase 3/3 | Rolling mean: 1.640 / β€” | Episodes in phase: 3100
239
+ [08:13:48] Ep 5350 | reward +0.820 | Phase 3/3 | Rolling mean: 1.695 / β€” | Episodes in phase: 3125
240
+ [08:13:49] Ep 5375 | reward +1.741 | Phase 3/3 | Rolling mean: 1.693 / β€” | Episodes in phase: 3150
241
+ [08:13:50] Ep 5400 | reward +0.858 | Phase 3/3 | Rolling mean: 1.678 / β€” | Episodes in phase: 3175
242
+ [08:14:06] Ep 5425 | reward +0.858 | Phase 3/3 | Rolling mean: 1.710 / β€” | Episodes in phase: 3200
243
+ [08:14:09] Ep 5450 | reward +0.983 | Phase 3/3 | Rolling mean: 1.734 / β€” | Episodes in phase: 3225
244
+ [08:14:11] Ep 5475 | reward +0.324 | Phase 3/3 | Rolling mean: 1.743 / β€” | Episodes in phase: 3250
245
+ [08:14:12] Ep 5500 | reward +0.666 | Phase 3/3 | Rolling mean: 1.652 / β€” | Episodes in phase: 3275
246
+ [08:14:28] Ep 5525 | reward +0.981 | Phase 3/3 | Rolling mean: 1.719 / β€” | Episodes in phase: 3300
247
+ [08:14:29] Ep 5550 | reward +2.169 | Phase 3/3 | Rolling mean: 1.706 / β€” | Episodes in phase: 3325
248
+ [08:14:30] Ep 5575 | reward +0.982 | Phase 3/3 | Rolling mean: 1.679 / β€” | Episodes in phase: 3350
249
+ [08:14:32] Ep 5600 | reward +1.002 | Phase 3/3 | Rolling mean: 1.733 / β€” | Episodes in phase: 3375
250
+ [08:14:47] Ep 5625 | reward +3.085 | Phase 3/3 | Rolling mean: 1.797 / β€” | Episodes in phase: 3400
251
+ [08:14:48] Ep 5650 | reward +0.114 | Phase 3/3 | Rolling mean: 1.749 / β€” | Episodes in phase: 3425
252
+ [08:14:49] Ep 5675 | reward +2.483 | Phase 3/3 | Rolling mean: 1.784 / β€” | Episodes in phase: 3450
253
+ [08:14:53] Ep 5700 | reward +0.842 | Phase 3/3 | Rolling mean: 1.832 / β€” | Episodes in phase: 3475
254
+ [08:15:09] Ep 5725 | reward -1.542 | Phase 3/3 | Rolling mean: 1.815 / β€” | Episodes in phase: 3500
255
+ [08:15:11] Ep 5750 | reward +1.748 | Phase 3/3 | Rolling mean: 1.753 / β€” | Episodes in phase: 3525
256
+ [08:15:12] Ep 5775 | reward +1.872 | Phase 3/3 | Rolling mean: 1.796 / β€” | Episodes in phase: 3550
257
+ [08:15:13] Ep 5800 | reward +2.027 | Phase 3/3 | Rolling mean: 1.785 / β€” | Episodes in phase: 3575
258
+ [08:15:28] Ep 5825 | reward +2.915 | Phase 3/3 | Rolling mean: 1.741 / β€” | Episodes in phase: 3600
259
+ [08:15:30] Ep 5850 | reward +3.336 | Phase 3/3 | Rolling mean: 1.799 / β€” | Episodes in phase: 3625
260
+ [08:15:31] Ep 5875 | reward +0.619 | Phase 3/3 | Rolling mean: 1.748 / β€” | Episodes in phase: 3650
261
+ [08:15:32] Ep 5900 | reward +0.004 | Phase 3/3 | Rolling mean: 1.753 / β€” | Episodes in phase: 3675
262
+ [08:15:47] Ep 5925 | reward +2.854 | Phase 3/3 | Rolling mean: 1.796 / β€” | Episodes in phase: 3700
263
+ [08:15:49] Ep 5950 | reward +2.325 | Phase 3/3 | Rolling mean: 1.873 / β€” | Episodes in phase: 3725
264
+ [08:15:52] Ep 5975 | reward +2.218 | Phase 3/3 | Rolling mean: 1.895 / β€” | Episodes in phase: 3750
265
+ [08:15:53] Ep 6000 | reward +3.170 | Phase 3/3 | Rolling mean: 1.835 / β€” | Episodes in phase: 3775
266
+ [08:16:08] Ep 6025 | reward +0.068 | Phase 3/3 | Rolling mean: 1.834 / β€” | Episodes in phase: 3800
267
+ [08:16:08] Periodic save at step 15,000 ...
vec_normalize_latest.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00edcbf12ded6ddfd9bff8c0669ffbc0a2e4e0de95378321be3fb17b45f7fcf1
3
  size 166596
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0641d426a429ea3f6c247160b1575b38235caa910f82f5fc207532589b97759f
3
  size 166596