garvitsachdeva commited on
Commit
4599064
Β·
verified Β·
1 Parent(s): 2c51a53

Checkpoint at step 20,000

Browse files
reward_curve.json CHANGED
@@ -1 +1 @@
1
- {"episodes": [0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330, 360, 390, 420, 450, 480, 510, 540, 570, 600, 630, 660, 690, 720, 750, 780, 810, 840, 870, 900, 930, 960, 990, 1020, 1050, 1080, 1110, 1140, 1170, 1200, 1230, 1260, 1290, 1320, 1350, 1380, 1410, 1440, 1470, 1500, 1530, 1560, 1590, 1620, 1650, 1680, 1710, 1740, 1770, 1800, 1830, 1860, 1890, 1920, 1950, 1980, 2010, 2040, 2070, 2100, 2130, 2160, 2190, 2220, 2250, 2280, 2310, 2340, 2370, 2400, 2430, 2460, 2490, 2520, 2550, 2580, 2610, 2640, 2670, 2700, 2730, 2760, 2790, 2820, 2850, 2880, 2910, 2940, 2970, 3000, 3030, 3060, 3090, 3120, 3150, 3180, 3210, 3240, 3270, 3300, 3330, 3360, 3390, 3420, 3450, 3480, 3510, 3540, 3570, 3600, 3630, 3660, 3690, 3720, 3750, 3780, 3810, 3840, 3870, 3900, 3930, 3960, 3990, 4020, 4050, 4080, 4110, 4140, 4170, 4200, 4230, 4260, 4290, 4320, 4350, 4380, 4410, 4440, 4470, 4500, 4530, 4560, 4590, 4620, 4650, 4680, 4710, 4740, 4770, 4800, 4830, 4860, 4890, 4920, 4950, 4980, 5010, 5040, 5070, 5100, 5130, 5160, 5190, 5220, 5250, 5280, 5310, 5340, 5370, 5400, 5430, 5460, 5490, 5520, 5550, 5580, 5610, 5640, 5670, 5700, 5730, 5760, 5790, 5820, 5850, 5880, 5910, 5940, 5970, 6000], "mean_rewards": [7.869385480880737, -0.03784379158769884, -0.38398340008542187, -0.5210831629951577, -0.5376839611586953, -0.5634360476859556, -0.5763718811332027, -0.5373458652470208, -0.5397185524229996, -0.4764093054678442, -0.4615140618583144, -0.5137614059286648, -0.4864953896740069, -0.45432056753359584, -0.4838050582557177, -0.4608095791283378, -0.45296298416134045, -0.4769556150426286, -0.4439206124924814, -0.46383931326204975, -0.5280678830547443, -0.5144892747803832, -0.5157356494235874, -0.4909607601900961, -0.42363169100999043, -0.46855141274285633, -0.38041776629621227, -0.3448548663086844, -0.35473247120983353, -0.3207630833906942, -0.23483040785133247, -0.20471441963477838, -0.158047115078677, -0.13889030838964989, -0.07396525167397514, 0.01805690512529864, -0.0565502941423398, 0.002252017573410312, 0.04511761825704417, 0.04892878985612203, 0.017880844519726487, 0.07235489055415653, 0.136860854966899, 0.18752923034538596, 0.18795315243784858, 0.21790850257180286, 0.2798241166527451, 0.29991839786942137, 0.34795478758840076, 0.35555866552479815, 0.470945018639677, 0.46686866794348947, 0.42320820814405646, 0.4334799908564136, 0.47369504578423915, 0.5062765226389485, 0.5452343198395218, 0.5701588542495133, 0.5511569689877566, 0.5913673671022588, 0.6101769053628545, 0.6990016415824695, 0.7033187948030093, 0.6567048065584856, 0.6408570082110371, 0.6604031588664796, 0.6721762023058208, 0.6225126298323324, 0.6809567648467194, 0.7293495094705842, 0.6778633743740421, 0.6709112041037751, 0.7319613369706006, 0.8407363991117388, 0.8891737701153805, 0.8738852649425918, 0.9014333227411008, 1.0337030563943628, 1.0823944018347962, 1.033293042744158, 1.0458343673274535, 1.1098001648381175, 1.095692048926091, 1.07855425682897, 1.161847744206821, 1.1869417882993947, 1.208030396056306, 1.2077794038699878, 1.2062692112113589, 1.25349467685477, 1.235462390262609, 1.2114898104356318, 1.2211218770124659, 1.2378501558478996, 1.2069873789522723, 1.2299577432024658, 1.2741849172848048, 1.2766783515435478, 1.2896925689134475, 1.3053895339943369, 1.371006360144928, 1.4421150034805452, 1.3858411352686733, 1.3896705019873696, 1.3836635586910493, 1.4107440632724801, 1.3759163208120804, 1.378403567067588, 1.369893674923736, 1.3730929282629578, 1.3362735824085872, 1.353932322443938, 1.4317909455518947, 1.474903305566913, 1.4992357752538774, 1.5263299180614138, 1.495371489162674, 1.4704664318717473, 1.524534152993402, 1.538413893641127, 1.5681221992122811, 1.5714308077605157, 1.5370448222584308, 1.5366823457918495, 1.5262220435489018, 1.4881265710542806, 1.5132084826103085, 1.5588943709462066, 1.5223572991698955, 1.5288328876123696, 1.5251142936685456, 1.5322279708898652, 1.4718089767778755, 1.5043148173066319, 1.532659297278968, 1.5412766329642686, 1.554760075595603, 1.5617171381958699, 1.5802682615536734, 1.5707963926178736, 1.5742931580278923, 1.618845480722406, 1.681138369700384, 1.6638504557052491, 1.6770966182345686, 1.63650145752522, 1.624646826235528, 1.6389080214187726, 1.620934071169726, 1.6273822806358431, 1.613626319843429, 1.5480119745938379, 1.5782707857344804, 1.5856853888808533, 1.6018862150357147, 1.6425269661083213, 1.6230434969893033, 1.606394495180898, 1.6375761047233424, 1.6313965761703895, 1.6889079141102705, 1.7454538686861691, 1.7183754010737369, 1.744705453254853, 1.7503461068891613, 1.7638859317706597, 1.8400702719381603, 1.8697715372607322, 1.8458195232380712, 1.922130432215038, 1.8721037799579596, 1.8412629770024815, 1.8464906375091183, 1.8437538819941877, 1.807503633808255, 1.7754378892041494, 1.732152500254409, 1.7182342989005177, 1.7024477059964094, 1.6869846862283635, 1.6826654165518309, 1.6873143049668589, 1.718196635267509, 1.6883231143891928, 1.6928668349194156, 1.7060435906028304, 1.697957367866974, 1.726183009851417, 1.7559919002404611, 1.7728966967794821, 1.768641453597152, 1.7635819557344923, 1.7206717061452468, 1.7514717591439208, 1.7769463395064586, 1.8193774796029343, 1.8263204891869909, 1.7906362776835727, 1.796637931567253, 1.802112196942187, 1.7855615070187418], "raw_rewards": [7.869385480880737, 0.5789080858230591, -0.22971093654632568, -0.23191767930984497, -1.4611610174179077, -1.462523341178894, -0.16410590708255768, 0.9507952705025673, 0.7632522517815232, 0.12662386894226074, 0.6558951139450073, -1.9698248095810413, -1.4647120237350464, -1.4707489013671875, -1.7659942060709, -1.7310243248939514, -0.9493927657604218, -0.7137753665447235, -1.3451157063245773, -0.3434333801269531, -0.1224970817565918, -0.2025727927684784, 0.7919805943965912, -1.5900378823280334, -0.02213919162750244, 0.23868058621883392, 0.609815925359726, 1.617654800415039, -1.3175745010375977, -0.23402023315429688, -0.7919640690088272, 0.3521580994129181, 0.26505059003829956, 0.3243334889411926, 1.7180908620357513, -0.08261801302433014, -0.5888408124446869, 0.6898269057273865, 1.597887396812439, -3.4195436611771584, -1.036895513534546, 1.735375165939331, 1.1141240745782852, 1.0031541883945465, 3.58181095123291, 1.2519995421171188, -0.905962198972702, 2.344604343175888, -0.16510164737701416, -1.6151320934295654, 1.225896954536438, 2.166758894920349, -0.4428107738494873, -1.1809425950050354, 3.0023685693740845, -1.1184721887111664, 0.20370317995548248, -1.6265513896942139, 0.06449981778860092, 0.9455031752586365, -1.4633913040161133, 2.364822745323181, 0.4447557330131531, 0.6823182106018066, -1.8564150631427765, 2.234480082988739, -0.5784454345703125, 1.0068292915821075, 3.9354124665260315, 2.1249231696128845, -1.3315133452415466, 3.106228768825531, -1.1206167042255402, -1.624510645866394, 1.3102963268756866, -1.6248805522918701, -0.7630165815353394, 2.588159680366516, 0.18629544973373413, 1.871050775051117, 1.9939693808555603, 3.212405025959015, 1.0923259556293488, 1.4676193594932556, 1.850459337234497, -1.854575514793396, -0.6370886564254761, -0.3667806386947632, 0.5894009470939636, -1.4472685158252716, 1.1129309833049774, 2.8762494027614594, -0.3795955777168274, 3.6179640889167786, 1.412075400352478, 1.5585983395576477, 1.5607191920280457, 1.1473413854837418, 0.051368117332458496, -1.5951862335205078, 1.1134981513023376, 3.602319121360779, 2.647982209920883, 2.231625735759735, 0.09437358379364014, 2.98825940489769, 2.06882107257843, 3.894202470779419, 1.2249691784381866, 0.6109913885593414, 1.2600931525230408, 3.888312876224518, 2.653698205947876, 3.7928194403648376, 3.78766006231308, 2.497298628091812, 3.244860827922821, 1.5325765013694763, 2.1558673977851868, 1.0946676433086395, -0.5991351902484894, 0.9993070363998413, 1.0076514780521393, -0.9858678132295609, 3.0596781373023987, 0.9859656542539597, -0.6234785914421082, 2.0233528912067413, 3.1986913979053497, 1.2487848103046417, -1.1012918949127197, 1.9054069519042969, 0.5774856805801392, 2.873240500688553, 2.0751985609531403, 0.43692925572395325, 0.44107480347156525, 3.7445985674858093, 3.2738695442676544, 1.6449948251247406, 2.79997581243515, -0.5944533348083496, 0.9953901767730713, 1.9119168519973755, 1.992238163948059, 1.6357325911521912, 1.5611605048179626, 0.9630968570709229, -0.2885543704032898, 2.3379001021385193, 1.0430959463119507, 3.019285798072815, 2.56219819188118, -0.0103532075881958, 3.545325219631195, 1.6280039548873901, 2.413131445646286, 2.3907550573349, 0.48759880661964417, 3.15597265958786, 1.919751524925232, 0.7646693168208003, 1.5510457158088684, 1.2842804789543152, 2.391243815422058, 2.117287963628769, 1.5539557337760925, 0.17066586017608643, 1.6957352757453918, 2.897974669933319, 1.5834221541881561, 1.3663267493247986, 1.5662124156951904, 1.8789651691913605, -0.6412868201732635, 2.6010509729385376, -0.36556345224380493, 1.1735109984874725, 2.2970699667930603, 3.421109616756439, 2.345623791217804, 3.973730683326721, -0.2049075961112976, 0.8851002007722855, 3.5538004636764526, 1.331779658794403, 3.5597912073135376, 2.337675094604492, 2.5098856687545776, 1.7456613183021545, 2.1533316373825073, 2.513371169567108, 1.563796043395996, 0.9926588535308838, 2.324680268764496, 1.5514620542526245, 0.8437174558639526, 0.4093678444623947, 1.5524056553840637, 0.4966275990009308, -1.2454921007156372], "step": 15000}
 
1
+ {"episodes": [0, 42, 84, 126, 168, 210, 252, 294, 336, 378, 420, 462, 504, 546, 588, 630, 672, 714, 756, 798, 840, 882, 924, 966, 1008, 1050, 1092, 1134, 1176, 1218, 1260, 1302, 1344, 1386, 1428, 1470, 1512, 1554, 1596, 1638, 1680, 1722, 1764, 1806, 1848, 1890, 1932, 1974, 2016, 2058, 2100, 2142, 2184, 2226, 2268, 2310, 2352, 2394, 2436, 2478, 2520, 2562, 2604, 2646, 2688, 2730, 2772, 2814, 2856, 2898, 2940, 2982, 3024, 3066, 3108, 3150, 3192, 3234, 3276, 3318, 3360, 3402, 3444, 3486, 3528, 3570, 3612, 3654, 3696, 3738, 3780, 3822, 3864, 3906, 3948, 3990, 4032, 4074, 4116, 4158, 4200, 4242, 4284, 4326, 4368, 4410, 4452, 4494, 4536, 4578, 4620, 4662, 4704, 4746, 4788, 4830, 4872, 4914, 4956, 4998, 5040, 5082, 5124, 5166, 5208, 5250, 5292, 5334, 5376, 5418, 5460, 5502, 5544, 5586, 5628, 5670, 5712, 5754, 5796, 5838, 5880, 5922, 5964, 6006, 6048, 6090, 6132, 6174, 6216, 6258, 6300, 6342, 6384, 6426, 6468, 6510, 6552, 6594, 6636, 6678, 6720, 6762, 6804, 6846, 6888, 6930, 6972, 7014, 7056, 7098, 7140, 7182, 7224, 7266, 7308, 7350, 7392, 7434, 7476, 7518, 7560, 7602, 7644, 7686, 7728, 7770, 7812, 7854, 7896, 7938, 7980, 8022, 8064, 8106, 8148, 8190, 8232, 8274, 8316, 8358, 8400, 8442], "mean_rewards": [7.869385480880737, -0.2949352840524773, -0.4952996876748169, -0.5199973956043795, -0.6174052359106449, -0.5373458652470208, -0.49625358010896226, -0.47238873628804745, -0.4706542715678981, -0.47499917030069083, -0.5006424231640552, -0.5145500502043914, -0.5115612921467543, -0.4771832878161046, -0.4433126796612446, -0.4632618394482635, -0.456389642276643, -0.447543691553808, -0.4684159628484609, -0.4349985238832404, -0.3944579961507599, -0.33797204329737657, -0.2693970410179628, -0.24447371565104234, -0.16345096840347462, -0.11572722794119057, -0.09881766136946543, -0.07444735454782, -0.012611925175835221, 0.022194254436126014, 0.10274628089863877, 0.14136686710913157, 0.1848921677655593, 0.20126053468403318, 0.23584040265556705, 0.2791989072498356, 0.3556912602101154, 0.3711379658791043, 0.41904636950425384, 0.4871723190354148, 0.49190893542309255, 0.5197475980214976, 0.5344401113332012, 0.6119090946877753, 0.6077940996537503, 0.6080971452850137, 0.585951357499481, 0.671064635268127, 0.6593267258980425, 0.6873161014875634, 0.7041500217637118, 0.715485646066537, 0.8034420476211587, 0.7868460612098029, 0.818281575807974, 0.9098733552803441, 0.9931412801042653, 0.9620735797011291, 1.0410858473792617, 1.047098892554579, 1.1130033807529678, 1.1595609719266955, 1.1603956472084083, 1.189735481143492, 1.2016556340301046, 1.171680602527285, 1.1892861477088377, 1.2292941701027371, 1.2535419590674552, 1.3050417096731868, 1.2991100276143834, 1.3035879157426469, 1.3206478618678443, 1.3221849669693566, 1.3032227260201186, 1.3922748728918861, 1.3904742702253092, 1.4075497126268257, 1.3999544971508309, 1.3585951193719048, 1.401334112760488, 1.4241763773255647, 1.4504369786470461, 1.4591521133695078, 1.4979405961359658, 1.5108637377347376, 1.4940121352655005, 1.5021781849527713, 1.5182731466857664, 1.5629961201185272, 1.515429445260143, 1.5572113326897863, 1.5093122213048737, 1.5162708755763235, 1.4925893841122984, 1.5068567290713786, 1.5223539526421106, 1.5419064695666267, 1.561460198238459, 1.570351040876253, 1.570216936346216, 1.5761766723768438, 1.6068325873735065, 1.613772955745072, 1.6552447412857119, 1.6462390915587333, 1.6372316681285122, 1.6073656982873457, 1.592204071781504, 1.6016886271615802, 1.6301611374807043, 1.6145569434889557, 1.6107396750306024, 1.6238551464572462, 1.6357820498482878, 1.6619270062760305, 1.6894643379106677, 1.7275706677727665, 1.785425909732541, 1.7937826996195274, 1.8181767136666853, 1.8143030544116936, 1.8381750764328555, 1.8158522040225162, 1.8260731318941392, 1.7974370324934497, 1.7981947634833424, 1.7741890611028999, 1.7521160172363348, 1.712481968780817, 1.7148109309071617, 1.6759713221748276, 1.6941063414843431, 1.7073139056482656, 1.7217200488049234, 1.7622721078142551, 1.7518380036230807, 1.735415602409976, 1.7477551485732514, 1.7851004104705457, 1.7679061309746378, 1.7976991302610532, 1.8257586488759425, 1.8042287188559472, 1.777249003771259, 1.7483236596297775, 1.7362275231376805, 1.7692375490285759, 1.7370202693765127, 1.7360723117135592, 1.768777926935451, 1.7647074289960176, 1.7475759104234487, 1.7903672372657002, 1.8103490486842106, 1.8558027431794952, 1.9250150367657144, 1.9203967902093317, 1.9439025071849343, 1.938004134038907, 1.9217347231532123, 1.9146441257001947, 1.9125079813831758, 1.8749578054648393, 1.825682910269605, 1.7965219531802434, 1.7844053784496248, 1.7467980947755146, 1.7836011220095076, 1.7822839737896432, 1.7661089524238223, 1.786190782425039, 1.8014363352104172, 1.820912442409092, 1.822568523602693, 1.8153345679073816, 1.822242390221223, 1.8705398875596453, 1.8258761658026004, 1.8366791110157412, 1.8185347392055466, 1.8701389919316291, 1.8588750906604923, 1.8892388550459214, 1.9062272127155426, 1.9422058453827322, 1.9403631834111195, 1.9122930205721442, 1.894687196589075, 1.9080439570144627, 1.8851423501893703, 1.8484355723748933, 1.84887556860856, 1.8620412626725464, 1.8754797673186745, 1.8643548319002017, 1.856698218289376, 1.9023429482652507, 1.9486914368913035, 1.9762877621797774, 2.0026946232307106, 2.019449805252184], "raw_rewards": [7.869385480880737, 0.30212289094924927, -0.1653035283088684, -0.8742947578430176, -1.4332927465438843, 0.9507952705025673, -1.6128928735852242, -1.4756847620010376, 0.12714916467666626, -1.4666680097579956, -1.7659942060709, 0.0034030526876449585, -1.0772457867860794, -0.939087450504303, -0.09268271923065186, -0.2025727927684784, 1.2409679293632507, -1.549471378326416, -2.000951513648033, 1.5722772777080536, -1.3175745010375977, 2.127745598554611, 3.3618789315223694, -1.929141715168953, 0.09446060657501221, -0.08261801302433014, 0.2618691325187683, -0.5134999752044678, -2.073127508163452, 0.27165186405181885, 1.1141240745782852, 0.22413843870162964, 0.6378590166568756, 0.7410027906298637, -1.7340275347232819, -1.6151320934295654, -1.6202170848846436, 2.1107622385025024, 1.6147027611732483, 2.2008373141288757, 0.20370317995548248, 0.1037561446428299, 0.7834433019161224, 3.9312584400177, 1.9446720480918884, 0.6823182106018066, -0.11399078369140625, 4.084675073623657, 0.44461843371391296, 1.9997044205665588, -1.3315133452415466, 1.0650655627250671, 1.8860607743263245, -0.960464209318161, -0.8703119158744812, 2.588159680366516, 2.276316821575165, 0.3519379496574402, 3.8637195825576782, 0.9018396735191345, 1.850459337234497, 3.7698827385902405, 2.1691126227378845, -0.4405495524406433, -0.8176892399787903, 2.8762494027614594, 0.05018448829650879, -1.7433454543352127, 3.2934361696243286, 0.27119821310043335, 0.051368117332458496, 3.860495090484619, 1.6872042417526245, -1.595702052116394, 0.2707443833351135, 2.98825940489769, 2.3140229284763336, -0.6839565932750702, 1.537642002105713, 2.6002658009529114, 2.653698205947876, 2.018460303544998, 2.1577287912368774, 1.5650858879089355, 1.5288212597370148, 1.0946676433086395, 0.005232870578765869, -0.5721810460090637, 2.591393381357193, 3.504615902900696, -0.6234785914421082, 1.2751400172710419, 2.967794418334961, 0.6752512454986572, 0.929683268070221, 2.873240500688553, 1.3088274002075195, -1.563977837562561, 1.0894655883312225, 1.6831098198890686, 2.79997581243515, 0.9091663900762796, 1.039250373840332, -0.2278135120868683, 2.2370247542858124, 0.9630968570709229, 2.1286914348602295, 1.1031403839588165, 2.2346373796463013, 0.984773188829422, 3.545325219631195, 0.7524199932813644, 1.964707374572754, 0.95658740401268, 1.6900435984134674, 0.7646693168208003, 0.784693107008934, 2.9248103499412537, 2.764322817325592, 2.37806236743927, 1.6957352757453918, 2.5313061475753784, 2.9660938680171967, 2.546180844306946, 1.4264174401760101, 2.6010509729385376, 3.131833851337433, 2.490403026342392, 2.2305434346199036, 0.45485207438468933, -0.2049075961112976, 3.9700870513916016, 2.6260368824005127, 1.745518147945404, 2.2572388648986816, 1.7456613183021545, 2.950973927974701, 1.3027393817901611, 1.7355260252952576, 2.3226271867752075, 0.8437174558639526, 2.2532759606838226, 1.7347663044929504, -0.0050980448722839355, 1.169892281293869, 0.5850276499986649, 1.1715667843818665, 0.593328595161438, 1.353189766407013, 0.7872850801795721, -1.6005812138319016, -1.0769227743148804, 2.288356304168701, 3.108502149581909, 0.614934578537941, 0.6843967437744141, 2.204077661037445, -0.0004414021968841553, 3.388222098350525, 2.3059264421463013, 2.2062636613845825, -0.07324928045272827, 1.3386163115501404, 1.6970834732055664, 2.4391518235206604, 0.4920702278614044, 0.9900135844945908, 2.2905810475349426, 0.4571004608296789, 1.2120925784111023, 2.885680675506592, 1.2613102197647095, 0.6581690907478333, 3.0120148062705994, 2.863319218158722, 2.0909981727600098, 1.5308949947357178, 1.694356083869934, 1.9248302578926086, 2.037512093782425, 0.16763997077941895, 2.313068985939026, 0.20004019141197205, 2.676566481590271, 1.5560699701309204, 2.6811817288398743, 2.6683337092399597, 0.7397091090679169, 0.6628532111644745, 3.093649685382843, 3.0954139828681946, 1.6091610789299011, -0.8415878489613533, 1.2594474256038666, 3.1203470826148987, 1.0595019459724426, 0.6205319166183472, 2.963775932788849, 0.620332658290863, 2.686708927154541, 0.3639736622571945, 2.836131751537323], "step": 20000}
reward_curve.png CHANGED
spindleflow_model_latest.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74272b5c8f16c62fe53d9f29d8b79657c5dfeb342105db48c5b948fbef00681b
3
- size 143819552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7aec515b0112638963c9c37fec3156be35df48402601b9a342c59b24557ae238
3
+ size 143819553
training_log.txt CHANGED
@@ -265,3 +265,103 @@
265
  [08:15:53] Ep 6000 | reward +3.170 | Phase 3/3 | Rolling mean: 1.835 / β€” | Episodes in phase: 3775
266
  [08:16:08] Ep 6025 | reward +0.068 | Phase 3/3 | Rolling mean: 1.834 / β€” | Episodes in phase: 3800
267
  [08:16:08] Periodic save at step 15,000 ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  [08:15:53] Ep 6000 | reward +3.170 | Phase 3/3 | Rolling mean: 1.835 / β€” | Episodes in phase: 3775
266
  [08:16:08] Ep 6025 | reward +0.068 | Phase 3/3 | Rolling mean: 1.834 / β€” | Episodes in phase: 3800
267
  [08:16:08] Periodic save at step 15,000 ...
268
+ [08:16:11] Periodic push done β€” 5 files at step 15,000
269
+ [08:16:12] Ep 6050 | reward +1.294 | Phase 3/3 | Rolling mean: 1.760 / β€” | Episodes in phase: 3825
270
+ [08:16:13] Ep 6075 | reward -0.963 | Phase 3/3 | Rolling mean: 1.749 / β€” | Episodes in phase: 3850
271
+ [08:16:15] Ep 6100 | reward +1.004 | Phase 3/3 | Rolling mean: 1.748 / β€” | Episodes in phase: 3875
272
+ [08:16:30] Ep 6125 | reward +1.414 | Phase 3/3 | Rolling mean: 1.696 / β€” | Episodes in phase: 3900
273
+ [08:16:32] Ep 6150 | reward +1.373 | Phase 3/3 | Rolling mean: 1.669 / β€” | Episodes in phase: 3925
274
+ [08:16:33] Ep 6175 | reward +0.593 | Phase 3/3 | Rolling mean: 1.626 / β€” | Episodes in phase: 3950
275
+ [08:16:34] Ep 6200 | reward +2.699 | Phase 3/3 | Rolling mean: 1.643 / β€” | Episodes in phase: 3975
276
+ [08:16:54] Ep 6225 | reward +2.586 | Phase 3/3 | Rolling mean: 1.619 / β€” | Episodes in phase: 4000
277
+ [08:16:55] Ep 6250 | reward +1.268 | Phase 3/3 | Rolling mean: 1.669 / β€” | Episodes in phase: 4025
278
+ [08:16:57] Ep 6275 | reward +3.113 | Phase 3/3 | Rolling mean: 1.792 / β€” | Episodes in phase: 4050
279
+ [08:16:58] Ep 6300 | reward +2.942 | Phase 3/3 | Rolling mean: 1.832 / β€” | Episodes in phase: 4075
280
+ [08:17:16] Ep 6325 | reward +2.941 | Phase 3/3 | Rolling mean: 1.865 / β€” | Episodes in phase: 4100
281
+ [08:17:17] Ep 6350 | reward +1.657 | Phase 3/3 | Rolling mean: 1.826 / β€” | Episodes in phase: 4125
282
+ [08:17:19] Ep 6375 | reward +1.085 | Phase 3/3 | Rolling mean: 1.824 / β€” | Episodes in phase: 4150
283
+ [08:17:20] Ep 6400 | reward +0.326 | Phase 3/3 | Rolling mean: 1.899 / β€” | Episodes in phase: 4175
284
+ [08:17:34] Ep 6425 | reward -0.289 | Phase 3/3 | Rolling mean: 1.936 / β€” | Episodes in phase: 4200
285
+ [08:17:35] Ep 6450 | reward +0.661 | Phase 3/3 | Rolling mean: 1.959 / β€” | Episodes in phase: 4225
286
+ [08:17:39] Ep 6475 | reward +3.628 | Phase 3/3 | Rolling mean: 1.923 / β€” | Episodes in phase: 4250
287
+ [08:17:41] Ep 6500 | reward +0.684 | Phase 3/3 | Rolling mean: 1.907 / β€” | Episodes in phase: 4275
288
+ [08:17:58] Ep 6525 | reward +2.853 | Phase 3/3 | Rolling mean: 1.924 / β€” | Episodes in phase: 4300
289
+ [08:17:59] Ep 6550 | reward +3.239 | Phase 3/3 | Rolling mean: 1.995 / β€” | Episodes in phase: 4325
290
+ [08:18:01] Ep 6575 | reward -1.630 | Phase 3/3 | Rolling mean: 2.006 / β€” | Episodes in phase: 4350
291
+ [08:18:02] Ep 6600 | reward +2.893 | Phase 3/3 | Rolling mean: 1.982 / β€” | Episodes in phase: 4375
292
+ [08:18:20] Ep 6625 | reward +1.059 | Phase 3/3 | Rolling mean: 1.976 / β€” | Episodes in phase: 4400
293
+ [08:18:21] Ep 6650 | reward +3.015 | Phase 3/3 | Rolling mean: 1.982 / β€” | Episodes in phase: 4425
294
+ [08:18:22] Ep 6675 | reward +1.059 | Phase 3/3 | Rolling mean: 1.907 / β€” | Episodes in phase: 4450
295
+ [08:18:23] Ep 6700 | reward +0.000 | Phase 3/3 | Rolling mean: 1.970 / β€” | Episodes in phase: 4475
296
+ [08:18:41] Ep 6725 | reward +2.527 | Phase 3/3 | Rolling mean: 1.933 / β€” | Episodes in phase: 4500
297
+ [08:18:43] Ep 6750 | reward +2.207 | Phase 3/3 | Rolling mean: 1.882 / β€” | Episodes in phase: 4525
298
+ [08:18:44] Ep 6775 | reward +0.991 | Phase 3/3 | Rolling mean: 1.876 / β€” | Episodes in phase: 4550
299
+ [08:18:45] Ep 6800 | reward +1.038 | Phase 3/3 | Rolling mean: 1.838 / β€” | Episodes in phase: 4575
300
+ [08:19:00] Ep 6825 | reward +1.530 | Phase 3/3 | Rolling mean: 1.793 / β€” | Episodes in phase: 4600
301
+ [08:19:01] Ep 6850 | reward -0.560 | Phase 3/3 | Rolling mean: 1.732 / β€” | Episodes in phase: 4625
302
+ [08:19:02] Ep 6875 | reward +2.855 | Phase 3/3 | Rolling mean: 1.745 / β€” | Episodes in phase: 4650
303
+ [08:19:04] Ep 6900 | reward +2.213 | Phase 3/3 | Rolling mean: 1.663 / β€” | Episodes in phase: 4675
304
+ [08:19:18] Ep 6925 | reward +2.214 | Phase 3/3 | Rolling mean: 1.676 / β€” | Episodes in phase: 4700
305
+ [08:19:20] Ep 6950 | reward +0.990 | Phase 3/3 | Rolling mean: 1.641 / β€” | Episodes in phase: 4725
306
+ [08:19:23] Ep 6975 | reward +0.355 | Phase 3/3 | Rolling mean: 1.665 / β€” | Episodes in phase: 4750
307
+ [08:19:24] Ep 7000 | reward +2.411 | Phase 3/3 | Rolling mean: 1.658 / β€” | Episodes in phase: 4775
308
+ [08:19:39] Ep 7025 | reward +0.505 | Phase 3/3 | Rolling mean: 1.714 / β€” | Episodes in phase: 4800
309
+ [08:19:40] Ep 7050 | reward +2.409 | Phase 3/3 | Rolling mean: 1.794 / β€” | Episodes in phase: 4825
310
+ [08:19:41] Ep 7075 | reward +2.886 | Phase 3/3 | Rolling mean: 1.792 / β€” | Episodes in phase: 4850
311
+ [08:19:43] Ep 7100 | reward +2.091 | Phase 3/3 | Rolling mean: 1.843 / β€” | Episodes in phase: 4875
312
+ [08:19:59] Ep 7125 | reward +0.874 | Phase 3/3 | Rolling mean: 1.878 / β€” | Episodes in phase: 4900
313
+ [08:20:00] Ep 7150 | reward +2.551 | Phase 3/3 | Rolling mean: 1.926 / β€” | Episodes in phase: 4925
314
+ [08:20:01] Ep 7175 | reward +2.947 | Phase 3/3 | Rolling mean: 1.910 / β€” | Episodes in phase: 4950
315
+ [08:20:02] Ep 7200 | reward +1.366 | Phase 3/3 | Rolling mean: 1.940 / β€” | Episodes in phase: 4975
316
+ [08:20:20] Ep 7225 | reward +0.658 | Phase 3/3 | Rolling mean: 1.939 / β€” | Episodes in phase: 5000
317
+ [08:20:21] Ep 7250 | reward +0.482 | Phase 3/3 | Rolling mean: 1.899 / β€” | Episodes in phase: 5025
318
+ [08:20:22] Ep 7275 | reward +1.392 | Phase 3/3 | Rolling mean: 1.878 / β€” | Episodes in phase: 5050
319
+ [08:20:24] Ep 7300 | reward +0.991 | Phase 3/3 | Rolling mean: 1.802 / β€” | Episodes in phase: 5075
320
+ [08:20:40] Ep 7325 | reward +1.144 | Phase 3/3 | Rolling mean: 1.774 / β€” | Episodes in phase: 5100
321
+ [08:20:41] Ep 7350 | reward +1.261 | Phase 3/3 | Rolling mean: 1.749 / β€” | Episodes in phase: 5125
322
+ [08:20:42] Ep 7375 | reward +2.112 | Phase 3/3 | Rolling mean: 1.724 / β€” | Episodes in phase: 5150
323
+ [08:20:43] Ep 7400 | reward +2.036 | Phase 3/3 | Rolling mean: 1.751 / β€” | Episodes in phase: 5175
324
+ [08:20:59] Ep 7425 | reward +2.308 | Phase 3/3 | Rolling mean: 1.775 / β€” | Episodes in phase: 5200
325
+ [08:21:02] Ep 7450 | reward +1.764 | Phase 3/3 | Rolling mean: 1.743 / β€” | Episodes in phase: 5225
326
+ [08:21:03] Ep 7475 | reward +1.355 | Phase 3/3 | Rolling mean: 1.790 / β€” | Episodes in phase: 5250
327
+ [08:21:04] Ep 7500 | reward +1.752 | Phase 3/3 | Rolling mean: 1.847 / β€” | Episodes in phase: 5275
328
+ [08:21:20] Ep 7525 | reward +1.202 | Phase 3/3 | Rolling mean: 1.866 / β€” | Episodes in phase: 5300
329
+ [08:21:21] Ep 7550 | reward +1.265 | Phase 3/3 | Rolling mean: 1.907 / β€” | Episodes in phase: 5325
330
+ [08:21:22] Ep 7575 | reward +2.410 | Phase 3/3 | Rolling mean: 1.918 / β€” | Episodes in phase: 5350
331
+ [08:21:24] Ep 7600 | reward +2.552 | Phase 3/3 | Rolling mean: 1.963 / β€” | Episodes in phase: 5375
332
+ [08:21:37] Ep 7625 | reward +3.110 | Phase 3/3 | Rolling mean: 1.961 / β€” | Episodes in phase: 5400
333
+ [08:21:38] Ep 7650 | reward +0.200 | Phase 3/3 | Rolling mean: 1.953 / β€” | Episodes in phase: 5425
334
+ [08:21:40] Ep 7675 | reward +2.411 | Phase 3/3 | Rolling mean: 1.982 / β€” | Episodes in phase: 5450
335
+ [08:21:41] Ep 7700 | reward +1.059 | Phase 3/3 | Rolling mean: 1.966 / β€” | Episodes in phase: 5475
336
+ [08:21:57] Ep 7725 | reward +3.306 | Phase 3/3 | Rolling mean: 1.974 / β€” | Episodes in phase: 5500
337
+ [08:21:58] Ep 7750 | reward +2.317 | Phase 3/3 | Rolling mean: 1.970 / β€” | Episodes in phase: 5525
338
+ [08:22:00] Ep 7775 | reward +2.925 | Phase 3/3 | Rolling mean: 2.017 / β€” | Episodes in phase: 5550
339
+ [08:22:01] Ep 7800 | reward +2.278 | Phase 3/3 | Rolling mean: 1.931 / β€” | Episodes in phase: 5575
340
+ [08:22:20] Ep 7825 | reward +3.443 | Phase 3/3 | Rolling mean: 1.886 / β€” | Episodes in phase: 5600
341
+ [08:22:21] Ep 7850 | reward +3.094 | Phase 3/3 | Rolling mean: 1.889 / β€” | Episodes in phase: 5625
342
+ [08:22:22] Ep 7875 | reward +2.675 | Phase 3/3 | Rolling mean: 1.833 / β€” | Episodes in phase: 5650
343
+ [08:22:23] Ep 7900 | reward +2.489 | Phase 3/3 | Rolling mean: 1.845 / β€” | Episodes in phase: 5675
344
+ [08:22:37] Ep 7925 | reward +2.072 | Phase 3/3 | Rolling mean: 1.816 / β€” | Episodes in phase: 5700
345
+ [08:22:39] Ep 7950 | reward +0.401 | Phase 3/3 | Rolling mean: 1.803 / β€” | Episodes in phase: 5725
346
+ [08:22:42] Ep 7975 | reward +0.619 | Phase 3/3 | Rolling mean: 1.770 / β€” | Episodes in phase: 5750
347
+ [08:22:43] Ep 8000 | reward +2.239 | Phase 3/3 | Rolling mean: 1.808 / β€” | Episodes in phase: 5775
348
+ [08:22:59] Ep 8025 | reward +1.391 | Phase 3/3 | Rolling mean: 1.785 / β€” | Episodes in phase: 5800
349
+ [08:23:00] Ep 8050 | reward +2.563 | Phase 3/3 | Rolling mean: 1.821 / β€” | Episodes in phase: 5825
350
+ [08:23:02] Ep 8075 | reward -0.242 | Phase 3/3 | Rolling mean: 1.838 / β€” | Episodes in phase: 5850
351
+ [08:23:03] Ep 8100 | reward +1.609 | Phase 3/3 | Rolling mean: 1.892 / β€” | Episodes in phase: 5875
352
+ [08:23:18] Ep 8125 | reward +1.544 | Phase 3/3 | Rolling mean: 1.920 / β€” | Episodes in phase: 5900
353
+ [08:23:20] Ep 8150 | reward +2.318 | Phase 3/3 | Rolling mean: 1.953 / β€” | Episodes in phase: 5925
354
+ [08:23:21] Ep 8175 | reward +3.053 | Phase 3/3 | Rolling mean: 1.952 / β€” | Episodes in phase: 5950
355
+ [08:23:22] Ep 8200 | reward +0.620 | Phase 3/3 | Rolling mean: 1.922 / β€” | Episodes in phase: 5975
356
+ [08:23:38] Ep 8225 | reward +2.690 | Phase 3/3 | Rolling mean: 1.960 / β€” | Episodes in phase: 6000
357
+ [08:23:40] Ep 8250 | reward +1.656 | Phase 3/3 | Rolling mean: 1.986 / β€” | Episodes in phase: 6025
358
+ [08:23:41] Ep 8275 | reward +2.964 | Phase 3/3 | Rolling mean: 1.998 / β€” | Episodes in phase: 6050
359
+ [08:23:42] Ep 8300 | reward +3.911 | Phase 3/3 | Rolling mean: 1.953 / β€” | Episodes in phase: 6075
360
+ [08:23:57] Ep 8325 | reward +1.108 | Phase 3/3 | Rolling mean: 1.979 / β€” | Episodes in phase: 6100
361
+ [08:23:58] Ep 8350 | reward +3.940 | Phase 3/3 | Rolling mean: 2.029 / β€” | Episodes in phase: 6125
362
+ [08:24:00] Ep 8375 | reward +2.965 | Phase 3/3 | Rolling mean: 2.016 / β€” | Episodes in phase: 6150
363
+ [08:24:01] Ep 8400 | reward +2.909 | Phase 3/3 | Rolling mean: 2.081 / β€” | Episodes in phase: 6175
364
+ [08:24:16] Ep 8425 | reward -0.087 | Phase 3/3 | Rolling mean: 2.094 / β€” | Episodes in phase: 6200
365
+ [08:24:17] Ep 8450 | reward +1.111 | Phase 3/3 | Rolling mean: 2.066 / β€” | Episodes in phase: 6225
366
+ [08:24:20] Ep 8475 | reward +3.093 | Phase 3/3 | Rolling mean: 2.109 / β€” | Episodes in phase: 6250
367
+ [08:24:20] Periodic save at step 20,000 ...
vec_normalize_latest.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0641d426a429ea3f6c247160b1575b38235caa910f82f5fc207532589b97759f
3
  size 166596
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74da382d70b483573ff4ffbe3b76bb4cfd839b08d630dbc2b8a60df92c0c7f71
3
  size 166596