garvitsachdeva committed on
Commit
9c7843a
·
verified ·
1 Parent(s): 4599064

Checkpoint at step 25,000

Browse files
reward_curve.json CHANGED
@@ -1 +1 @@
1
- {"episodes": [0, 42, 84, 126, 168, 210, 252, 294, 336, 378, 420, 462, 504, 546, 588, 630, 672, 714, 756, 798, 840, 882, 924, 966, 1008, 1050, 1092, 1134, 1176, 1218, 1260, 1302, 1344, 1386, 1428, 1470, 1512, 1554, 1596, 1638, 1680, 1722, 1764, 1806, 1848, 1890, 1932, 1974, 2016, 2058, 2100, 2142, 2184, 2226, 2268, 2310, 2352, 2394, 2436, 2478, 2520, 2562, 2604, 2646, 2688, 2730, 2772, 2814, 2856, 2898, 2940, 2982, 3024, 3066, 3108, 3150, 3192, 3234, 3276, 3318, 3360, 3402, 3444, 3486, 3528, 3570, 3612, 3654, 3696, 3738, 3780, 3822, 3864, 3906, 3948, 3990, 4032, 4074, 4116, 4158, 4200, 4242, 4284, 4326, 4368, 4410, 4452, 4494, 4536, 4578, 4620, 4662, 4704, 4746, 4788, 4830, 4872, 4914, 4956, 4998, 5040, 5082, 5124, 5166, 5208, 5250, 5292, 5334, 5376, 5418, 5460, 5502, 5544, 5586, 5628, 5670, 5712, 5754, 5796, 5838, 5880, 5922, 5964, 6006, 6048, 6090, 6132, 6174, 6216, 6258, 6300, 6342, 6384, 6426, 6468, 6510, 6552, 6594, 6636, 6678, 6720, 6762, 6804, 6846, 6888, 6930, 6972, 7014, 7056, 7098, 7140, 7182, 7224, 7266, 7308, 7350, 7392, 7434, 7476, 7518, 7560, 7602, 7644, 7686, 7728, 7770, 7812, 7854, 7896, 7938, 7980, 8022, 8064, 8106, 8148, 8190, 8232, 8274, 8316, 8358, 8400, 8442], "mean_rewards": [7.869385480880737, -0.2949352840524773, -0.4952996876748169, -0.5199973956043795, -0.6174052359106449, -0.5373458652470208, -0.49625358010896226, -0.47238873628804745, -0.4706542715678981, -0.47499917030069083, -0.5006424231640552, -0.5145500502043914, -0.5115612921467543, -0.4771832878161046, -0.4433126796612446, -0.4632618394482635, -0.456389642276643, -0.447543691553808, -0.4684159628484609, -0.4349985238832404, -0.3944579961507599, -0.33797204329737657, -0.2693970410179628, -0.24447371565104234, -0.16345096840347462, -0.11572722794119057, -0.09881766136946543, -0.07444735454782, -0.012611925175835221, 0.022194254436126014, 0.10274628089863877, 0.14136686710913157, 0.1848921677655593, 0.20126053468403318, 0.23584040265556705, 0.2791989072498356, 0.3556912602101154, 
0.3711379658791043, 0.41904636950425384, 0.4871723190354148, 0.49190893542309255, 0.5197475980214976, 0.5344401113332012, 0.6119090946877753, 0.6077940996537503, 0.6080971452850137, 0.585951357499481, 0.671064635268127, 0.6593267258980425, 0.6873161014875634, 0.7041500217637118, 0.715485646066537, 0.8034420476211587, 0.7868460612098029, 0.818281575807974, 0.9098733552803441, 0.9931412801042653, 0.9620735797011291, 1.0410858473792617, 1.047098892554579, 1.1130033807529678, 1.1595609719266955, 1.1603956472084083, 1.189735481143492, 1.2016556340301046, 1.171680602527285, 1.1892861477088377, 1.2292941701027371, 1.2535419590674552, 1.3050417096731868, 1.2991100276143834, 1.3035879157426469, 1.3206478618678443, 1.3221849669693566, 1.3032227260201186, 1.3922748728918861, 1.3904742702253092, 1.4075497126268257, 1.3999544971508309, 1.3585951193719048, 1.401334112760488, 1.4241763773255647, 1.4504369786470461, 1.4591521133695078, 1.4979405961359658, 1.5108637377347376, 1.4940121352655005, 1.5021781849527713, 1.5182731466857664, 1.5629961201185272, 1.515429445260143, 1.5572113326897863, 1.5093122213048737, 1.5162708755763235, 1.4925893841122984, 1.5068567290713786, 1.5223539526421106, 1.5419064695666267, 1.561460198238459, 1.570351040876253, 1.570216936346216, 1.5761766723768438, 1.6068325873735065, 1.613772955745072, 1.6552447412857119, 1.6462390915587333, 1.6372316681285122, 1.6073656982873457, 1.592204071781504, 1.6016886271615802, 1.6301611374807043, 1.6145569434889557, 1.6107396750306024, 1.6238551464572462, 1.6357820498482878, 1.6619270062760305, 1.6894643379106677, 1.7275706677727665, 1.785425909732541, 1.7937826996195274, 1.8181767136666853, 1.8143030544116936, 1.8381750764328555, 1.8158522040225162, 1.8260731318941392, 1.7974370324934497, 1.7981947634833424, 1.7741890611028999, 1.7521160172363348, 1.712481968780817, 1.7148109309071617, 1.6759713221748276, 1.6941063414843431, 1.7073139056482656, 1.7217200488049234, 1.7622721078142551, 1.7518380036230807, 
1.735415602409976, 1.7477551485732514, 1.7851004104705457, 1.7679061309746378, 1.7976991302610532, 1.8257586488759425, 1.8042287188559472, 1.777249003771259, 1.7483236596297775, 1.7362275231376805, 1.7692375490285759, 1.7370202693765127, 1.7360723117135592, 1.768777926935451, 1.7647074289960176, 1.7475759104234487, 1.7903672372657002, 1.8103490486842106, 1.8558027431794952, 1.9250150367657144, 1.9203967902093317, 1.9439025071849343, 1.938004134038907, 1.9217347231532123, 1.9146441257001947, 1.9125079813831758, 1.8749578054648393, 1.825682910269605, 1.7965219531802434, 1.7844053784496248, 1.7467980947755146, 1.7836011220095076, 1.7822839737896432, 1.7661089524238223, 1.786190782425039, 1.8014363352104172, 1.820912442409092, 1.822568523602693, 1.8153345679073816, 1.822242390221223, 1.8705398875596453, 1.8258761658026004, 1.8366791110157412, 1.8185347392055466, 1.8701389919316291, 1.8588750906604923, 1.8892388550459214, 1.9062272127155426, 1.9422058453827322, 1.9403631834111195, 1.9122930205721442, 1.894687196589075, 1.9080439570144627, 1.8851423501893703, 1.8484355723748933, 1.84887556860856, 1.8620412626725464, 1.8754797673186745, 1.8643548319002017, 1.856698218289376, 1.9023429482652507, 1.9486914368913035, 1.9762877621797774, 2.0026946232307106, 2.019449805252184], "raw_rewards": [7.869385480880737, 0.30212289094924927, -0.1653035283088684, -0.8742947578430176, -1.4332927465438843, 0.9507952705025673, -1.6128928735852242, -1.4756847620010376, 0.12714916467666626, -1.4666680097579956, -1.7659942060709, 0.0034030526876449585, -1.0772457867860794, -0.939087450504303, -0.09268271923065186, -0.2025727927684784, 1.2409679293632507, -1.549471378326416, -2.000951513648033, 1.5722772777080536, -1.3175745010375977, 2.127745598554611, 3.3618789315223694, -1.929141715168953, 0.09446060657501221, -0.08261801302433014, 0.2618691325187683, -0.5134999752044678, -2.073127508163452, 0.27165186405181885, 1.1141240745782852, 0.22413843870162964, 0.6378590166568756, 
0.7410027906298637, -1.7340275347232819, -1.6151320934295654, -1.6202170848846436, 2.1107622385025024, 1.6147027611732483, 2.2008373141288757, 0.20370317995548248, 0.1037561446428299, 0.7834433019161224, 3.9312584400177, 1.9446720480918884, 0.6823182106018066, -0.11399078369140625, 4.084675073623657, 0.44461843371391296, 1.9997044205665588, -1.3315133452415466, 1.0650655627250671, 1.8860607743263245, -0.960464209318161, -0.8703119158744812, 2.588159680366516, 2.276316821575165, 0.3519379496574402, 3.8637195825576782, 0.9018396735191345, 1.850459337234497, 3.7698827385902405, 2.1691126227378845, -0.4405495524406433, -0.8176892399787903, 2.8762494027614594, 0.05018448829650879, -1.7433454543352127, 3.2934361696243286, 0.27119821310043335, 0.051368117332458496, 3.860495090484619, 1.6872042417526245, -1.595702052116394, 0.2707443833351135, 2.98825940489769, 2.3140229284763336, -0.6839565932750702, 1.537642002105713, 2.6002658009529114, 2.653698205947876, 2.018460303544998, 2.1577287912368774, 1.5650858879089355, 1.5288212597370148, 1.0946676433086395, 0.005232870578765869, -0.5721810460090637, 2.591393381357193, 3.504615902900696, -0.6234785914421082, 1.2751400172710419, 2.967794418334961, 0.6752512454986572, 0.929683268070221, 2.873240500688553, 1.3088274002075195, -1.563977837562561, 1.0894655883312225, 1.6831098198890686, 2.79997581243515, 0.9091663900762796, 1.039250373840332, -0.2278135120868683, 2.2370247542858124, 0.9630968570709229, 2.1286914348602295, 1.1031403839588165, 2.2346373796463013, 0.984773188829422, 3.545325219631195, 0.7524199932813644, 1.964707374572754, 0.95658740401268, 1.6900435984134674, 0.7646693168208003, 0.784693107008934, 2.9248103499412537, 2.764322817325592, 2.37806236743927, 1.6957352757453918, 2.5313061475753784, 2.9660938680171967, 2.546180844306946, 1.4264174401760101, 2.6010509729385376, 3.131833851337433, 2.490403026342392, 2.2305434346199036, 0.45485207438468933, -0.2049075961112976, 3.9700870513916016, 2.6260368824005127, 
1.745518147945404, 2.2572388648986816, 1.7456613183021545, 2.950973927974701, 1.3027393817901611, 1.7355260252952576, 2.3226271867752075, 0.8437174558639526, 2.2532759606838226, 1.7347663044929504, -0.0050980448722839355, 1.169892281293869, 0.5850276499986649, 1.1715667843818665, 0.593328595161438, 1.353189766407013, 0.7872850801795721, -1.6005812138319016, -1.0769227743148804, 2.288356304168701, 3.108502149581909, 0.614934578537941, 0.6843967437744141, 2.204077661037445, -0.0004414021968841553, 3.388222098350525, 2.3059264421463013, 2.2062636613845825, -0.07324928045272827, 1.3386163115501404, 1.6970834732055664, 2.4391518235206604, 0.4920702278614044, 0.9900135844945908, 2.2905810475349426, 0.4571004608296789, 1.2120925784111023, 2.885680675506592, 1.2613102197647095, 0.6581690907478333, 3.0120148062705994, 2.863319218158722, 2.0909981727600098, 1.5308949947357178, 1.694356083869934, 1.9248302578926086, 2.037512093782425, 0.16763997077941895, 2.313068985939026, 0.20004019141197205, 2.676566481590271, 1.5560699701309204, 2.6811817288398743, 2.6683337092399597, 0.7397091090679169, 0.6628532111644745, 3.093649685382843, 3.0954139828681946, 1.6091610789299011, -0.8415878489613533, 1.2594474256038666, 3.1203470826148987, 1.0595019459724426, 0.6205319166183472, 2.963775932788849, 0.620332658290863, 2.686708927154541, 0.3639736622571945, 2.836131751537323], "step": 20000}
 
1
+ {"episodes": [0, 54, 108, 162, 216, 270, 324, 378, 432, 486, 540, 594, 648, 702, 756, 810, 864, 918, 972, 1026, 1080, 1134, 1188, 1242, 1296, 1350, 1404, 1458, 1512, 1566, 1620, 1674, 1728, 1782, 1836, 1890, 1944, 1998, 2052, 2106, 2160, 2214, 2268, 2322, 2376, 2430, 2484, 2538, 2592, 2646, 2700, 2754, 2808, 2862, 2916, 2970, 3024, 3078, 3132, 3186, 3240, 3294, 3348, 3402, 3456, 3510, 3564, 3618, 3672, 3726, 3780, 3834, 3888, 3942, 3996, 4050, 4104, 4158, 4212, 4266, 4320, 4374, 4428, 4482, 4536, 4590, 4644, 4698, 4752, 4806, 4860, 4914, 4968, 5022, 5076, 5130, 5184, 5238, 5292, 5346, 5400, 5454, 5508, 5562, 5616, 5670, 5724, 5778, 5832, 5886, 5940, 5994, 6048, 6102, 6156, 6210, 6264, 6318, 6372, 6426, 6480, 6534, 6588, 6642, 6696, 6750, 6804, 6858, 6912, 6966, 7020, 7074, 7128, 7182, 7236, 7290, 7344, 7398, 7452, 7506, 7560, 7614, 7668, 7722, 7776, 7830, 7884, 7938, 7992, 8046, 8100, 8154, 8208, 8262, 8316, 8370, 8424, 8478, 8532, 8586, 8640, 8694, 8748, 8802, 8856, 8910, 8964, 9018, 9072, 9126, 9180, 9234, 9288, 9342, 9396, 9450, 9504, 9558, 9612, 9666, 9720, 9774, 9828, 9882, 9936, 9990, 10044, 10098, 10152, 10206, 10260, 10314, 10368, 10422, 10476, 10530, 10584, 10638, 10692, 10746, 10800, 10854, 10908], "mean_rewards": [7.869385480880737, -0.41051984910260547, -0.5492027676365244, -0.6168348340923252, -0.5351173416861603, -0.4764093054678442, -0.47253693533918034, -0.47499917030069083, -0.4891404308417412, -0.5025442564148586, -0.48840301800794667, -0.508802940635636, -0.46954920547538487, -0.42533945731213657, -0.4368745926138775, -0.4193249794919669, -0.4092346332987675, -0.353060252322088, -0.2940995312962056, -0.21927801998549679, -0.20089613068780868, -0.12506907433271408, -0.10114177332402471, -0.008871172632967685, 0.09518664632584002, 0.1262925854581578, 0.16667680813332708, 0.22568784479262566, 0.28477957568396506, 0.296171207164489, 0.3757471558196049, 0.42450997700199256, 0.4808117801295491, 0.5391666152812779, 0.5689808041716409, 
0.5712222996449104, 0.5904879566519113, 0.6232858557695095, 0.6342131112949467, 0.6917316244849697, 0.7243372341715663, 0.7537346597256963, 0.789060527155507, 0.8554964856626531, 0.8602999568525496, 0.9521009895790326, 1.009716379080926, 1.051387386374726, 1.1155794020388206, 1.150166751366795, 1.1501598492774119, 1.1837558287689138, 1.213108567848341, 1.2131437046706581, 1.276654483436906, 1.3081771806341318, 1.3261926365713759, 1.2855303687261024, 1.3094462854481799, 1.3246912506211848, 1.3649099320547151, 1.3636714587647496, 1.4078118026861581, 1.4299958327098539, 1.448524320732639, 1.4219702863606902, 1.4664546345262006, 1.4783013143209591, 1.5082530105022551, 1.498775199950455, 1.4922605522485097, 1.5474505744811595, 1.5467187698928802, 1.5193020349385873, 1.5015674076244034, 1.5476209773602607, 1.5425808865028472, 1.5614539348704308, 1.5752295390951334, 1.601019313977224, 1.5956984089713384, 1.5783536112206618, 1.605170402980289, 1.609818503597577, 1.6114819045325042, 1.611166421031901, 1.626127223142819, 1.6224109640191466, 1.62811375268794, 1.6364221707344038, 1.6479085205436252, 1.68300795165462, 1.7199315566838804, 1.7624231781204547, 1.8120945872093999, 1.7900560836224428, 1.8056307126220696, 1.796357822860676, 1.794105003174606, 1.7866614152400977, 1.7825549535948721, 1.7799180618051378, 1.7159151644033828, 1.707240282961322, 1.7067030376529944, 1.7414429096118835, 1.7244301065825942, 1.7394534384313016, 1.7501649395389574, 1.768823225281977, 1.7936280370897266, 1.7753073548622782, 1.7985860085296523, 1.775856651936601, 1.761148449725007, 1.7298969913180418, 1.7499384344927467, 1.7930582820463181, 1.7775360784647882, 1.7903102684193166, 1.8150328708382464, 1.8257946563767191, 1.8465963355418513, 1.8954826608643507, 1.9210255016546431, 1.9286256749034234, 1.9029758592466757, 1.8584531546866923, 1.8594947501572603, 1.8344094237201358, 1.79941314168616, 1.8079466098409875, 1.8152153570509608, 1.8054590580533947, 1.7969712433690948, 1.7882612819430108, 
1.7704781028353924, 1.8000406818682146, 1.8211428545306847, 1.8399917912533732, 1.8737868167589338, 1.8774200505343348, 1.8721188200453092, 1.8767408794616383, 1.889315270822169, 1.8973016155200473, 1.9045764394563345, 1.9062862954065067, 1.8882204007109233, 1.885903197714556, 1.8929937056258974, 1.8889157436584143, 1.8742003176619646, 1.8987999117736072, 1.8982257889345777, 1.9233697905769422, 1.9702272060323833, 1.992930272779339, 1.9961104948722832, 2.028669857688547, 1.9892342082619068, 1.9922661232102634, 2.014080071243981, 2.003148631695794, 1.9819643528737727, 1.9620282770742927, 1.9306926963329112, 1.911854676284528, 1.9251795592454404, 1.9032559321120885, 1.940680162529937, 1.9206769538671118, 1.8939569022338225, 1.9223596293718832, 1.9247678831291053, 1.9413379225448395, 1.9664991747436398, 1.9917000998382746, 1.9840289586591762, 1.965516634339595, 1.9864252154801343, 1.9808506768425562, 2.002361669342974, 2.0258850431815967, 2.0045235502224465, 2.0037248894380215, 1.9576798827025401, 1.9127588423060757, 1.9126606692596557, 1.9263554555385196, 1.9400756921152666, 1.9400606237476807, 1.9311223548206493, 1.913246230365892, 1.954121733201586, 1.9481463303932254, 1.9877335088026717, 2.0078176827846352, 2.0425200287990246, 2.0322421691869046, 2.001448381682824, 1.987356471108531, 1.9812557704595468], "raw_rewards": [7.869385480880737, 0.4599737599492073, -1.4565927982330322, -1.4442170858383179, -0.7505795061588287, 0.12662386894226074, -1.4533054828643799, -1.4666680097579956, -1.248689442873001, -1.1987037062644958, -1.3451157063245773, -2.121876023709774, -2.401705786585808, 1.871029555797577, -2.000951513648033, 1.617654800415039, -1.5481922626495361, -0.1357777714729309, 0.5319417044520378, -1.5726486444473267, -0.5888408124446869, -0.5134999752044678, 1.1500522196292877, -1.2830363810062408, 2.7681850492954254, 1.2519995421171188, 1.9592264890670776, -1.6138243675231934, -1.6202170848846436, -1.1529327481985092, 3.0023685693740845, 0.11245410144329071, 
-1.0573668628931046, -1.6283231973648071, -0.4163784980773926, 0.6823182106018066, -2.69570055603981, 2.345923662185669, 1.8043333292007446, 1.250470757484436, -1.1206167042255402, 1.209741860628128, -0.8703119158744812, 1.5336683094501495, 1.1777045745402575, 3.212405025959015, -1.6204973459243774, 2.229508101940155, 3.6350157856941223, -0.4405495524406433, 1.1129309833049774, 0.40909240394830704, 1.5946876406669617, 2.9371981620788574, 1.0661866962909698, -1.5951862335205078, 1.6872042417526245, 1.1412177383899689, 2.9885196685791016, 1.5259829759597778, 1.2249691784381866, -0.17562654614448547, -0.060656994581222534, 2.018460303544998, 2.586593806743622, 1.5325765013694763, 2.585873991250992, 0.8348989896476269, 2.7364750802516937, 0.5851811468601227, -0.6234785914421082, -0.3203362226486206, -0.05612260103225708, -0.7573689520359039, -1.0328965410590172, 0.43692925572395325, 1.6856676638126373, 1.6831098198890686, 2.5903921127319336, 0.19393183290958405, 1.992238163948059, 1.4300525188446045, -0.3287913426756859, 2.126563787460327, 2.2346373796463013, -0.0103532075881958, 3.960599184036255, 1.9564669132232666, 3.540530562400818, 0.22767803072929382, 1.5510457158088684, 2.9248103499412537, 1.305261641740799, 2.897787034511566, 1.3643755316734314, 1.3663267493247986, 3.1297138929367065, 0.6142806708812714, 3.131833851337433, 2.492442339658737, 2.345623791217804, 2.467067837715149, 0.9856889843940735, -0.20317912101745605, 1.002382606267929, 1.7456613183021545, -1.5415513515472412, 1.7634225487709045, 3.887290835380554, 2.3161216974258423, 1.5524056553840637, 0.06543898582458496, 1.169892281293869, 2.116620749235153, 3.16776305437088, 2.9264339208602905, 3.236627459526062, 2.9213297367095947, 2.2149168252944946, 3.108502149581909, 0.660636343061924, 2.7253071069717407, 1.2900128662586212, 2.9700556993484497, 3.236673951148987, 3.241437554359436, 1.3386163115501404, 0.6383883208036423, 2.8580138087272644, 2.79445818066597, 3.4975720643997192, 2.8863253593444824, 
1.201033592224121, 1.2613102197647095, 2.8605822324752808, -0.2421724647283554, 1.5260725319385529, 2.30670964717865, 3.353128671646118, 0.9881656616926193, 0.16763997077941895, 2.20599102973938, 1.3687167763710022, 2.1226362586021423, -0.22471871972084045, 3.111227869987488, 3.1131207942962646, 3.093649685382843, 3.689613163471222, 3.1143359541893005, 1.050121396780014, 2.962377607822418, 0.5182003080844879, 2.6878902316093445, 0.620332658290863, 2.1800947785377502, -0.08711862564086914, 1.4942425191402435, 3.117049276828766, 1.7567228972911835, 3.5543485283851624, 1.0609619319438934, 3.5412749648094177, 1.003387212753296, -0.044568657875061035, 1.9571133255958557, 0.987617015838623, 1.6582836210727692, 2.9391005635261536, 0.6417786628007889, 2.9579904675483704, 3.2891563773155212, 3.2921172380447388, 1.1086584031581879, 3.292155683040619, 1.2546398341655731, 0.45421365124639124, 3.228146195411682, 1.0346056520938873, 1.0338847637176514, 0.9749908745288849, 0.7823807299137115, 0.0020377933979034424, 2.640851616859436, 0.45796782709658146, -0.08856308460235596, 2.266314923763275, 0.9856714010238647, 2.3894476294517517, 2.9557398557662964, 2.0732545256614685, 3.8009579181671143, 2.2961906790733337, 3.6587679386138916, 2.8332625031471252, 3.6581618189811707, 1.9579390287399292, 0.7483655419200659, 3.1734155416488647, 1.7330745458602905, 1.4089505076408386, 2.901742935180664, 2.012480288743973], "step": 25000}
reward_curve.png CHANGED
spindleflow_model_latest.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7aec515b0112638963c9c37fec3156be35df48402601b9a342c59b24557ae238
3
  size 143819553
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4af29b29400e31cb4f9980de6eafdead8e2bd0a405acfdeff3d3543f1d3eb877
3
  size 143819553
training_log.txt CHANGED
@@ -365,3 +365,103 @@
365
  [08:24:17] Ep 8450 | reward +1.111 | Phase 3/3 | Rolling mean: 2.066 / — | Episodes in phase: 6225
366
  [08:24:20] Ep 8475 | reward +3.093 | Phase 3/3 | Rolling mean: 2.109 / — | Episodes in phase: 6250
367
  [08:24:20] Periodic save at step 20,000 ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  [08:24:17] Ep 8450 | reward +1.111 | Phase 3/3 | Rolling mean: 2.066 / — | Episodes in phase: 6225
366
  [08:24:20] Ep 8475 | reward +3.093 | Phase 3/3 | Rolling mean: 2.109 / — | Episodes in phase: 6250
367
  [08:24:20] Periodic save at step 20,000 ...
368
+ [08:24:24] Periodic push done — 5 files at step 20,000
369
+ [08:24:25] Ep 8500 | reward +3.116 | Phase 3/3 | Rolling mean: 2.121 / — | Episodes in phase: 6275
370
+ [08:24:39] Ep 8525 | reward +1.944 | Phase 3/3 | Rolling mean: 2.051 / — | Episodes in phase: 6300
371
+ [08:24:40] Ep 8550 | reward +2.910 | Phase 3/3 | Rolling mean: 1.994 / — | Episodes in phase: 6325
372
+ [08:24:42] Ep 8575 | reward +2.345 | Phase 3/3 | Rolling mean: 2.057 / — | Episodes in phase: 6350
373
+ [08:24:43] Ep 8600 | reward +1.379 | Phase 3/3 | Rolling mean: 1.980 / — | Episodes in phase: 6375
374
+ [08:24:57] Ep 8625 | reward +0.658 | Phase 3/3 | Rolling mean: 1.964 / — | Episodes in phase: 6400
375
+ [08:24:58] Ep 8650 | reward +1.380 | Phase 3/3 | Rolling mean: 1.911 / — | Episodes in phase: 6425
376
+ [08:25:00] Ep 8675 | reward +1.496 | Phase 3/3 | Rolling mean: 1.918 / — | Episodes in phase: 6450
377
+ [08:25:01] Ep 8700 | reward +1.379 | Phase 3/3 | Rolling mean: 1.919 / — | Episodes in phase: 6475
378
+ [08:25:17] Ep 8725 | reward +1.979 | Phase 3/3 | Rolling mean: 1.922 / — | Episodes in phase: 6500
379
+ [08:25:18] Ep 8750 | reward +1.654 | Phase 3/3 | Rolling mean: 1.968 / — | Episodes in phase: 6525
380
+ [08:25:19] Ep 8775 | reward +3.113 | Phase 3/3 | Rolling mean: 1.884 / — | Episodes in phase: 6550
381
+ [08:25:20] Ep 8800 | reward +2.273 | Phase 3/3 | Rolling mean: 1.943 / — | Episodes in phase: 6575
382
+ [08:25:34] Ep 8825 | reward +2.554 | Phase 3/3 | Rolling mean: 1.984 / — | Episodes in phase: 6600
383
+ [08:25:36] Ep 8850 | reward +2.646 | Phase 3/3 | Rolling mean: 1.994 / — | Episodes in phase: 6625
384
+ [08:25:37] Ep 8875 | reward +2.192 | Phase 3/3 | Rolling mean: 1.889 / — | Episodes in phase: 6650
385
+ [08:25:38] Ep 8900 | reward +1.617 | Phase 3/3 | Rolling mean: 1.906 / — | Episodes in phase: 6675
386
+ [08:25:51] Ep 8925 | reward +2.647 | Phase 3/3 | Rolling mean: 1.952 / — | Episodes in phase: 6700
387
+ [08:25:53] Ep 8950 | reward +2.496 | Phase 3/3 | Rolling mean: 1.884 / — | Episodes in phase: 6725
388
+ [08:25:56] Ep 8975 | reward +2.195 | Phase 3/3 | Rolling mean: 1.936 / — | Episodes in phase: 6750
389
+ [08:25:57] Ep 9000 | reward +2.345 | Phase 3/3 | Rolling mean: 1.899 / — | Episodes in phase: 6775
390
+ [08:26:10] Ep 9025 | reward +2.477 | Phase 3/3 | Rolling mean: 1.844 / — | Episodes in phase: 6800
391
+ [08:26:11] Ep 9050 | reward +2.954 | Phase 3/3 | Rolling mean: 1.885 / — | Episodes in phase: 6825
392
+ [08:26:13] Ep 9075 | reward +1.034 | Phase 3/3 | Rolling mean: 1.946 / — | Episodes in phase: 6850
393
+ [08:26:14] Ep 9100 | reward +2.091 | Phase 3/3 | Rolling mean: 1.933 / — | Episodes in phase: 6875
394
+ [08:26:26] Ep 9125 | reward +2.799 | Phase 3/3 | Rolling mean: 1.908 / — | Episodes in phase: 6900
395
+ [08:26:28] Ep 9150 | reward +1.280 | Phase 3/3 | Rolling mean: 1.920 / — | Episodes in phase: 6925
396
+ [08:26:29] Ep 9175 | reward +2.092 | Phase 3/3 | Rolling mean: 1.903 / — | Episodes in phase: 6950
397
+ [08:26:30] Ep 9200 | reward +2.657 | Phase 3/3 | Rolling mean: 1.945 / — | Episodes in phase: 6975
398
+ [08:26:45] Ep 9225 | reward -1.512 | Phase 3/3 | Rolling mean: 1.969 / — | Episodes in phase: 7000
399
+ [08:26:46] Ep 9250 | reward +1.509 | Phase 3/3 | Rolling mean: 1.948 / — | Episodes in phase: 7025
400
+ [08:26:48] Ep 9275 | reward +1.571 | Phase 3/3 | Rolling mean: 1.916 / — | Episodes in phase: 7050
401
+ [08:26:49] Ep 9300 | reward +2.625 | Phase 3/3 | Rolling mean: 1.896 / — | Episodes in phase: 7075
402
+ [08:27:03] Ep 9325 | reward +2.590 | Phase 3/3 | Rolling mean: 1.909 / — | Episodes in phase: 7100
403
+ [08:27:04] Ep 9350 | reward +1.605 | Phase 3/3 | Rolling mean: 1.943 / — | Episodes in phase: 7125
404
+ [08:27:05] Ep 9375 | reward +0.890 | Phase 3/3 | Rolling mean: 1.975 / — | Episodes in phase: 7150
405
+ [08:27:06] Ep 9400 | reward +2.478 | Phase 3/3 | Rolling mean: 1.925 / — | Episodes in phase: 7175
406
+ [08:27:20] Ep 9425 | reward +1.982 | Phase 3/3 | Rolling mean: 1.950 / — | Episodes in phase: 7200
407
+ [08:27:22] Ep 9450 | reward +1.982 | Phase 3/3 | Rolling mean: 1.954 / — | Episodes in phase: 7225
408
+ [08:27:24] Ep 9475 | reward +3.113 | Phase 3/3 | Rolling mean: 2.030 / — | Episodes in phase: 7250
409
+ [08:27:26] Ep 9500 | reward +1.654 | Phase 3/3 | Rolling mean: 2.060 / — | Episodes in phase: 7275
410
+ [08:27:40] Ep 9525 | reward +0.484 | Phase 3/3 | Rolling mean: 2.067 / — | Episodes in phase: 7300
411
+ [08:27:41] Ep 9550 | reward +3.141 | Phase 3/3 | Rolling mean: 2.053 / — | Episodes in phase: 7325
412
+ [08:27:42] Ep 9575 | reward +2.562 | Phase 3/3 | Rolling mean: 2.070 / — | Episodes in phase: 7350
413
+ [08:27:43] Ep 9600 | reward +2.478 | Phase 3/3 | Rolling mean: 2.065 / — | Episodes in phase: 7375
414
+ [08:27:58] Ep 9625 | reward +0.659 | Phase 3/3 | Rolling mean: 2.022 / — | Episodes in phase: 7400
415
+ [08:27:59] Ep 9650 | reward +3.233 | Phase 3/3 | Rolling mean: 2.038 / — | Episodes in phase: 7425
416
+ [08:28:00] Ep 9675 | reward +0.474 | Phase 3/3 | Rolling mean: 1.934 / — | Episodes in phase: 7450
417
+ [08:28:01] Ep 9700 | reward +0.866 | Phase 3/3 | Rolling mean: 1.898 / — | Episodes in phase: 7475
418
+ [08:28:20] Ep 9725 | reward +2.296 | Phase 3/3 | Rolling mean: 1.938 / — | Episodes in phase: 7500
419
+ [08:28:21] Ep 9750 | reward +1.855 | Phase 3/3 | Rolling mean: 1.926 / — | Episodes in phase: 7525
420
+ [08:28:22] Ep 9775 | reward +0.782 | Phase 3/3 | Rolling mean: 1.905 / — | Episodes in phase: 7550
421
+ [08:28:24] Ep 9800 | reward +2.960 | Phase 3/3 | Rolling mean: 1.954 / — | Episodes in phase: 7575
422
+ [08:28:37] Ep 9825 | reward +2.961 | Phase 3/3 | Rolling mean: 1.974 / — | Episodes in phase: 7600
423
+ [08:28:38] Ep 9850 | reward +2.306 | Phase 3/3 | Rolling mean: 1.992 / — | Episodes in phase: 7625
424
+ [08:28:39] Ep 9875 | reward +2.611 | Phase 3/3 | Rolling mean: 2.085 / — | Episodes in phase: 7650
425
+ [08:28:40] Ep 9900 | reward +0.457 | Phase 3/3 | Rolling mean: 2.099 / — | Episodes in phase: 7675
426
+ [08:28:53] Ep 9925 | reward +0.458 | Phase 3/3 | Rolling mean: 2.034 / — | Episodes in phase: 7700
427
+ [08:28:54] Ep 9950 | reward +0.803 | Phase 3/3 | Rolling mean: 2.032 / — | Episodes in phase: 7725
428
+ [08:28:57] Ep 9975 | reward +1.350 | Phase 3/3 | Rolling mean: 2.003 / — | Episodes in phase: 7750
429
+ [08:28:58] Ep 10000 | reward +3.037 | Phase 3/3 | Rolling mean: 1.991 / — | Episodes in phase: 7775
430
+ [08:29:12] Ep 10025 | reward +0.658 | Phase 3/3 | Rolling mean: 1.935 / — | Episodes in phase: 7800
431
+ [08:29:13] Ep 10050 | reward +0.985 | Phase 3/3 | Rolling mean: 1.886 / — | Episodes in phase: 7825
432
+ [08:29:14] Ep 10075 | reward +1.687 | Phase 3/3 | Rolling mean: 1.832 / — | Episodes in phase: 7850
433
+ [08:29:15] Ep 10100 | reward +1.688 | Phase 3/3 | Rolling mean: 1.776 / — | Episodes in phase: 7875
434
+ [08:29:29] Ep 10125 | reward +0.801 | Phase 3/3 | Rolling mean: 1.762 / — | Episodes in phase: 7900
435
+ [08:29:31] Ep 10150 | reward +0.985 | Phase 3/3 | Rolling mean: 1.787 / — | Episodes in phase: 7925
436
+ [08:29:32] Ep 10175 | reward +0.660 | Phase 3/3 | Rolling mean: 1.784 / — | Episodes in phase: 7950
437
+ [08:29:33] Ep 10200 | reward +0.632 | Phase 3/3 | Rolling mean: 1.787 / — | Episodes in phase: 7975
438
+ [08:29:47] Ep 10225 | reward +1.526 | Phase 3/3 | Rolling mean: 1.852 / — | Episodes in phase: 8000
439
+ [08:29:50] Ep 10250 | reward +1.526 | Phase 3/3 | Rolling mean: 1.900 / — | Episodes in phase: 8025
440
+ [08:29:51] Ep 10275 | reward +3.283 | Phase 3/3 | Rolling mean: 1.906 / — | Episodes in phase: 8050
441
+ [08:29:52] Ep 10300 | reward +2.186 | Phase 3/3 | Rolling mean: 1.991 / — | Episodes in phase: 8075
442
+ [08:30:05] Ep 10325 | reward +1.118 | Phase 3/3 | Rolling mean: 2.017 / — | Episodes in phase: 8100
443
+ [08:30:06] Ep 10350 | reward +3.127 | Phase 3/3 | Rolling mean: 2.023 / — | Episodes in phase: 8125
444
+ [08:30:07] Ep 10375 | reward +2.992 | Phase 3/3 | Rolling mean: 2.033 / — | Episodes in phase: 8150
445
+ [08:30:09] Ep 10400 | reward +3.099 | Phase 3/3 | Rolling mean: 2.033 / — | Episodes in phase: 8175
446
+ [08:30:22] Ep 10425 | reward +1.935 | Phase 3/3 | Rolling mean: 2.025 / — | Episodes in phase: 8200
447
+ [08:30:23] Ep 10450 | reward +0.764 | Phase 3/3 | Rolling mean: 2.032 / — | Episodes in phase: 8225
448
+ [08:30:24] Ep 10475 | reward +1.935 | Phase 3/3 | Rolling mean: 2.103 / — | Episodes in phase: 8250
449
+ [08:30:27] Ep 10500 | reward +2.951 | Phase 3/3 | Rolling mean: 2.061 / — | Episodes in phase: 8275
450
+ [08:30:40] Ep 10525 | reward +1.709 | Phase 3/3 | Rolling mean: 2.034 / — | Episodes in phase: 8300
451
+ [08:30:41] Ep 10550 | reward +2.188 | Phase 3/3 | Rolling mean: 1.996 / — | Episodes in phase: 8325
452
+ [08:30:42] Ep 10575 | reward +1.201 | Phase 3/3 | Rolling mean: 2.052 / — | Episodes in phase: 8350
453
+ [08:30:44] Ep 10600 | reward +0.480 | Phase 3/3 | Rolling mean: 2.033 / — | Episodes in phase: 8375
454
+ [08:30:58] Ep 10625 | reward +2.980 | Phase 3/3 | Rolling mean: 2.070 / — | Episodes in phase: 8400
455
+ [08:30:59] Ep 10650 | reward +1.055 | Phase 3/3 | Rolling mean: 2.034 / — | Episodes in phase: 8425
456
+ [08:31:00] Ep 10675 | reward +1.155 | Phase 3/3 | Rolling mean: 1.964 / — | Episodes in phase: 8450
457
+ [08:31:01] Ep 10700 | reward +0.480 | Phase 3/3 | Rolling mean: 2.021 / — | Episodes in phase: 8475
458
+ [08:31:15] Ep 10725 | reward +2.531 | Phase 3/3 | Rolling mean: 2.078 / — | Episodes in phase: 8500
459
+ [08:31:18] Ep 10750 | reward +2.950 | Phase 3/3 | Rolling mean: 2.079 / — | Episodes in phase: 8525
460
+ [08:31:19] Ep 10775 | reward +1.978 | Phase 3/3 | Rolling mean: 1.999 / — | Episodes in phase: 8550
461
+ [08:31:20] Ep 10800 | reward +2.013 | Phase 3/3 | Rolling mean: 1.963 / — | Episodes in phase: 8575
462
+ [08:31:33] Ep 10825 | reward +1.748 | Phase 3/3 | Rolling mean: 1.908 / — | Episodes in phase: 8600
463
+ [08:31:34] Ep 10850 | reward +2.468 | Phase 3/3 | Rolling mean: 1.910 / — | Episodes in phase: 8625
464
+ [08:31:35] Ep 10875 | reward +2.013 | Phase 3/3 | Rolling mean: 1.912 / — | Episodes in phase: 8650
465
+ [08:31:36] Ep 10900 | reward +2.468 | Phase 3/3 | Rolling mean: 1.867 / — | Episodes in phase: 8675
466
+ [08:31:50] Ep 10925 | reward +1.010 | Phase 3/3 | Rolling mean: 1.803 / — | Episodes in phase: 8700
467
+ [08:31:50] Periodic save at step 25,000 ...
vec_normalize_latest.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74da382d70b483573ff4ffbe3b76bb4cfd839b08d630dbc2b8a60df92c0c7f71
3
  size 166596
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a19434ad2950b7e456a15d2a529b3534fa1a4ef989b9d6bb52ad9da04c5b2a9
3
  size 166596