garvitsachdeva commited on
Commit
dd6cf6a
Β·
verified Β·
1 Parent(s): 9c7843a

Checkpoint at step 30,000

Browse files
reward_curve.json CHANGED
@@ -1 +1 @@
1
- {"episodes": [0, 54, 108, 162, 216, 270, 324, 378, 432, 486, 540, 594, 648, 702, 756, 810, 864, 918, 972, 1026, 1080, 1134, 1188, 1242, 1296, 1350, 1404, 1458, 1512, 1566, 1620, 1674, 1728, 1782, 1836, 1890, 1944, 1998, 2052, 2106, 2160, 2214, 2268, 2322, 2376, 2430, 2484, 2538, 2592, 2646, 2700, 2754, 2808, 2862, 2916, 2970, 3024, 3078, 3132, 3186, 3240, 3294, 3348, 3402, 3456, 3510, 3564, 3618, 3672, 3726, 3780, 3834, 3888, 3942, 3996, 4050, 4104, 4158, 4212, 4266, 4320, 4374, 4428, 4482, 4536, 4590, 4644, 4698, 4752, 4806, 4860, 4914, 4968, 5022, 5076, 5130, 5184, 5238, 5292, 5346, 5400, 5454, 5508, 5562, 5616, 5670, 5724, 5778, 5832, 5886, 5940, 5994, 6048, 6102, 6156, 6210, 6264, 6318, 6372, 6426, 6480, 6534, 6588, 6642, 6696, 6750, 6804, 6858, 6912, 6966, 7020, 7074, 7128, 7182, 7236, 7290, 7344, 7398, 7452, 7506, 7560, 7614, 7668, 7722, 7776, 7830, 7884, 7938, 7992, 8046, 8100, 8154, 8208, 8262, 8316, 8370, 8424, 8478, 8532, 8586, 8640, 8694, 8748, 8802, 8856, 8910, 8964, 9018, 9072, 9126, 9180, 9234, 9288, 9342, 9396, 9450, 9504, 9558, 9612, 9666, 9720, 9774, 9828, 9882, 9936, 9990, 10044, 10098, 10152, 10206, 10260, 10314, 10368, 10422, 10476, 10530, 10584, 10638, 10692, 10746, 10800, 10854, 10908], "mean_rewards": [7.869385480880737, -0.41051984910260547, -0.5492027676365244, -0.6168348340923252, -0.5351173416861603, -0.4764093054678442, -0.47253693533918034, -0.47499917030069083, -0.4891404308417412, -0.5025442564148586, -0.48840301800794667, -0.508802940635636, -0.46954920547538487, -0.42533945731213657, -0.4368745926138775, -0.4193249794919669, -0.4092346332987675, -0.353060252322088, -0.2940995312962056, -0.21927801998549679, -0.20089613068780868, -0.12506907433271408, -0.10114177332402471, -0.008871172632967685, 0.09518664632584002, 0.1262925854581578, 0.16667680813332708, 0.22568784479262566, 0.28477957568396506, 0.296171207164489, 0.3757471558196049, 0.42450997700199256, 0.4808117801295491, 0.5391666152812779, 0.5689808041716409, 0.5712222996449104, 0.5904879566519113, 0.6232858557695095, 0.6342131112949467, 0.6917316244849697, 0.7243372341715663, 0.7537346597256963, 0.789060527155507, 0.8554964856626531, 0.8602999568525496, 0.9521009895790326, 1.009716379080926, 1.051387386374726, 1.1155794020388206, 1.150166751366795, 1.1501598492774119, 1.1837558287689138, 1.213108567848341, 1.2131437046706581, 1.276654483436906, 1.3081771806341318, 1.3261926365713759, 1.2855303687261024, 1.3094462854481799, 1.3246912506211848, 1.3649099320547151, 1.3636714587647496, 1.4078118026861581, 1.4299958327098539, 1.448524320732639, 1.4219702863606902, 1.4664546345262006, 1.4783013143209591, 1.5082530105022551, 1.498775199950455, 1.4922605522485097, 1.5474505744811595, 1.5467187698928802, 1.5193020349385873, 1.5015674076244034, 1.5476209773602607, 1.5425808865028472, 1.5614539348704308, 1.5752295390951334, 1.601019313977224, 1.5956984089713384, 1.5783536112206618, 1.605170402980289, 1.609818503597577, 1.6114819045325042, 1.611166421031901, 1.626127223142819, 1.6224109640191466, 1.62811375268794, 1.6364221707344038, 1.6479085205436252, 1.68300795165462, 1.7199315566838804, 1.7624231781204547, 1.8120945872093999, 1.7900560836224428, 1.8056307126220696, 1.796357822860676, 1.794105003174606, 1.7866614152400977, 1.7825549535948721, 1.7799180618051378, 1.7159151644033828, 1.707240282961322, 1.7067030376529944, 1.7414429096118835, 1.7244301065825942, 1.7394534384313016, 1.7501649395389574, 1.768823225281977, 1.7936280370897266, 1.7753073548622782, 1.7985860085296523, 1.775856651936601, 1.761148449725007, 1.7298969913180418, 1.7499384344927467, 1.7930582820463181, 1.7775360784647882, 1.7903102684193166, 1.8150328708382464, 1.8257946563767191, 1.8465963355418513, 1.8954826608643507, 1.9210255016546431, 1.9286256749034234, 1.9029758592466757, 1.8584531546866923, 1.8594947501572603, 1.8344094237201358, 1.79941314168616, 1.8079466098409875, 1.8152153570509608, 1.8054590580533947, 1.7969712433690948, 1.7882612819430108, 1.7704781028353924, 1.8000406818682146, 1.8211428545306847, 1.8399917912533732, 1.8737868167589338, 1.8774200505343348, 1.8721188200453092, 1.8767408794616383, 1.889315270822169, 1.8973016155200473, 1.9045764394563345, 1.9062862954065067, 1.8882204007109233, 1.885903197714556, 1.8929937056258974, 1.8889157436584143, 1.8742003176619646, 1.8987999117736072, 1.8982257889345777, 1.9233697905769422, 1.9702272060323833, 1.992930272779339, 1.9961104948722832, 2.028669857688547, 1.9892342082619068, 1.9922661232102634, 2.014080071243981, 2.003148631695794, 1.9819643528737727, 1.9620282770742927, 1.9306926963329112, 1.911854676284528, 1.9251795592454404, 1.9032559321120885, 1.940680162529937, 1.9206769538671118, 1.8939569022338225, 1.9223596293718832, 1.9247678831291053, 1.9413379225448395, 1.9664991747436398, 1.9917000998382746, 1.9840289586591762, 1.965516634339595, 1.9864252154801343, 1.9808506768425562, 2.002361669342974, 2.0258850431815967, 2.0045235502224465, 2.0037248894380215, 1.9576798827025401, 1.9127588423060757, 1.9126606692596557, 1.9263554555385196, 1.9400756921152666, 1.9400606237476807, 1.9311223548206493, 1.913246230365892, 1.954121733201586, 1.9481463303932254, 1.9877335088026717, 2.0078176827846352, 2.0425200287990246, 2.0322421691869046, 2.001448381682824, 1.987356471108531, 1.9812557704595468], "raw_rewards": [7.869385480880737, 0.4599737599492073, -1.4565927982330322, -1.4442170858383179, -0.7505795061588287, 0.12662386894226074, -1.4533054828643799, -1.4666680097579956, -1.248689442873001, -1.1987037062644958, -1.3451157063245773, -2.121876023709774, -2.401705786585808, 1.871029555797577, -2.000951513648033, 1.617654800415039, -1.5481922626495361, -0.1357777714729309, 0.5319417044520378, -1.5726486444473267, -0.5888408124446869, -0.5134999752044678, 1.1500522196292877, -1.2830363810062408, 2.7681850492954254, 1.2519995421171188, 1.9592264890670776, -1.6138243675231934, -1.6202170848846436, -1.1529327481985092, 3.0023685693740845, 0.11245410144329071, -1.0573668628931046, -1.6283231973648071, -0.4163784980773926, 0.6823182106018066, -2.69570055603981, 2.345923662185669, 1.8043333292007446, 1.250470757484436, -1.1206167042255402, 1.209741860628128, -0.8703119158744812, 1.5336683094501495, 1.1777045745402575, 3.212405025959015, -1.6204973459243774, 2.229508101940155, 3.6350157856941223, -0.4405495524406433, 1.1129309833049774, 0.40909240394830704, 1.5946876406669617, 2.9371981620788574, 1.0661866962909698, -1.5951862335205078, 1.6872042417526245, 1.1412177383899689, 2.9885196685791016, 1.5259829759597778, 1.2249691784381866, -0.17562654614448547, -0.060656994581222534, 2.018460303544998, 2.586593806743622, 1.5325765013694763, 2.585873991250992, 0.8348989896476269, 2.7364750802516937, 0.5851811468601227, -0.6234785914421082, -0.3203362226486206, -0.05612260103225708, -0.7573689520359039, -1.0328965410590172, 0.43692925572395325, 1.6856676638126373, 1.6831098198890686, 2.5903921127319336, 0.19393183290958405, 1.992238163948059, 1.4300525188446045, -0.3287913426756859, 2.126563787460327, 2.2346373796463013, -0.0103532075881958, 3.960599184036255, 1.9564669132232666, 3.540530562400818, 0.22767803072929382, 1.5510457158088684, 2.9248103499412537, 1.305261641740799, 2.897787034511566, 1.3643755316734314, 1.3663267493247986, 3.1297138929367065, 0.6142806708812714, 3.131833851337433, 2.492442339658737, 2.345623791217804, 2.467067837715149, 0.9856889843940735, -0.20317912101745605, 1.002382606267929, 1.7456613183021545, -1.5415513515472412, 1.7634225487709045, 3.887290835380554, 2.3161216974258423, 1.5524056553840637, 0.06543898582458496, 1.169892281293869, 2.116620749235153, 3.16776305437088, 2.9264339208602905, 3.236627459526062, 2.9213297367095947, 2.2149168252944946, 3.108502149581909, 0.660636343061924, 2.7253071069717407, 1.2900128662586212, 2.9700556993484497, 3.236673951148987, 3.241437554359436, 1.3386163115501404, 0.6383883208036423, 2.8580138087272644, 2.79445818066597, 3.4975720643997192, 2.8863253593444824, 1.201033592224121, 1.2613102197647095, 2.8605822324752808, -0.2421724647283554, 1.5260725319385529, 2.30670964717865, 3.353128671646118, 0.9881656616926193, 0.16763997077941895, 2.20599102973938, 1.3687167763710022, 2.1226362586021423, -0.22471871972084045, 3.111227869987488, 3.1131207942962646, 3.093649685382843, 3.689613163471222, 3.1143359541893005, 1.050121396780014, 2.962377607822418, 0.5182003080844879, 2.6878902316093445, 0.620332658290863, 2.1800947785377502, -0.08711862564086914, 1.4942425191402435, 3.117049276828766, 1.7567228972911835, 3.5543485283851624, 1.0609619319438934, 3.5412749648094177, 1.003387212753296, -0.044568657875061035, 1.9571133255958557, 0.987617015838623, 1.6582836210727692, 2.9391005635261536, 0.6417786628007889, 2.9579904675483704, 3.2891563773155212, 3.2921172380447388, 1.1086584031581879, 3.292155683040619, 1.2546398341655731, 0.45421365124639124, 3.228146195411682, 1.0346056520938873, 1.0338847637176514, 0.9749908745288849, 0.7823807299137115, 0.0020377933979034424, 2.640851616859436, 0.45796782709658146, -0.08856308460235596, 2.266314923763275, 0.9856714010238647, 2.3894476294517517, 2.9557398557662964, 2.0732545256614685, 3.8009579181671143, 2.2961906790733337, 3.6587679386138916, 2.8332625031471252, 3.6581618189811707, 1.9579390287399292, 0.7483655419200659, 3.1734155416488647, 1.7330745458602905, 1.4089505076408386, 2.901742935180664, 2.012480288743973], "step": 25000}
 
1
+ {"episodes": [0, 67, 134, 201, 268, 335, 402, 469, 536, 603, 670, 737, 804, 871, 938, 1005, 1072, 1139, 1206, 1273, 1340, 1407, 1474, 1541, 1608, 1675, 1742, 1809, 1876, 1943, 2010, 2077, 2144, 2211, 2278, 2345, 2412, 2479, 2546, 2613, 2680, 2747, 2814, 2881, 2948, 3015, 3082, 3149, 3216, 3283, 3350, 3417, 3484, 3551, 3618, 3685, 3752, 3819, 3886, 3953, 4020, 4087, 4154, 4221, 4288, 4355, 4422, 4489, 4556, 4623, 4690, 4757, 4824, 4891, 4958, 5025, 5092, 5159, 5226, 5293, 5360, 5427, 5494, 5561, 5628, 5695, 5762, 5829, 5896, 5963, 6030, 6097, 6164, 6231, 6298, 6365, 6432, 6499, 6566, 6633, 6700, 6767, 6834, 6901, 6968, 7035, 7102, 7169, 7236, 7303, 7370, 7437, 7504, 7571, 7638, 7705, 7772, 7839, 7906, 7973, 8040, 8107, 8174, 8241, 8308, 8375, 8442, 8509, 8576, 8643, 8710, 8777, 8844, 8911, 8978, 9045, 9112, 9179, 9246, 9313, 9380, 9447, 9514, 9581, 9648, 9715, 9782, 9849, 9916, 9983, 10050, 10117, 10184, 10251, 10318, 10385, 10452, 10519, 10586, 10653, 10720, 10787, 10854, 10921, 10988, 11055, 11122, 11189, 11256, 11323, 11390, 11457, 11524, 11591, 11658, 11725, 11792, 11859, 11926, 11993, 12060, 12127, 12194, 12261, 12328, 12395, 12462, 12529, 12596, 12663, 12730, 12797, 12864, 12931, 12998, 13065, 13132, 13199, 13266, 13333, 13400], "mean_rewards": [7.869385480880737, -0.4680378864902784, -0.48984615507501145, -0.5551410619040379, -0.4882915579094153, -0.4724334484614831, -0.4706858817982504, -0.49536512678172046, -0.488181437265121, -0.49218673040220723, -0.4815726703055059, -0.47754327278345865, -0.43784233579173726, -0.3844110888298712, -0.3559347061479154, -0.30412048705953304, -0.266127535926422, -0.1874622176040984, -0.14371091075008735, -0.02650309386219652, 0.03699350765202239, 0.11568798260192298, 0.1813001428686437, 0.234962336935236, 0.29998476978480104, 0.3494067042688368, 0.4188010052682893, 0.4982103691668488, 0.5493174741537327, 0.5519611505521989, 0.5843701104372269, 0.6528626296241695, 0.6805431383403456, 0.7341929864990648, 0.7834733703783795, 0.8403997419894993, 0.8835701591176855, 0.9107975078377638, 0.9910712995973492, 1.0697907954830996, 1.1305848434251078, 1.1324089907647075, 1.172262050650997, 1.2235095935128422, 1.2517143468368366, 1.2724046870820789, 1.3005815128812432, 1.3231664394510776, 1.3306386821592848, 1.3325763804993283, 1.3586343029331016, 1.40881654574436, 1.4413863711607908, 1.4488289435588133, 1.4570241300250582, 1.4681397577771262, 1.499341548685888, 1.5262605464494776, 1.5235170595468455, 1.513340130324477, 1.5307294156028504, 1.5475655648487667, 1.5575386175265262, 1.5641262251109376, 1.5792971486511564, 1.5843108974196574, 1.6019751663429598, 1.5715943494819948, 1.5931392893901157, 1.6346125779210805, 1.6342209757898392, 1.617359425582728, 1.6516810876938788, 1.6577659129061755, 1.7004437399976031, 1.7311268771364081, 1.739649844424992, 1.7626731381482645, 1.7597383664972204, 1.7712974552394458, 1.7726923163304273, 1.7791328078125108, 1.7655685633567155, 1.7530806338330702, 1.7465425723497727, 1.7384169118655943, 1.7192243574163946, 1.7400900246355464, 1.7620760225301118, 1.7782208430467326, 1.7714304287994007, 1.763625342698084, 1.7601928197433543, 1.7576644544521427, 1.7743939839113496, 1.769043635923286, 1.810667814309779, 1.8216552026374302, 1.8478464421820198, 1.8269981258918702, 1.8744575704903192, 1.8827178977458279, 1.866869720512893, 1.8740134621361253, 1.8397641184360727, 1.8410966690533401, 1.8426638447435835, 1.8283637543299847, 1.8131030868182625, 1.8027979614466207, 1.7552007339833966, 1.7917554398061926, 1.809832562660328, 1.833266973746071, 1.8741590295668997, 1.880611007449434, 1.8882735193283657, 1.8773927914049378, 1.865357443877396, 1.8789978755369139, 1.8915645213065935, 1.8952674455164622, 1.9155843285400243, 1.9085990070265584, 1.90080054640393, 1.9256198932583044, 1.9366444012710071, 1.9493767221422222, 1.9839627611148725, 1.9764564716675697, 2.002629030457077, 1.9820295862487651, 1.9856562615653577, 1.9859819096593474, 1.9698493608136363, 1.947345323234913, 1.9319047969357122, 1.9212369738192396, 1.915823275121511, 1.9219170818084552, 1.9338193936557564, 1.9390760462537227, 1.9549254322946799, 1.9771323770544638, 1.968752693505264, 1.9706327813298576, 1.979237121560172, 1.994255026763767, 1.9968812663644582, 2.0071281646386785, 1.9723110195435158, 1.9356860965843745, 1.9251148387156718, 1.9225462930269368, 1.9381719721641275, 1.9472862571208334, 1.9549440001736955, 1.943592506697141, 1.9607074977289554, 1.966358552660538, 1.9941627379199158, 2.017277606701966, 2.0028778048013227, 1.9798156342786526, 1.9463988085238573, 1.961345897422039, 1.9355557291343022, 1.9354226221486377, 1.9281403603033271, 1.9492224078499671, 1.9400939026771604, 1.957794354431826, 1.9947251642922874, 2.016812087167117, 2.0479436611309385, 2.025213644837225, 2.0251720844453738, 1.9995202234105298, 1.9965415502759112, 1.974851788492829, 1.9879056021842796, 1.9780626381897102, 1.9584395736040148, 1.9372385981398457, 1.9209191222658515, 1.9350956156822143, 1.960611907474231, 1.995446871384047, 1.9792517610698095, 1.9596557471280296, 1.9577739956190012, 1.9561633419888538, 1.967589806283026, 1.9925558521534272, 2.0040653944348117, 2.0273019113921604, 2.0106458478063964, 2.0137654255044515, 2.0376576196867973, 2.0654571994618025, 2.042709940440199], "raw_rewards": [7.869385480880737, -1.4724462032318115, -0.9430539831519127, -2.300199344754219, -0.8089542239904404, -1.4609763622283936, 1.1505420207977295, -1.4753345251083374, -1.0061301589012146, 1.4091460406780243, -1.7102899551391602, 0.8560748100280762, -1.2121876776218414, 0.8951746374368668, -0.3283916711807251, 1.4731740355491638, -1.5742558240890503, 1.3293038997799158, -1.9147660434246063, 2.114831119775772, 1.0735967457294464, 1.6181618869304657, 0.5473835095763206, -0.971550777554512, 2.4433728456497192, -0.5766929388046265, 0.6217614114284515, 0.1080636978149414, -1.6272716522216797, 0.801971822977066, 1.0068292915821075, 3.228931248188019, 0.9021086767315865, -0.1995646357536316, -1.6238511800765991, -1.621373176574707, 2.2451024651527405, 2.389804482460022, 0.174017071723938, -0.6571964621543884, 2.4821357131004333, 0.0889243632555008, -1.7433454543352127, 3.609505534172058, 2.2524542808532715, 2.5349577367305756, 1.3504652380943298, 2.934185355901718, 2.9824480414390564, -0.17685255408287048, 2.9204043447971344, 2.178996592760086, 1.329479992389679, 0.6736606955528259, 0.8348989896476269, 3.0606049299240112, 1.8705639839172363, 2.923104405403137, 1.4114362634718418, -0.1116487979888916, 2.0751985609531403, -1.1050611436367035, 0.5921751260757446, 0.9946736395359039, 1.8352702856063843, 0.796254113316536, 1.7449798882007599, 0.7606222331523895, 0.12062180042266846, 1.7189838290214539, 3.541672945022583, 1.5339390933513641, 2.767375946044922, 2.2297381162643433, -0.27031809091567993, 1.6640383005142212, 0.8177437596023083, 1.9806551933288574, 1.2750858068466187, 1.5373672842979431, 1.3882066011428833, 0.6665303558111191, 1.0681654214859009, 1.0687440931797028, 2.2572388648986816, 2.4432771801948547, 1.3739816546440125, 2.555147707462311, 1.5532113909721375, 1.5525203347206116, 1.0045947134494781, 2.4867050647735596, 2.0954394340515137, 1.6426711678504944, 2.5995242595672607, 0.4808852504938841, 1.495392918586731, 0.6842523217201233, 3.7635045051574707, 2.212641790509224, 3.5520986318588257, 1.2874490916728973, 1.5225331783294678, 2.5105870962142944, 2.647844910621643, 0.5050036013126373, 2.310435175895691, 2.4384663701057434, 2.8605822324752808, 2.404734432697296, 0.4821899086236954, 1.2641501389443874, 1.6644573211669922, 1.6970094442367554, 1.0590541064739227, -0.8185135126113892, 2.923989415168762, 0.46643319725990295, 1.3798049688339233, 1.27825129032135, 3.6883862614631653, 1.7710249423980713, 3.0534738898277283, 2.207607924938202, 1.0182976424694061, 2.965312957763672, 2.836131751537323, -1.1759849786758423, 0.8884927332401276, 1.3794639706611633, 1.0612505674362183, 1.510947585105896, 1.0037859827280045, 2.1822192072868347, 2.5573002099990845, 0.9873855113983154, 0.9950265288352966, 1.686583399772644, 2.587091416120529, 1.0622715055942535, 2.9554337859153748, 1.7556167542934418, 2.477279305458069, 1.6855851411819458, 1.325159728527069, 3.6202566027641296, 0.9852837026119232, 2.3055906891822815, 1.8989630937576294, 3.8421117663383484, 3.716716766357422, 3.286076307296753, 2.095924496650696, 1.8744736313819885, 1.2755839824676514, 2.1855685114860535, 2.951150119304657, -0.03335070610046387, 2.9794004559516907, 1.974601686000824, 1.5195317268371582, 2.607335329055786, 2.901742935180664, 2.9537888169288635, 2.556188404560089, 1.6433136463165283, 1.981132984161377, 0.7490701808128506, 2.8644319772720337, 1.9312551617622375, 3.218564510345459, 1.717534363269806, 1.7507363855838776, 0.8022308275103569, 0.9781382381916046, 0.9786528944969177, 2.802109658718109, 2.290560722351074, 0.803216639906168, 3.095746695995331, 3.292268693447113, 2.4694343209266663, 1.7080549597740173, 2.6128552556037903, 1.0149105489253998, 2.6496033668518066, 1.5982393622398376, 3.1078230142593384, 2.3775070905685425, 1.684832751750946, 2.6528014540672302, 2.9457955360412598, 2.1817689538002014, 1.707306683063507, 2.385865569114685, 2.928332030773163, 1.6798464059829712, 3.398195207118988, 1.6161009706556797, 4.029117822647095, 1.5490476191043854], "step": 30000}
reward_curve.png CHANGED
spindleflow_model_latest.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4af29b29400e31cb4f9980de6eafdead8e2bd0a405acfdeff3d3543f1d3eb877
3
- size 143819553
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6f19b14daaf52c171779e401edb5c73ca27af9785c0cbb20e0ef84ceca08999
3
+ size 143819554
training_log.txt CHANGED
@@ -465,3 +465,104 @@
465
  [08:31:36] Ep 10900 | reward +2.468 | Phase 3/3 | Rolling mean: 1.867 / β€” | Episodes in phase: 8675
466
  [08:31:50] Ep 10925 | reward +1.010 | Phase 3/3 | Rolling mean: 1.803 / β€” | Episodes in phase: 8700
467
  [08:31:50] Periodic save at step 25,000 ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
465
  [08:31:36] Ep 10900 | reward +2.468 | Phase 3/3 | Rolling mean: 1.867 / β€” | Episodes in phase: 8675
466
  [08:31:50] Ep 10925 | reward +1.010 | Phase 3/3 | Rolling mean: 1.803 / β€” | Episodes in phase: 8700
467
  [08:31:50] Periodic save at step 25,000 ...
468
+ [08:31:53] Periodic push done β€” 5 files at step 25,000
469
+ [08:31:54] Ep 10950 | reward -0.240 | Phase 3/3 | Rolling mean: 1.819 / β€” | Episodes in phase: 8725
470
+ [08:31:55] Ep 10975 | reward +2.557 | Phase 3/3 | Rolling mean: 1.794 / β€” | Episodes in phase: 8750
471
+ [08:31:58] Ep 11000 | reward +1.679 | Phase 3/3 | Rolling mean: 1.837 / β€” | Episodes in phase: 8775
472
+ [08:32:12] Ep 11025 | reward +1.643 | Phase 3/3 | Rolling mean: 1.874 / β€” | Episodes in phase: 8800
473
+ [08:32:13] Ep 11050 | reward +1.746 | Phase 3/3 | Rolling mean: 1.874 / β€” | Episodes in phase: 8825
474
+ [08:32:14] Ep 11075 | reward +2.555 | Phase 3/3 | Rolling mean: 1.932 / β€” | Episodes in phase: 8850
475
+ [08:32:15] Ep 11100 | reward +1.653 | Phase 3/3 | Rolling mean: 1.908 / β€” | Episodes in phase: 8875
476
+ [08:32:28] Ep 11125 | reward +0.585 | Phase 3/3 | Rolling mean: 1.906 / β€” | Episodes in phase: 8900
477
+ [08:32:30] Ep 11150 | reward -0.566 | Phase 3/3 | Rolling mean: 1.899 / β€” | Episodes in phase: 8925
478
+ [08:32:31] Ep 11175 | reward +3.657 | Phase 3/3 | Rolling mean: 1.977 / β€” | Episodes in phase: 8950
479
+ [08:32:32] Ep 11200 | reward +3.456 | Phase 3/3 | Rolling mean: 1.963 / β€” | Episodes in phase: 8975
480
+ [08:32:47] Ep 11225 | reward +3.456 | Phase 3/3 | Rolling mean: 2.016 / β€” | Episodes in phase: 9000
481
+ [08:32:50] Ep 11250 | reward +2.387 | Phase 3/3 | Rolling mean: 1.988 / β€” | Episodes in phase: 9025
482
+ [08:32:52] Ep 11275 | reward +3.258 | Phase 3/3 | Rolling mean: 1.953 / β€” | Episodes in phase: 9050
483
+ [08:32:53] Ep 11300 | reward +3.181 | Phase 3/3 | Rolling mean: 2.048 / β€” | Episodes in phase: 9075
484
+ [08:33:07] Ep 11325 | reward +2.864 | Phase 3/3 | Rolling mean: 2.096 / β€” | Episodes in phase: 9100
485
+ [08:33:08] Ep 11350 | reward +1.978 | Phase 3/3 | Rolling mean: 2.099 / β€” | Episodes in phase: 9125
486
+ [08:33:09] Ep 11375 | reward +0.465 | Phase 3/3 | Rolling mean: 2.069 / β€” | Episodes in phase: 9150
487
+ [08:33:10] Ep 11400 | reward +3.197 | Phase 3/3 | Rolling mean: 2.076 / β€” | Episodes in phase: 9175
488
+ [08:33:23] Ep 11425 | reward +1.710 | Phase 3/3 | Rolling mean: 2.003 / β€” | Episodes in phase: 9200
489
+ [08:33:24] Ep 11450 | reward +2.868 | Phase 3/3 | Rolling mean: 2.058 / β€” | Episodes in phase: 9225
490
+ [08:33:26] Ep 11475 | reward +1.904 | Phase 3/3 | Rolling mean: 2.068 / β€” | Episodes in phase: 9250
491
+ [08:33:28] Ep 11500 | reward +1.711 | Phase 3/3 | Rolling mean: 2.019 / β€” | Episodes in phase: 9275
492
+ [08:33:41] Ep 11525 | reward +1.751 | Phase 3/3 | Rolling mean: 2.053 / β€” | Episodes in phase: 9300
493
+ [08:33:42] Ep 11550 | reward +1.387 | Phase 3/3 | Rolling mean: 2.094 / β€” | Episodes in phase: 9325
494
+ [08:33:44] Ep 11575 | reward +1.035 | Phase 3/3 | Rolling mean: 2.112 / β€” | Episodes in phase: 9350
495
+ [08:33:45] Ep 11600 | reward +2.211 | Phase 3/3 | Rolling mean: 2.132 / β€” | Episodes in phase: 9375
496
+ [08:33:57] Ep 11625 | reward +3.292 | Phase 3/3 | Rolling mean: 2.128 / β€” | Episodes in phase: 9400
497
+ [08:33:58] Ep 11650 | reward +0.802 | Phase 3/3 | Rolling mean: 2.109 / β€” | Episodes in phase: 9425
498
+ [08:33:59] Ep 11675 | reward +3.068 | Phase 3/3 | Rolling mean: 2.110 / β€” | Episodes in phase: 9450
499
+ [08:34:00] Ep 11700 | reward +2.431 | Phase 3/3 | Rolling mean: 2.051 / β€” | Episodes in phase: 9475
500
+ [08:34:14] Ep 11725 | reward +1.524 | Phase 3/3 | Rolling mean: 1.986 / β€” | Episodes in phase: 9500
501
+ [08:34:17] Ep 11750 | reward +0.803 | Phase 3/3 | Rolling mean: 1.906 / β€” | Episodes in phase: 9525
502
+ [08:34:18] Ep 11775 | reward +1.523 | Phase 3/3 | Rolling mean: 1.945 / β€” | Episodes in phase: 9550
503
+ [08:34:19] Ep 11800 | reward +3.072 | Phase 3/3 | Rolling mean: 1.877 / β€” | Episodes in phase: 9575
504
+ [08:34:33] Ep 11825 | reward -0.071 | Phase 3/3 | Rolling mean: 1.839 / β€” | Episodes in phase: 9600
505
+ [08:34:35] Ep 11850 | reward +3.872 | Phase 3/3 | Rolling mean: 1.807 / β€” | Episodes in phase: 9625
506
+ [08:34:36] Ep 11875 | reward +0.767 | Phase 3/3 | Rolling mean: 1.798 / β€” | Episodes in phase: 9650
507
+ [08:34:37] Ep 11900 | reward +2.220 | Phase 3/3 | Rolling mean: 1.846 / β€” | Episodes in phase: 9675
508
+ [08:34:52] Ep 11925 | reward -0.071 | Phase 3/3 | Rolling mean: 1.883 / β€” | Episodes in phase: 9700
509
+ [08:34:54] Ep 11950 | reward +0.199 | Phase 3/3 | Rolling mean: 1.926 / β€” | Episodes in phase: 9725
510
+ [08:34:55] Ep 11975 | reward +0.804 | Phase 3/3 | Rolling mean: 1.835 / β€” | Episodes in phase: 9750
511
+ [08:34:58] Ep 12000 | reward +0.978 | Phase 3/3 | Rolling mean: 1.918 / β€” | Episodes in phase: 9775
512
+ [08:35:11] Ep 12025 | reward +3.038 | Phase 3/3 | Rolling mean: 2.018 / β€” | Episodes in phase: 9800
513
+ [08:35:13] Ep 12050 | reward +1.684 | Phase 3/3 | Rolling mean: 2.013 / β€” | Episodes in phase: 9825
514
+ [08:35:14] Ep 12075 | reward +2.189 | Phase 3/3 | Rolling mean: 2.020 / β€” | Episodes in phase: 9850
515
+ [08:35:16] Ep 12100 | reward +1.327 | Phase 3/3 | Rolling mean: 1.993 / β€” | Episodes in phase: 9875
516
+ [08:35:29] Ep 12125 | reward +1.684 | Phase 3/3 | Rolling mean: 1.987 / β€” | Episodes in phase: 9900
517
+ [08:35:31] Ep 12150 | reward +0.586 | Phase 3/3 | Rolling mean: 1.976 / β€” | Episodes in phase: 9925
518
+ [08:35:32] Ep 12175 | reward +2.470 | Phase 3/3 | Rolling mean: 2.025 / β€” | Episodes in phase: 9950
519
+ [08:35:34] Ep 12200 | reward +1.239 | Phase 3/3 | Rolling mean: 2.021 / β€” | Episodes in phase: 9975
520
+ [08:35:46] Ep 12225 | reward +2.182 | Phase 3/3 | Rolling mean: 1.990 / β€” | Episodes in phase: 10000
521
+ [08:35:49] Ep 12250 | reward +2.294 | Phase 3/3 | Rolling mean: 1.973 / β€” | Episodes in phase: 10025
522
+ [08:35:50] Ep 12275 | reward +3.076 | Phase 3/3 | Rolling mean: 1.959 / β€” | Episodes in phase: 10050
523
+ [08:35:52] Ep 12300 | reward +2.295 | Phase 3/3 | Rolling mean: 1.911 / β€” | Episodes in phase: 10075
524
+ [08:36:07] Ep 12325 | reward +2.958 | Phase 3/3 | Rolling mean: 1.919 / β€” | Episodes in phase: 10100
525
+ [08:36:09] Ep 12350 | reward +3.208 | Phase 3/3 | Rolling mean: 1.923 / β€” | Episodes in phase: 10125
526
+ [08:36:10] Ep 12375 | reward +2.612 | Phase 3/3 | Rolling mean: 1.879 / β€” | Episodes in phase: 10150
527
+ [08:36:11] Ep 12400 | reward +2.650 | Phase 3/3 | Rolling mean: 1.900 / β€” | Episodes in phase: 10175
528
+ [08:36:23] Ep 12425 | reward +3.601 | Phase 3/3 | Rolling mean: 1.897 / β€” | Episodes in phase: 10200
529
+ [08:36:25] Ep 12450 | reward +3.106 | Phase 3/3 | Rolling mean: 1.974 / β€” | Episodes in phase: 10225
530
+ [08:36:26] Ep 12475 | reward +1.523 | Phase 3/3 | Rolling mean: 1.975 / β€” | Episodes in phase: 10250
531
+ [08:36:27] Ep 12500 | reward +3.107 | Phase 3/3 | Rolling mean: 2.040 / β€” | Episodes in phase: 10275
532
+ [08:36:42] Ep 12525 | reward +2.442 | Phase 3/3 | Rolling mean: 2.050 / β€” | Episodes in phase: 10300
533
+ [08:36:43] Ep 12550 | reward +0.632 | Phase 3/3 | Rolling mean: 2.060 / β€” | Episodes in phase: 10325
534
+ [08:36:45] Ep 12575 | reward +1.523 | Phase 3/3 | Rolling mean: 2.061 / β€” | Episodes in phase: 10350
535
+ [08:36:46] Ep 12600 | reward +2.288 | Phase 3/3 | Rolling mean: 2.032 / β€” | Episodes in phase: 10375
536
+ [08:36:58] Ep 12625 | reward +1.717 | Phase 3/3 | Rolling mean: 1.994 / β€” | Episodes in phase: 10400
537
+ [08:36:59] Ep 12650 | reward +2.290 | Phase 3/3 | Rolling mean: 1.946 / β€” | Episodes in phase: 10425
538
+ [08:37:00] Ep 12675 | reward +2.676 | Phase 3/3 | Rolling mean: 1.909 / β€” | Episodes in phase: 10450
539
+ [08:37:02] Ep 12700 | reward +1.032 | Phase 3/3 | Rolling mean: 1.874 / β€” | Episodes in phase: 10475
540
+ [08:37:14] Ep 12725 | reward +1.160 | Phase 3/3 | Rolling mean: 1.875 / β€” | Episodes in phase: 10500
541
+ [08:37:15] Ep 12750 | reward +0.979 | Phase 3/3 | Rolling mean: 1.908 / β€” | Episodes in phase: 10525
542
+ [08:37:18] Ep 12775 | reward +1.161 | Phase 3/3 | Rolling mean: 1.910 / β€” | Episodes in phase: 10550
543
+ [08:37:19] Ep 12800 | reward +3.096 | Phase 3/3 | Rolling mean: 1.908 / β€” | Episodes in phase: 10575
544
+ [08:37:32] Ep 12825 | reward +3.052 | Phase 3/3 | Rolling mean: 1.965 / β€” | Episodes in phase: 10600
545
+ [08:37:33] Ep 12850 | reward +1.685 | Phase 3/3 | Rolling mean: 1.986 / β€” | Episodes in phase: 10625
546
+ [08:37:34] Ep 12875 | reward +2.534 | Phase 3/3 | Rolling mean: 2.051 / β€” | Episodes in phase: 10650
547
+ [08:37:35] Ep 12900 | reward +1.032 | Phase 3/3 | Rolling mean: 2.098 / β€” | Episodes in phase: 10675
548
+ [08:37:48] Ep 12925 | reward +2.050 | Phase 3/3 | Rolling mean: 2.111 / β€” | Episodes in phase: 10700
549
+ [08:37:50] Ep 12950 | reward +2.943 | Phase 3/3 | Rolling mean: 2.070 / β€” | Episodes in phase: 10725
550
+ [08:37:51] Ep 12975 | reward +1.045 | Phase 3/3 | Rolling mean: 2.084 / β€” | Episodes in phase: 10750
551
+ [08:37:52] Ep 13000 | reward +1.650 | Phase 3/3 | Rolling mean: 2.082 / β€” | Episodes in phase: 10775
552
+ [08:38:09] Ep 13025 | reward +2.441 | Phase 3/3 | Rolling mean: 2.071 / β€” | Episodes in phase: 10800
553
+ [08:38:10] Ep 13050 | reward +0.963 | Phase 3/3 | Rolling mean: 2.100 / β€” | Episodes in phase: 10825
554
+ [08:38:11] Ep 13075 | reward +1.686 | Phase 3/3 | Rolling mean: 2.095 / β€” | Episodes in phase: 10850
555
+ [08:38:13] Ep 13100 | reward +1.686 | Phase 3/3 | Rolling mean: 2.062 / β€” | Episodes in phase: 10875
556
+ [08:38:27] Ep 13125 | reward +3.282 | Phase 3/3 | Rolling mean: 2.038 / β€” | Episodes in phase: 10900
557
+ [08:38:28] Ep 13150 | reward +0.731 | Phase 3/3 | Rolling mean: 2.049 / β€” | Episodes in phase: 10925
558
+ [08:38:29] Ep 13175 | reward +2.536 | Phase 3/3 | Rolling mean: 2.073 / β€” | Episodes in phase: 10950
559
+ [08:38:30] Ep 13200 | reward +3.398 | Phase 3/3 | Rolling mean: 2.072 / β€” | Episodes in phase: 10975
560
+ [08:38:43] Ep 13225 | reward +1.518 | Phase 3/3 | Rolling mean: 2.098 / β€” | Episodes in phase: 11000
561
+ [08:38:44] Ep 13250 | reward +2.680 | Phase 3/3 | Rolling mean: 2.090 / β€” | Episodes in phase: 11025
562
+ [08:38:47] Ep 13275 | reward +1.597 | Phase 3/3 | Rolling mean: 2.038 / β€” | Episodes in phase: 11050
563
+ [08:38:48] Ep 13300 | reward +1.098 | Phase 3/3 | Rolling mean: 2.072 / β€” | Episodes in phase: 11075
564
+ [08:39:02] Ep 13325 | reward +2.680 | Phase 3/3 | Rolling mean: 2.070 / β€” | Episodes in phase: 11100
565
+ [08:39:03] Ep 13350 | reward +2.517 | Phase 3/3 | Rolling mean: 2.073 / β€” | Episodes in phase: 11125
566
+ [08:39:04] Ep 13375 | reward +0.413 | Phase 3/3 | Rolling mean: 2.023 / β€” | Episodes in phase: 11150
567
+ [08:39:05] Ep 13400 | reward +3.291 | Phase 3/3 | Rolling mean: 1.999 / β€” | Episodes in phase: 11175
568
+ [08:39:19] Periodic save at step 30,000 ...
vec_normalize_latest.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a19434ad2950b7e456a15d2a529b3534fa1a4ef989b9d6bb52ad9da04c5b2a9
3
  size 166596
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfbb011443883d8e840fa1ee5aabaa1811de6c62e2b52edccfc92451d8294c05
3
  size 166596