Instructions to use garvitsachdeva/spindleflow-rl with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- stable-baselines3
How to use garvitsachdeva/spindleflow-rl with stable-baselines3:
from huggingface_sb3 import load_from_hub

checkpoint = load_from_hub(
    repo_id="garvitsachdeva/spindleflow-rl",
    filename="{MODEL FILENAME}.zip",
)
- Notebooks
- Google Colab
- Kaggle
Checkpoint at step 30,000
Browse files
- reward_curve.json +1 -1
- reward_curve.png +0 -0
- spindleflow_model_latest.zip +2 -2
- training_log.txt +101 -0
- vec_normalize_latest.pkl +1 -1
reward_curve.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"episodes": [0,
|
|
|
|
| 1 |
+
{"episodes": [0, 67, 134, 201, 268, 335, 402, 469, 536, 603, 670, 737, 804, 871, 938, 1005, 1072, 1139, 1206, 1273, 1340, 1407, 1474, 1541, 1608, 1675, 1742, 1809, 1876, 1943, 2010, 2077, 2144, 2211, 2278, 2345, 2412, 2479, 2546, 2613, 2680, 2747, 2814, 2881, 2948, 3015, 3082, 3149, 3216, 3283, 3350, 3417, 3484, 3551, 3618, 3685, 3752, 3819, 3886, 3953, 4020, 4087, 4154, 4221, 4288, 4355, 4422, 4489, 4556, 4623, 4690, 4757, 4824, 4891, 4958, 5025, 5092, 5159, 5226, 5293, 5360, 5427, 5494, 5561, 5628, 5695, 5762, 5829, 5896, 5963, 6030, 6097, 6164, 6231, 6298, 6365, 6432, 6499, 6566, 6633, 6700, 6767, 6834, 6901, 6968, 7035, 7102, 7169, 7236, 7303, 7370, 7437, 7504, 7571, 7638, 7705, 7772, 7839, 7906, 7973, 8040, 8107, 8174, 8241, 8308, 8375, 8442, 8509, 8576, 8643, 8710, 8777, 8844, 8911, 8978, 9045, 9112, 9179, 9246, 9313, 9380, 9447, 9514, 9581, 9648, 9715, 9782, 9849, 9916, 9983, 10050, 10117, 10184, 10251, 10318, 10385, 10452, 10519, 10586, 10653, 10720, 10787, 10854, 10921, 10988, 11055, 11122, 11189, 11256, 11323, 11390, 11457, 11524, 11591, 11658, 11725, 11792, 11859, 11926, 11993, 12060, 12127, 12194, 12261, 12328, 12395, 12462, 12529, 12596, 12663, 12730, 12797, 12864, 12931, 12998, 13065, 13132, 13199, 13266, 13333, 13400], "mean_rewards": [7.869385480880737, -0.4680378864902784, -0.48984615507501145, -0.5551410619040379, -0.4882915579094153, -0.4724334484614831, -0.4706858817982504, -0.49536512678172046, -0.488181437265121, -0.49218673040220723, -0.4815726703055059, -0.47754327278345865, -0.43784233579173726, -0.3844110888298712, -0.3559347061479154, -0.30412048705953304, -0.266127535926422, -0.1874622176040984, -0.14371091075008735, -0.02650309386219652, 0.03699350765202239, 0.11568798260192298, 0.1813001428686437, 0.234962336935236, 0.29998476978480104, 0.3494067042688368, 0.4188010052682893, 0.4982103691668488, 0.5493174741537327, 0.5519611505521989, 0.5843701104372269, 0.6528626296241695, 0.6805431383403456, 0.7341929864990648, 0.7834733703783795, 
0.8403997419894993, 0.8835701591176855, 0.9107975078377638, 0.9910712995973492, 1.0697907954830996, 1.1305848434251078, 1.1324089907647075, 1.172262050650997, 1.2235095935128422, 1.2517143468368366, 1.2724046870820789, 1.3005815128812432, 1.3231664394510776, 1.3306386821592848, 1.3325763804993283, 1.3586343029331016, 1.40881654574436, 1.4413863711607908, 1.4488289435588133, 1.4570241300250582, 1.4681397577771262, 1.499341548685888, 1.5262605464494776, 1.5235170595468455, 1.513340130324477, 1.5307294156028504, 1.5475655648487667, 1.5575386175265262, 1.5641262251109376, 1.5792971486511564, 1.5843108974196574, 1.6019751663429598, 1.5715943494819948, 1.5931392893901157, 1.6346125779210805, 1.6342209757898392, 1.617359425582728, 1.6516810876938788, 1.6577659129061755, 1.7004437399976031, 1.7311268771364081, 1.739649844424992, 1.7626731381482645, 1.7597383664972204, 1.7712974552394458, 1.7726923163304273, 1.7791328078125108, 1.7655685633567155, 1.7530806338330702, 1.7465425723497727, 1.7384169118655943, 1.7192243574163946, 1.7400900246355464, 1.7620760225301118, 1.7782208430467326, 1.7714304287994007, 1.763625342698084, 1.7601928197433543, 1.7576644544521427, 1.7743939839113496, 1.769043635923286, 1.810667814309779, 1.8216552026374302, 1.8478464421820198, 1.8269981258918702, 1.8744575704903192, 1.8827178977458279, 1.866869720512893, 1.8740134621361253, 1.8397641184360727, 1.8410966690533401, 1.8426638447435835, 1.8283637543299847, 1.8131030868182625, 1.8027979614466207, 1.7552007339833966, 1.7917554398061926, 1.809832562660328, 1.833266973746071, 1.8741590295668997, 1.880611007449434, 1.8882735193283657, 1.8773927914049378, 1.865357443877396, 1.8789978755369139, 1.8915645213065935, 1.8952674455164622, 1.9155843285400243, 1.9085990070265584, 1.90080054640393, 1.9256198932583044, 1.9366444012710071, 1.9493767221422222, 1.9839627611148725, 1.9764564716675697, 2.002629030457077, 1.9820295862487651, 1.9856562615653577, 1.9859819096593474, 1.9698493608136363, 
1.947345323234913, 1.9319047969357122, 1.9212369738192396, 1.915823275121511, 1.9219170818084552, 1.9338193936557564, 1.9390760462537227, 1.9549254322946799, 1.9771323770544638, 1.968752693505264, 1.9706327813298576, 1.979237121560172, 1.994255026763767, 1.9968812663644582, 2.0071281646386785, 1.9723110195435158, 1.9356860965843745, 1.9251148387156718, 1.9225462930269368, 1.9381719721641275, 1.9472862571208334, 1.9549440001736955, 1.943592506697141, 1.9607074977289554, 1.966358552660538, 1.9941627379199158, 2.017277606701966, 2.0028778048013227, 1.9798156342786526, 1.9463988085238573, 1.961345897422039, 1.9355557291343022, 1.9354226221486377, 1.9281403603033271, 1.9492224078499671, 1.9400939026771604, 1.957794354431826, 1.9947251642922874, 2.016812087167117, 2.0479436611309385, 2.025213644837225, 2.0251720844453738, 1.9995202234105298, 1.9965415502759112, 1.974851788492829, 1.9879056021842796, 1.9780626381897102, 1.9584395736040148, 1.9372385981398457, 1.9209191222658515, 1.9350956156822143, 1.960611907474231, 1.995446871384047, 1.9792517610698095, 1.9596557471280296, 1.9577739956190012, 1.9561633419888538, 1.967589806283026, 1.9925558521534272, 2.0040653944348117, 2.0273019113921604, 2.0106458478063964, 2.0137654255044515, 2.0376576196867973, 2.0654571994618025, 2.042709940440199], "raw_rewards": [7.869385480880737, -1.4724462032318115, -0.9430539831519127, -2.300199344754219, -0.8089542239904404, -1.4609763622283936, 1.1505420207977295, -1.4753345251083374, -1.0061301589012146, 1.4091460406780243, -1.7102899551391602, 0.8560748100280762, -1.2121876776218414, 0.8951746374368668, -0.3283916711807251, 1.4731740355491638, -1.5742558240890503, 1.3293038997799158, -1.9147660434246063, 2.114831119775772, 1.0735967457294464, 1.6181618869304657, 0.5473835095763206, -0.971550777554512, 2.4433728456497192, -0.5766929388046265, 0.6217614114284515, 0.1080636978149414, -1.6272716522216797, 0.801971822977066, 1.0068292915821075, 3.228931248188019, 0.9021086767315865, 
-0.1995646357536316, -1.6238511800765991, -1.621373176574707, 2.2451024651527405, 2.389804482460022, 0.174017071723938, -0.6571964621543884, 2.4821357131004333, 0.0889243632555008, -1.7433454543352127, 3.609505534172058, 2.2524542808532715, 2.5349577367305756, 1.3504652380943298, 2.934185355901718, 2.9824480414390564, -0.17685255408287048, 2.9204043447971344, 2.178996592760086, 1.329479992389679, 0.6736606955528259, 0.8348989896476269, 3.0606049299240112, 1.8705639839172363, 2.923104405403137, 1.4114362634718418, -0.1116487979888916, 2.0751985609531403, -1.1050611436367035, 0.5921751260757446, 0.9946736395359039, 1.8352702856063843, 0.796254113316536, 1.7449798882007599, 0.7606222331523895, 0.12062180042266846, 1.7189838290214539, 3.541672945022583, 1.5339390933513641, 2.767375946044922, 2.2297381162643433, -0.27031809091567993, 1.6640383005142212, 0.8177437596023083, 1.9806551933288574, 1.2750858068466187, 1.5373672842979431, 1.3882066011428833, 0.6665303558111191, 1.0681654214859009, 1.0687440931797028, 2.2572388648986816, 2.4432771801948547, 1.3739816546440125, 2.555147707462311, 1.5532113909721375, 1.5525203347206116, 1.0045947134494781, 2.4867050647735596, 2.0954394340515137, 1.6426711678504944, 2.5995242595672607, 0.4808852504938841, 1.495392918586731, 0.6842523217201233, 3.7635045051574707, 2.212641790509224, 3.5520986318588257, 1.2874490916728973, 1.5225331783294678, 2.5105870962142944, 2.647844910621643, 0.5050036013126373, 2.310435175895691, 2.4384663701057434, 2.8605822324752808, 2.404734432697296, 0.4821899086236954, 1.2641501389443874, 1.6644573211669922, 1.6970094442367554, 1.0590541064739227, -0.8185135126113892, 2.923989415168762, 0.46643319725990295, 1.3798049688339233, 1.27825129032135, 3.6883862614631653, 1.7710249423980713, 3.0534738898277283, 2.207607924938202, 1.0182976424694061, 2.965312957763672, 2.836131751537323, -1.1759849786758423, 0.8884927332401276, 1.3794639706611633, 1.0612505674362183, 1.510947585105896, 1.0037859827280045, 
2.1822192072868347, 2.5573002099990845, 0.9873855113983154, 0.9950265288352966, 1.686583399772644, 2.587091416120529, 1.0622715055942535, 2.9554337859153748, 1.7556167542934418, 2.477279305458069, 1.6855851411819458, 1.325159728527069, 3.6202566027641296, 0.9852837026119232, 2.3055906891822815, 1.8989630937576294, 3.8421117663383484, 3.716716766357422, 3.286076307296753, 2.095924496650696, 1.8744736313819885, 1.2755839824676514, 2.1855685114860535, 2.951150119304657, -0.03335070610046387, 2.9794004559516907, 1.974601686000824, 1.5195317268371582, 2.607335329055786, 2.901742935180664, 2.9537888169288635, 2.556188404560089, 1.6433136463165283, 1.981132984161377, 0.7490701808128506, 2.8644319772720337, 1.9312551617622375, 3.218564510345459, 1.717534363269806, 1.7507363855838776, 0.8022308275103569, 0.9781382381916046, 0.9786528944969177, 2.802109658718109, 2.290560722351074, 0.803216639906168, 3.095746695995331, 3.292268693447113, 2.4694343209266663, 1.7080549597740173, 2.6128552556037903, 1.0149105489253998, 2.6496033668518066, 1.5982393622398376, 3.1078230142593384, 2.3775070905685425, 1.684832751750946, 2.6528014540672302, 2.9457955360412598, 2.1817689538002014, 1.707306683063507, 2.385865569114685, 2.928332030773163, 1.6798464059829712, 3.398195207118988, 1.6161009706556797, 4.029117822647095, 1.5490476191043854], "step": 30000}
|
reward_curve.png
CHANGED
|
|
spindleflow_model_latest.zip
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6f19b14daaf52c171779e401edb5c73ca27af9785c0cbb20e0ef84ceca08999
|
| 3 |
+
size 143819554
|
training_log.txt
CHANGED
|
@@ -465,3 +465,104 @@
|
|
| 465 |
[08:31:36] Ep 10900 | reward +2.468 | Phase 3/3 | Rolling mean: 1.867 / β | Episodes in phase: 8675
|
| 466 |
[08:31:50] Ep 10925 | reward +1.010 | Phase 3/3 | Rolling mean: 1.803 / β | Episodes in phase: 8700
|
| 467 |
[08:31:50] Periodic save at step 25,000 ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 465 |
[08:31:36] Ep 10900 | reward +2.468 | Phase 3/3 | Rolling mean: 1.867 / β | Episodes in phase: 8675
|
| 466 |
[08:31:50] Ep 10925 | reward +1.010 | Phase 3/3 | Rolling mean: 1.803 / β | Episodes in phase: 8700
|
| 467 |
[08:31:50] Periodic save at step 25,000 ...
|
| 468 |
+
[08:31:53] Periodic push done β 5 files at step 25,000
|
| 469 |
+
[08:31:54] Ep 10950 | reward -0.240 | Phase 3/3 | Rolling mean: 1.819 / β | Episodes in phase: 8725
|
| 470 |
+
[08:31:55] Ep 10975 | reward +2.557 | Phase 3/3 | Rolling mean: 1.794 / β | Episodes in phase: 8750
|
| 471 |
+
[08:31:58] Ep 11000 | reward +1.679 | Phase 3/3 | Rolling mean: 1.837 / β | Episodes in phase: 8775
|
| 472 |
+
[08:32:12] Ep 11025 | reward +1.643 | Phase 3/3 | Rolling mean: 1.874 / β | Episodes in phase: 8800
|
| 473 |
+
[08:32:13] Ep 11050 | reward +1.746 | Phase 3/3 | Rolling mean: 1.874 / β | Episodes in phase: 8825
|
| 474 |
+
[08:32:14] Ep 11075 | reward +2.555 | Phase 3/3 | Rolling mean: 1.932 / β | Episodes in phase: 8850
|
| 475 |
+
[08:32:15] Ep 11100 | reward +1.653 | Phase 3/3 | Rolling mean: 1.908 / β | Episodes in phase: 8875
|
| 476 |
+
[08:32:28] Ep 11125 | reward +0.585 | Phase 3/3 | Rolling mean: 1.906 / β | Episodes in phase: 8900
|
| 477 |
+
[08:32:30] Ep 11150 | reward -0.566 | Phase 3/3 | Rolling mean: 1.899 / β | Episodes in phase: 8925
|
| 478 |
+
[08:32:31] Ep 11175 | reward +3.657 | Phase 3/3 | Rolling mean: 1.977 / β | Episodes in phase: 8950
|
| 479 |
+
[08:32:32] Ep 11200 | reward +3.456 | Phase 3/3 | Rolling mean: 1.963 / β | Episodes in phase: 8975
|
| 480 |
+
[08:32:47] Ep 11225 | reward +3.456 | Phase 3/3 | Rolling mean: 2.016 / β | Episodes in phase: 9000
|
| 481 |
+
[08:32:50] Ep 11250 | reward +2.387 | Phase 3/3 | Rolling mean: 1.988 / β | Episodes in phase: 9025
|
| 482 |
+
[08:32:52] Ep 11275 | reward +3.258 | Phase 3/3 | Rolling mean: 1.953 / β | Episodes in phase: 9050
|
| 483 |
+
[08:32:53] Ep 11300 | reward +3.181 | Phase 3/3 | Rolling mean: 2.048 / β | Episodes in phase: 9075
|
| 484 |
+
[08:33:07] Ep 11325 | reward +2.864 | Phase 3/3 | Rolling mean: 2.096 / β | Episodes in phase: 9100
|
| 485 |
+
[08:33:08] Ep 11350 | reward +1.978 | Phase 3/3 | Rolling mean: 2.099 / β | Episodes in phase: 9125
|
| 486 |
+
[08:33:09] Ep 11375 | reward +0.465 | Phase 3/3 | Rolling mean: 2.069 / β | Episodes in phase: 9150
|
| 487 |
+
[08:33:10] Ep 11400 | reward +3.197 | Phase 3/3 | Rolling mean: 2.076 / β | Episodes in phase: 9175
|
| 488 |
+
[08:33:23] Ep 11425 | reward +1.710 | Phase 3/3 | Rolling mean: 2.003 / β | Episodes in phase: 9200
|
| 489 |
+
[08:33:24] Ep 11450 | reward +2.868 | Phase 3/3 | Rolling mean: 2.058 / β | Episodes in phase: 9225
|
| 490 |
+
[08:33:26] Ep 11475 | reward +1.904 | Phase 3/3 | Rolling mean: 2.068 / β | Episodes in phase: 9250
|
| 491 |
+
[08:33:28] Ep 11500 | reward +1.711 | Phase 3/3 | Rolling mean: 2.019 / β | Episodes in phase: 9275
|
| 492 |
+
[08:33:41] Ep 11525 | reward +1.751 | Phase 3/3 | Rolling mean: 2.053 / β | Episodes in phase: 9300
|
| 493 |
+
[08:33:42] Ep 11550 | reward +1.387 | Phase 3/3 | Rolling mean: 2.094 / β | Episodes in phase: 9325
|
| 494 |
+
[08:33:44] Ep 11575 | reward +1.035 | Phase 3/3 | Rolling mean: 2.112 / β | Episodes in phase: 9350
|
| 495 |
+
[08:33:45] Ep 11600 | reward +2.211 | Phase 3/3 | Rolling mean: 2.132 / β | Episodes in phase: 9375
|
| 496 |
+
[08:33:57] Ep 11625 | reward +3.292 | Phase 3/3 | Rolling mean: 2.128 / β | Episodes in phase: 9400
|
| 497 |
+
[08:33:58] Ep 11650 | reward +0.802 | Phase 3/3 | Rolling mean: 2.109 / β | Episodes in phase: 9425
|
| 498 |
+
[08:33:59] Ep 11675 | reward +3.068 | Phase 3/3 | Rolling mean: 2.110 / β | Episodes in phase: 9450
|
| 499 |
+
[08:34:00] Ep 11700 | reward +2.431 | Phase 3/3 | Rolling mean: 2.051 / β | Episodes in phase: 9475
|
| 500 |
+
[08:34:14] Ep 11725 | reward +1.524 | Phase 3/3 | Rolling mean: 1.986 / β | Episodes in phase: 9500
|
| 501 |
+
[08:34:17] Ep 11750 | reward +0.803 | Phase 3/3 | Rolling mean: 1.906 / β | Episodes in phase: 9525
|
| 502 |
+
[08:34:18] Ep 11775 | reward +1.523 | Phase 3/3 | Rolling mean: 1.945 / β | Episodes in phase: 9550
|
| 503 |
+
[08:34:19] Ep 11800 | reward +3.072 | Phase 3/3 | Rolling mean: 1.877 / β | Episodes in phase: 9575
|
| 504 |
+
[08:34:33] Ep 11825 | reward -0.071 | Phase 3/3 | Rolling mean: 1.839 / β | Episodes in phase: 9600
|
| 505 |
+
[08:34:35] Ep 11850 | reward +3.872 | Phase 3/3 | Rolling mean: 1.807 / β | Episodes in phase: 9625
|
| 506 |
+
[08:34:36] Ep 11875 | reward +0.767 | Phase 3/3 | Rolling mean: 1.798 / β | Episodes in phase: 9650
|
| 507 |
+
[08:34:37] Ep 11900 | reward +2.220 | Phase 3/3 | Rolling mean: 1.846 / β | Episodes in phase: 9675
|
| 508 |
+
[08:34:52] Ep 11925 | reward -0.071 | Phase 3/3 | Rolling mean: 1.883 / β | Episodes in phase: 9700
|
| 509 |
+
[08:34:54] Ep 11950 | reward +0.199 | Phase 3/3 | Rolling mean: 1.926 / β | Episodes in phase: 9725
|
| 510 |
+
[08:34:55] Ep 11975 | reward +0.804 | Phase 3/3 | Rolling mean: 1.835 / β | Episodes in phase: 9750
|
| 511 |
+
[08:34:58] Ep 12000 | reward +0.978 | Phase 3/3 | Rolling mean: 1.918 / β | Episodes in phase: 9775
|
| 512 |
+
[08:35:11] Ep 12025 | reward +3.038 | Phase 3/3 | Rolling mean: 2.018 / β | Episodes in phase: 9800
|
| 513 |
+
[08:35:13] Ep 12050 | reward +1.684 | Phase 3/3 | Rolling mean: 2.013 / β | Episodes in phase: 9825
|
| 514 |
+
[08:35:14] Ep 12075 | reward +2.189 | Phase 3/3 | Rolling mean: 2.020 / β | Episodes in phase: 9850
|
| 515 |
+
[08:35:16] Ep 12100 | reward +1.327 | Phase 3/3 | Rolling mean: 1.993 / β | Episodes in phase: 9875
|
| 516 |
+
[08:35:29] Ep 12125 | reward +1.684 | Phase 3/3 | Rolling mean: 1.987 / β | Episodes in phase: 9900
|
| 517 |
+
[08:35:31] Ep 12150 | reward +0.586 | Phase 3/3 | Rolling mean: 1.976 / β | Episodes in phase: 9925
|
| 518 |
+
[08:35:32] Ep 12175 | reward +2.470 | Phase 3/3 | Rolling mean: 2.025 / β | Episodes in phase: 9950
|
| 519 |
+
[08:35:34] Ep 12200 | reward +1.239 | Phase 3/3 | Rolling mean: 2.021 / β | Episodes in phase: 9975
|
| 520 |
+
[08:35:46] Ep 12225 | reward +2.182 | Phase 3/3 | Rolling mean: 1.990 / β | Episodes in phase: 10000
|
| 521 |
+
[08:35:49] Ep 12250 | reward +2.294 | Phase 3/3 | Rolling mean: 1.973 / β | Episodes in phase: 10025
|
| 522 |
+
[08:35:50] Ep 12275 | reward +3.076 | Phase 3/3 | Rolling mean: 1.959 / β | Episodes in phase: 10050
|
| 523 |
+
[08:35:52] Ep 12300 | reward +2.295 | Phase 3/3 | Rolling mean: 1.911 / β | Episodes in phase: 10075
|
| 524 |
+
[08:36:07] Ep 12325 | reward +2.958 | Phase 3/3 | Rolling mean: 1.919 / β | Episodes in phase: 10100
|
| 525 |
+
[08:36:09] Ep 12350 | reward +3.208 | Phase 3/3 | Rolling mean: 1.923 / β | Episodes in phase: 10125
|
| 526 |
+
[08:36:10] Ep 12375 | reward +2.612 | Phase 3/3 | Rolling mean: 1.879 / β | Episodes in phase: 10150
|
| 527 |
+
[08:36:11] Ep 12400 | reward +2.650 | Phase 3/3 | Rolling mean: 1.900 / β | Episodes in phase: 10175
|
| 528 |
+
[08:36:23] Ep 12425 | reward +3.601 | Phase 3/3 | Rolling mean: 1.897 / β | Episodes in phase: 10200
|
| 529 |
+
[08:36:25] Ep 12450 | reward +3.106 | Phase 3/3 | Rolling mean: 1.974 / β | Episodes in phase: 10225
|
| 530 |
+
[08:36:26] Ep 12475 | reward +1.523 | Phase 3/3 | Rolling mean: 1.975 / β | Episodes in phase: 10250
|
| 531 |
+
[08:36:27] Ep 12500 | reward +3.107 | Phase 3/3 | Rolling mean: 2.040 / β | Episodes in phase: 10275
|
| 532 |
+
[08:36:42] Ep 12525 | reward +2.442 | Phase 3/3 | Rolling mean: 2.050 / β | Episodes in phase: 10300
|
| 533 |
+
[08:36:43] Ep 12550 | reward +0.632 | Phase 3/3 | Rolling mean: 2.060 / β | Episodes in phase: 10325
|
| 534 |
+
[08:36:45] Ep 12575 | reward +1.523 | Phase 3/3 | Rolling mean: 2.061 / β | Episodes in phase: 10350
|
| 535 |
+
[08:36:46] Ep 12600 | reward +2.288 | Phase 3/3 | Rolling mean: 2.032 / β | Episodes in phase: 10375
|
| 536 |
+
[08:36:58] Ep 12625 | reward +1.717 | Phase 3/3 | Rolling mean: 1.994 / β | Episodes in phase: 10400
|
| 537 |
+
[08:36:59] Ep 12650 | reward +2.290 | Phase 3/3 | Rolling mean: 1.946 / β | Episodes in phase: 10425
|
| 538 |
+
[08:37:00] Ep 12675 | reward +2.676 | Phase 3/3 | Rolling mean: 1.909 / β | Episodes in phase: 10450
|
| 539 |
+
[08:37:02] Ep 12700 | reward +1.032 | Phase 3/3 | Rolling mean: 1.874 / β | Episodes in phase: 10475
|
| 540 |
+
[08:37:14] Ep 12725 | reward +1.160 | Phase 3/3 | Rolling mean: 1.875 / β | Episodes in phase: 10500
|
| 541 |
+
[08:37:15] Ep 12750 | reward +0.979 | Phase 3/3 | Rolling mean: 1.908 / β | Episodes in phase: 10525
|
| 542 |
+
[08:37:18] Ep 12775 | reward +1.161 | Phase 3/3 | Rolling mean: 1.910 / β | Episodes in phase: 10550
|
| 543 |
+
[08:37:19] Ep 12800 | reward +3.096 | Phase 3/3 | Rolling mean: 1.908 / β | Episodes in phase: 10575
|
| 544 |
+
[08:37:32] Ep 12825 | reward +3.052 | Phase 3/3 | Rolling mean: 1.965 / β | Episodes in phase: 10600
|
| 545 |
+
[08:37:33] Ep 12850 | reward +1.685 | Phase 3/3 | Rolling mean: 1.986 / β | Episodes in phase: 10625
|
| 546 |
+
[08:37:34] Ep 12875 | reward +2.534 | Phase 3/3 | Rolling mean: 2.051 / β | Episodes in phase: 10650
|
| 547 |
+
[08:37:35] Ep 12900 | reward +1.032 | Phase 3/3 | Rolling mean: 2.098 / β | Episodes in phase: 10675
|
| 548 |
+
[08:37:48] Ep 12925 | reward +2.050 | Phase 3/3 | Rolling mean: 2.111 / β | Episodes in phase: 10700
|
| 549 |
+
[08:37:50] Ep 12950 | reward +2.943 | Phase 3/3 | Rolling mean: 2.070 / β | Episodes in phase: 10725
|
| 550 |
+
[08:37:51] Ep 12975 | reward +1.045 | Phase 3/3 | Rolling mean: 2.084 / β | Episodes in phase: 10750
|
| 551 |
+
[08:37:52] Ep 13000 | reward +1.650 | Phase 3/3 | Rolling mean: 2.082 / β | Episodes in phase: 10775
|
| 552 |
+
[08:38:09] Ep 13025 | reward +2.441 | Phase 3/3 | Rolling mean: 2.071 / β | Episodes in phase: 10800
|
| 553 |
+
[08:38:10] Ep 13050 | reward +0.963 | Phase 3/3 | Rolling mean: 2.100 / β | Episodes in phase: 10825
|
| 554 |
+
[08:38:11] Ep 13075 | reward +1.686 | Phase 3/3 | Rolling mean: 2.095 / β | Episodes in phase: 10850
|
| 555 |
+
[08:38:13] Ep 13100 | reward +1.686 | Phase 3/3 | Rolling mean: 2.062 / β | Episodes in phase: 10875
|
| 556 |
+
[08:38:27] Ep 13125 | reward +3.282 | Phase 3/3 | Rolling mean: 2.038 / β | Episodes in phase: 10900
|
| 557 |
+
[08:38:28] Ep 13150 | reward +0.731 | Phase 3/3 | Rolling mean: 2.049 / β | Episodes in phase: 10925
|
| 558 |
+
[08:38:29] Ep 13175 | reward +2.536 | Phase 3/3 | Rolling mean: 2.073 / β | Episodes in phase: 10950
|
| 559 |
+
[08:38:30] Ep 13200 | reward +3.398 | Phase 3/3 | Rolling mean: 2.072 / β | Episodes in phase: 10975
|
| 560 |
+
[08:38:43] Ep 13225 | reward +1.518 | Phase 3/3 | Rolling mean: 2.098 / β | Episodes in phase: 11000
|
| 561 |
+
[08:38:44] Ep 13250 | reward +2.680 | Phase 3/3 | Rolling mean: 2.090 / β | Episodes in phase: 11025
|
| 562 |
+
[08:38:47] Ep 13275 | reward +1.597 | Phase 3/3 | Rolling mean: 2.038 / β | Episodes in phase: 11050
|
| 563 |
+
[08:38:48] Ep 13300 | reward +1.098 | Phase 3/3 | Rolling mean: 2.072 / β | Episodes in phase: 11075
|
| 564 |
+
[08:39:02] Ep 13325 | reward +2.680 | Phase 3/3 | Rolling mean: 2.070 / β | Episodes in phase: 11100
|
| 565 |
+
[08:39:03] Ep 13350 | reward +2.517 | Phase 3/3 | Rolling mean: 2.073 / β | Episodes in phase: 11125
|
| 566 |
+
[08:39:04] Ep 13375 | reward +0.413 | Phase 3/3 | Rolling mean: 2.023 / β | Episodes in phase: 11150
|
| 567 |
+
[08:39:05] Ep 13400 | reward +3.291 | Phase 3/3 | Rolling mean: 1.999 / β | Episodes in phase: 11175
|
| 568 |
+
[08:39:19] Periodic save at step 30,000 ...
|
vec_normalize_latest.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 166596
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dfbb011443883d8e840fa1ee5aabaa1811de6c62e2b52edccfc92451d8294c05
|
| 3 |
size 166596
|