Instructions to use garvitsachdeva/spindleflow-rl with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- stable-baselines3
How to use garvitsachdeva/spindleflow-rl with stable-baselines3:
```python
from huggingface_sb3 import load_from_hub

checkpoint = load_from_hub(
    repo_id="garvitsachdeva/spindleflow-rl",
    filename="{MODEL FILENAME}.zip",
)
```
- Notebooks
- Google Colab
- Kaggle
Checkpoint at step 10,000
Browse files
- reward_curve.json +1 -1
- reward_curve.png +0 -0
- spindleflow_model_latest.zip +2 -2
- training_log.txt +86 -0
- vec_normalize_latest.pkl +1 -1
reward_curve.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"episodes": [0, 7, 14, 21, 28, 35, 42, 49, 56, 63, 70, 77, 84, 91, 98, 105, 112, 119, 126, 133, 140, 147, 154, 161, 168, 175, 182, 189, 196, 203, 210, 217, 224, 231, 238, 245, 252, 259, 266, 273, 280, 287, 294, 301, 308, 315, 322, 329, 336, 343, 350, 357, 364, 371, 378, 385, 392, 399, 406, 413, 420, 427, 434, 441, 448, 455, 462, 469, 476, 483, 490, 497, 504, 511, 518, 525, 532, 539, 546, 553, 560, 567, 574, 581, 588, 595, 602, 609, 616, 623, 630, 637, 644, 651, 658, 665, 672, 679, 686, 693, 700, 707, 714, 721, 728, 735, 742, 749, 756, 763, 770, 777, 784, 791, 798, 805, 812, 819, 826, 833, 840, 847, 854, 861, 868, 875, 882, 889, 896, 903, 910, 917, 924, 931, 938, 945, 952, 959, 966, 973, 980, 987, 994, 1001, 1008, 1015, 1022, 1029, 1036, 1043, 1050, 1057, 1064, 1071, 1078, 1085, 1092, 1099, 1106, 1113, 1120, 1127, 1134, 1141, 1148, 1155, 1162, 1169, 1176, 1183, 1190, 1197, 1204, 1211, 1218, 1225, 1232, 1239, 1246, 1253, 1260, 1267, 1274, 1281, 1288, 1295, 1302, 1309, 1316, 1323, 1330, 1337, 1344, 1351, 1358, 1365, 1372, 1379, 1386, 1393, 1400, 1407, 1414, 1421, 1428, 1435, 1442, 1449, 1456, 1463, 1470, 1477, 1484, 1491, 1498, 1505, 1512], "mean_rewards": [2.011087656021118, -0.005159212742000818, -0.1737367016573747, -0.16313832066953182, -0.3787871667280279, -0.48352695473780233, -0.5125730392024961, -0.5108446795493364, -0.591070648936326, -0.548012486717198, -0.5592205684474656, -0.648504812123352, -0.6919027066936618, -0.6955952520825361, -0.7180035078015766, -0.6574326691854941, -0.5642441839077755, -0.4426701495816049, -0.42057076522982434, -0.39051416334941197, -0.34610133123044906, -0.37051013518909093, -0.34348269900012957, -0.4140826832797182, -0.3751730122358391, -0.4399783388270359, -0.47836029264879854, -0.49714391801114144, -0.5542931245443853, -0.5445789469798145, -0.5709554771157471, -0.6067243746334785, -0.6017522437890109, -0.6369883727870489, -0.5803498540465769, -0.5577858655076278, -0.5325302795359963, -0.48865976008145434, -0.5554399732990485, 
-0.5570450848164527, -0.5919859797056568, -0.5692832094960307, -0.5678086980295024, -0.5382031025364995, -0.47720208489581156, -0.3598009528298127, -0.40263426323470314, -0.362639883925256, -0.43668582216885526, -0.44475414255928053, -0.4044125404110865, -0.38040371361727776, -0.4100033921728793, -0.4820523009586491, -0.44493751438628687, -0.43020405066444684, -0.39207222337197317, -0.35661065328474106, -0.3769962504800213, -0.38675586683185476, -0.38980928779040513, -0.37443856669491843, -0.4122140955081896, -0.39305691680821936, -0.2994311030856089, -0.2579036255514151, -0.28448272915557027, -0.43960564032098964, -0.4300747401767263, -0.4077215094462429, -0.3728640738041385, -0.27169732158807547, -0.2620353500456794, -0.18189748504052036, -0.1810223087668419, -0.20924439489547358, -0.2289466186622648, -0.2507834549209005, -0.15905087524534842, -0.13111531837402204, -0.15006807980764852, -0.18575034001351973, -0.2604610369491734, -0.2928624492089607, -0.3075465619269955, -0.24049769925247683, -0.22923651928278177, -0.2999157253897896, -0.31141990337422804, -0.33937625506108526, -0.42456708659761044, -0.43582449468636986, -0.5326400213678809, -0.5070261559939306, -0.4317533519658211, -0.33543822365371806, -0.44631047938999374, -0.3564543535834865, -0.26606688962170955, -0.21719291747400635, -0.27412202130807073, -0.2125598310345882, -0.07335185637011339, -0.0011550859106998694, 0.018151712015663322, -0.0774569240349688, -0.14214420813674988, -0.11551256982707664, -0.16595394688805468, -0.23713979571077384, -0.16795892655653388, -0.06744629551509493, -0.11995828043865531, -0.12436404804650106, -0.14944038054856815, -0.23180527205726034, -0.2659248520963286, -0.2523376572583067, -0.19898790969072203, -0.20628121068799182, -0.16211953068054036, -0.1457904805490551, -0.16110725120616784, -0.200850348215011, -0.22606607534506015, -0.2517521789505784, -0.17527172132395208, -0.07480273323476706, -0.0609145848889296, -0.04951209085993469, -0.021120717859287794, 
-0.016507455736006562, -0.032915668678469956, -0.07218090113325927, -0.044014879569754396, 0.02370264898912099, 0.1031873529887219, 0.10730368968736577, 0.0996610147903036, 0.050899871637260445, 0.09100137392402087, 0.015319903932609841, -0.10438356234839088, -0.0165395066632252, -0.004070776514708996, -0.07922083899182708, -0.13058845507667252, -0.12486932470806335, -0.09150928616719811, -0.1680179141755951, -0.13921746273378008, -0.09083811982877944, -0.03548207682998557, -0.04591693949738616, 0.0030767117675982022, 0.07675606031951152, 0.10681556083114915, 0.07763870174408351, 0.11034342362300346, 0.06843050755560398, 0.06757143875093836, 0.01234445064083526, 0.02761492897805415, -0.03187786944602665, -0.032681759593910294, -0.16471537766291908, -0.24135849561150136, -0.23454911743388757, -0.1854007679503411, -0.18656958046516306, -0.18510495003704963, -0.10837438682976522, -0.030741750409728603, -0.02555339254046741, 0.013502296648527446, 0.19569833522760555, 0.3064040861729729, 0.29516894110527475, 0.33559935329187857, 0.3795126823120211, 0.3801108545280601, 0.3302895774280554, 0.369674541350258, 0.31977684952710805, 0.29791844812662976, 0.30011620813686596, 0.308363941056948, 0.2674849516663112, 0.3455620188462107, 0.34136716354834407, 0.3282235764751309, 0.4036587957096727, 0.4394211591662545, 0.5384622680905619, 0.5320534381623331, 0.47804044314513083, 0.5449790290899967, 0.41111522383595767, 0.5577673092484474, 0.5171146543794557, 0.5383412212280458, 0.5662813418740896, 0.48640087474823784, 0.5216721576932621, 0.5288327510566696, 0.5378777498232299, 0.6355279753425795, 0.6394623351087304, 0.8364699263076641, 0.7872928960101777, 0.753283737594948, 0.8241279435981261, 0.8046827170213586, 0.8140687913584866, 0.7523131846499286, 0.6723473424974241, 0.6993183063735303], "raw_rewards": [2.011087656021118, -1.6073665618896484, 0.7498452365398407, 0.2025524079799652, -1.6622264385223389, -1.5905271768569946, -0.38584309816360474, 1.0968736708164215, 
-1.2686316668987274, -1.5422813892364502, -1.5154712200164795, -1.494507908821106, -1.4630639553070068, -0.7266715168952942, 1.3441461846232414, -1.4742072820663452, 1.3427048027515411, -1.486713171005249, 0.37279677391052246, -0.08876031637191772, 1.0587169826030731, -1.384795069694519, -1.5081825256347656, -1.4929274320602417, 2.797714591026306, -1.47134530544281, 1.7549763023853302, -1.4590506553649902, -0.3829669952392578, 0.158383309841156, -1.9421470761299133, -1.2414164543151855, -1.3137173354625702, -1.481404185295105, -0.9551828801631927, -0.2936665415763855, -2.085797905921936, 1.0033707320690155, 0.8391484022140503, 0.653633639216423, -1.8033610582351685, 0.8675942420959473, -1.5180444717407227, -1.5836696326732635, 0.6177796721458435, 0.7355930209159851, 0.877733588218689, 1.3888882398605347, 0.3279541917145252, -0.8971331119537354, 1.4678781032562256, -0.8665207028388977, -1.5461901426315308, -1.3819766342639923, -0.6615731120109558, -1.552343487739563, -2.1833449825644493, -1.5418636798858643, 2.6514883637428284, -3.489000581204891, -1.532758355140686, -1.525796890258789, -2.4441225975751877, -2.568617880344391, -2.0896497666835785, 0.02511417865753174, 0.9480623602867126, 1.7094351649284363, 1.4755454063415527, 1.8405121266841888, 0.42442914843559265, -0.7997810021042824, 1.0508716404438019, 1.2704868093132973, -3.3804259300231934, 0.980677954852581, -1.4998018816113472, -1.544227957725525, -3.4981040954589844, 1.5663873851299286, -0.6641147136688232, 0.27374209463596344, -2.0531827434897423, -1.657704919576645, -1.8326283991336823, 1.6819107234477997, -0.3654499650001526, -0.6246205270290375, 1.4166848957538605, 2.029248148202896, -0.29613277316093445, -0.588357999920845, -0.25787171721458435, 0.7583343088626862, 0.562331348657608, -1.557992935180664, -1.1823661029338837, 0.3907746523618698, -0.4797786474227905, -1.5714150667190552, -0.3134910464286804, 0.8237113058567047, -0.9736100733280182, -1.211975336074829, -1.5755164623260498, 
-0.9478763937950134, -1.5776058435440063, 2.1475477814674377, -0.8160175681114197, -3.0692324340343475, -0.7797359824180603, 1.4694251716136932, -0.8319140374660492, 2.2456377148628235, -2.098494417965412, -1.287732794880867, -1.4832541942596436, -1.5874537229537964, -1.588030457496643, -1.2483505010604858, -0.2727857828140259, 0.8989545307122171, -1.5966110229492188, 2.2053601145744324, -0.717426210641861, -2.831665724515915, 1.736989140510559, -1.188669204711914, -0.19181019067764282, -1.1811168491840363, -0.4813724160194397, 0.6415759027004242, 1.293091356754303, -0.0336492657661438, -0.44722092151641846, -1.9554425179958344, 0.1328943967819214, -0.7047230452299118, 1.6609033942222595, -1.382057100534439, 0.07072758674621582, 0.12381599843502045, 0.25281383097171783, 1.180851012468338, -0.23011904954910278, -1.0367010533809662, -0.6570718288421631, -1.613729476928711, 2.0679474472999573, -1.5930011868476868, -1.0900622457265854, -0.1502295732498169, 0.48140305280685425, 2.2967147827148438, 2.0166079998016357, 1.2933931052684784, 1.0831645131111145, -1.3386335968971252, 0.8910820409655571, -0.5921838283538818, -0.7030514478683472, -1.617857813835144, 0.7024455070495605, -1.753804087638855, -0.3411940596997738, -1.7523786127567291, -0.8349017053842545, -1.6273200511932373, 1.2151376754045486, -1.0690277218818665, -1.3782235980033875, 1.008091401308775, 2.0703495740890503, 0.4502451419830322, -1.6283951997756958, 0.477810263633728, 0.9067497551441193, 1.4728646278381348, -1.6322773694992065, 1.5017837882041931, 1.5531878471374512, 1.020134299993515, 0.8596443384885788, -0.8000854253768921, 1.0358693972229958, -1.3619660139083862, 1.6163530200719833, -1.6371392011642456, 0.16331470012664795, 0.5777678936719894, 0.47991079092025757, 0.7756211757659912, 1.0058870911598206, 1.6384191513061523, -1.2334475964307785, -1.3401591777801514, 0.2782462537288666, 0.10824376344680786, 0.5431969165802002, 0.7870588600635529, -0.8065369129180908, 0.5831526145339012, 
2.441283255815506, 2.3787267208099365, 0.2636760473251343, 2.3850573301315308, 2.1936150193214417, 0.1838168501853943, 0.938224121928215, 2.5081106424331665, 0.312656968832016, 3.3604268431663513, 1.5672679245471954, 1.7494555711746216, -0.3077901601791382, 1.4622975587844849, -2.4785069823265076], "step": 5000}
|
|
|
|
| 1 |
+
{"episodes": [0, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255, 272, 289, 306, 323, 340, 357, 374, 391, 408, 425, 442, 459, 476, 493, 510, 527, 544, 561, 578, 595, 612, 629, 646, 663, 680, 697, 714, 731, 748, 765, 782, 799, 816, 833, 850, 867, 884, 901, 918, 935, 952, 969, 986, 1003, 1020, 1037, 1054, 1071, 1088, 1105, 1122, 1139, 1156, 1173, 1190, 1207, 1224, 1241, 1258, 1275, 1292, 1309, 1326, 1343, 1360, 1377, 1394, 1411, 1428, 1445, 1462, 1479, 1496, 1513, 1530, 1547, 1564, 1581, 1598, 1615, 1632, 1649, 1666, 1683, 1700, 1717, 1734, 1751, 1768, 1785, 1802, 1819, 1836, 1853, 1870, 1887, 1904, 1921, 1938, 1955, 1972, 1989, 2006, 2023, 2040, 2057, 2074, 2091, 2108, 2125, 2142, 2159, 2176, 2193, 2210, 2227, 2244, 2261, 2278, 2295, 2312, 2329, 2346, 2363, 2380, 2397, 2414, 2431, 2448, 2465, 2482, 2499, 2516, 2533, 2550, 2567, 2584, 2601, 2618, 2635, 2652, 2669, 2686, 2703, 2720, 2737, 2754, 2771, 2788, 2805, 2822, 2839, 2856, 2873, 2890, 2907, 2924, 2941, 2958, 2975, 2992, 3009, 3026, 3043, 3060, 3077, 3094, 3111, 3128, 3145, 3162, 3179, 3196, 3213, 3230, 3247, 3264, 3281, 3298, 3315, 3332, 3349, 3366, 3383, 3400, 3417, 3434, 3451, 3468, 3485, 3502, 3519, 3536, 3553, 3570, 3587], "mean_rewards": [2.011087656021118, -0.2955534774810076, -0.4518983769629683, -0.5130270347715571, -0.5884510348348514, -0.6146727343371441, -0.5832104569716939, -0.4769364972909292, -0.45907663548514793, -0.47041571469275983, -0.5141931981371161, -0.5323584432196286, -0.5486976694729593, -0.5091634937458568, -0.5360742221896847, -0.49734610350181657, -0.47413736968818637, -0.5107768824117052, -0.5592596196466022, -0.5413830657800038, -0.4893766444383396, -0.45496046719037825, -0.5066750696549813, -0.43206344383458295, -0.4036316062220269, -0.4232319062575698, -0.42188012678590087, -0.3698560318288704, -0.37320280266511774, -0.3746683648166557, -0.34388839825987816, -0.31580650557039514, -0.2758511932463282, -0.24050097487245997, -0.32197332010707924, 
-0.26467262854178747, -0.2839709573942754, -0.2670445270629393, -0.34365008569632965, -0.28455998617120914, -0.2997434325205783, -0.30469939913600685, -0.242857457148946, -0.25027625085785987, -0.27829118674207065, -0.24337980265004766, -0.21990590929571124, -0.16854962930083275, -0.20288090908692943, -0.1292373951524496, -0.12241048797602869, -0.1614725338217492, -0.14964828973946473, -0.144838829861126, -0.11243235824836625, -0.10747746042907239, -0.06723430049088266, -0.07589628394279214, -0.0374208166781399, -0.03335582692590025, -0.021998412451810306, -0.02321966322294126, -0.006664830971405738, -0.027873310069036154, 0.00842238911944959, -0.016705316863954066, -0.04267268580281072, -0.04158623887019025, -0.11595900414718521, -0.05244716699752543, -0.054362980876531866, -0.003884143920408355, 0.06428486878673235, 0.09581019547250536, 0.08221854733096229, 0.07250368547004958, 0.12679913942184712, 0.18743467438552114, 0.23325088487731085, 0.33666753431575164, 0.40760328796588713, 0.41092346865269874, 0.4055561059878932, 0.4046554202731285, 0.48118276003127297, 0.5378358737772537, 0.6073094252393477, 0.6417163390976687, 0.6189819134668344, 0.6163609216714071, 0.5998688423902624, 0.6330475635102226, 0.689348201877955, 0.6989480689374937, 0.7731089792731736, 0.7278057174343202, 0.7121029947160019, 0.6729883205637128, 0.6418123173035888, 0.6852266083600828, 0.7023001437204787, 0.752039143218038, 0.7627319326650144, 0.7618968613264668, 0.6623063287206201, 0.7456497497882487, 0.7275835569559906, 0.6967754760989919, 0.8090677741242366, 0.8169394264152895, 0.919980839220807, 0.8789278343785554, 0.9529377199347234, 0.9400865443930444, 1.0124707058537752, 1.058885036641732, 1.0660031672204948, 1.1172220833754787, 1.065370492776856, 1.0548661435643831, 1.0662114470369286, 1.0407538239611314, 1.05513615018119, 1.0300908629860108, 1.0446762847998696, 0.9896005688209294, 0.9840956180847974, 0.9738223666402822, 0.9963563766615051, 1.1109895625332784, 1.0624168494793897, 
1.0680639948980468, 1.0884154788942801, 1.07744239454882, 1.1098082588985563, 1.0813470534773337, 1.125043768932422, 1.064650530512962, 1.08939098291513, 1.1070979656755096, 1.1352286228599648, 1.1627384287512137, 1.1512736585301657, 1.1557260794875523, 1.21115065679575, 1.208054312158169, 1.2400472414835046, 1.2546729096987594, 1.2588966596664655, 1.2651438022446302, 1.1969161996203992, 1.1765289112304649, 1.2188173211904036, 1.2705786953783698, 1.283640646520588, 1.2604090388502098, 1.2808126877217243, 1.3434747755786196, 1.286310778832477, 1.3189314196196695, 1.3499132139194343, 1.3817899861062566, 1.3618891238959299, 1.374397218061818, 1.321923399137126, 1.2750914818917711, 1.2636046069156792, 1.2587104220150245, 1.2528403878832857, 1.3148597282667955, 1.3641217712726859, 1.3989715475795999, 1.4416805575084355, 1.4428113733314805, 1.5163850539881323, 1.582816757261753, 1.662100506035818, 1.710362924511234, 1.6252859222806162, 1.6343649457726213, 1.5908087775628599, 1.6567976927281254, 1.5895861686420782, 1.5422962702133292, 1.576456757298082, 1.4782307463227577, 1.449665484297293, 1.4552131047568562, 1.416624917301346, 1.457598918435057, 1.4965214410867096, 1.5269871315782237, 1.4251934912229545, 1.4937172901259652, 1.5414583398156134, 1.5439956232721872, 1.511578370628154, 1.4506849671377697, 1.538774687747678, 1.573884256559217, 1.5299264794836442, 1.4935364206011097, 1.476107554582672, 1.5306899834320777, 1.4757408027045635, 1.462761623890967, 1.493960052400103, 1.5705851888516917, 1.5299893841794174, 1.5680797027656808, 1.5726150192397956, 1.5512120474673188], "raw_rewards": [2.011087656021118, -1.5471627712249756, -1.6126691102981567, 0.4004072844982147, -0.08317989110946655, -1.224005103111267, 0.6020284295082092, -1.486713171005249, 2.19078665971756, 0.341314435005188, -1.4946295022964478, 2.557842969894409, -1.4711508750915527, -1.3040433824062347, -0.9551828801631927, -1.062305599451065, -0.3481462001800537, -1.5207006931304932, -0.4579683244228363, 
-1.5436073541641235, -1.5383447408676147, -0.8665207028388977, -1.5446007251739502, 0.5839693248271942, 2.29667067527771, -0.7865875363349915, -1.1456421315670013, 0.9137205183506012, 1.4755454063415527, 0.9054943472146988, -1.5415323972702026, 1.3973109722137451, 0.15189069882035255, -0.08075442910194397, -0.9792364537715912, 1.6819107234477997, -1.5608779191970825, 1.2009978294372559, -1.5563358068466187, -0.7095421552658081, 0.2122175097465515, 0.8033093363046646, -0.9736100733280182, 1.0859556198120117, -0.08152272552251816, 1.8443847298622131, -1.584234356880188, -0.0956476628780365, -0.4221470355987549, -1.2483505010604858, 0.6453126072883606, -0.5324428081512451, 1.2303812503814697, 1.6745995879173279, -0.06619137525558472, -1.605749249458313, 0.1328943967819214, -2.3350325226783752, -1.1350712180137634, 1.1782336831092834, 1.6558053493499756, -1.0271587371826172, 0.47524410486221313, 2.2967147827148438, 0.1643688576295972, 0.6760996878147125, -0.6354265213012695, 0.48033928871154785, -1.4070106446743011, -1.0077587962150574, -1.3782235980033875, -1.6286134719848633, 1.1489348113536835, 0.664410799741745, 2.142772227525711, -1.184206247329712, 0.4014209508895874, -1.6371392011642456, -1.6380943059921265, -0.27162760496139526, -0.7245838344097137, -0.8357173800468445, -1.0230455100536346, -1.644477128982544, 0.2636760473251343, -1.229697048664093, 0.2970864176750183, 0.4837959408760071, 0.16495420038700104, 0.5941878855228424, 2.01269268989563, 0.473050594329834, 3.1993696093559265, 0.0764971524477005, 1.210176169872284, -0.19965451955795288, 3.8576712608337402, 1.4280883893370628, 1.0428679287433624, -1.6568362712860107, 0.49463270604610443, 0.8363811671733856, 0.32087884843349457, -0.6993918418884277, 2.251273363828659, 1.3570592142641544, -0.36918799579143524, 1.0084454417228699, 1.038003921508789, -0.17219772189855576, 1.493634045124054, 2.4674428701400757, 0.0033206045627593994, 2.040954351425171, 3.4656790494918823, 1.2375374846160412, 
2.4681331515312195, 3.211431860923767, 0.6175565421581268, -0.09031233191490173, 0.389397032558918, 0.9961078143678606, 1.6544716358184814, 1.1933340430259705, 0.6135563850402832, -0.5938667058944702, 2.0285049080848694, 0.5292582809925079, 1.6015831232070923, 2.7143699526786804, 0.16865530610084534, 1.178409457206726, 0.4636725187301636, 1.8611661195755005, 1.9806989431381226, 1.6743857860565186, -0.6647513210773468, 4.274432420730591, 1.024116426706314, -0.8673376441001892, -1.3124529868364334, 3.1190399825572968, 0.6326296627521515, 2.4692362546920776, 2.093276470899582, 2.9282543063163757, 1.8036649823188782, 0.6122289001941681, 2.3818284273147583, 2.0692192912101746, -1.513732671737671, 1.2522088885307312, 1.518700361251831, 1.8946461379528046, 2.2273122668266296, -0.18672674894332886, 2.5309237241744995, 1.5318231582641602, 1.720530927181244, 0.3675469756126404, -1.27837023884058, 2.3678015172481537, 1.194051444530487, 1.427152693271637, 1.454512745141983, 2.6037117540836334, 1.0772528052330017, -0.5957517325878143, 3.805203139781952, 1.8656059503555298, 1.2644962072372437, 2.5997721552848816, 0.015331029891967773, 0.5913978815078735, 1.9545496106147766, 0.06699442863464355, 0.8849751353263855, 1.4087436497211456, -0.8760052919387817, -0.18333446979522705, 3.1888164281845093, 1.8018192052841187, 0.8150603175163269, 0.6160659193992615, 3.9308770895004272, -0.19315111637115479, 2.2416495084762573, 1.264603167772293, -0.06790724396705627, 3.5337868332862854, 1.2718881778419018, 2.6125504076480865, 1.6027227640151978, 0.5309821367263794, 0.3643292784690857, -0.08935242891311646, 3.717008888721466, 1.0185938812792301, 0.9128011465072632, 0.8284586071968079, 3.682827889919281, 2.2481080889701843, 0.4784552324563265, 0.08008977770805359, 0.4824959528632462, 0.4021200016140938, 1.6239447593688965, -0.8015319854021072, 1.2684380412101746, 1.7758225500583649, 2.997434377670288, -0.8897154331207275], "step": 10000}
|
reward_curve.png
CHANGED
|
|
spindleflow_model_latest.zip
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d1f21b851297b76cb0b8b4c5f73db8582c98f42ef7d03028af42b44f73455df
|
| 3 |
+
size 143819919
|
training_log.txt
CHANGED
|
@@ -80,3 +80,89 @@
|
|
| 80 |
[11:25:54] Ep 1478 | reward +3.360 | Phase 2/3 | Rolling mean: 0.000 / 1.0 | Episodes in phase: 0
|
| 81 |
[11:25:59] Ep 1500 | reward -0.227 | Phase 2/3 | Rolling mean: 0.121 / 1.0 | Episodes in phase: 22
|
| 82 |
[11:26:17] Periodic save at step 5,000 ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
[11:25:54] Ep 1478 | reward +3.360 | Phase 2/3 | Rolling mean: 0.000 / 1.0 | Episodes in phase: 0
|
| 81 |
[11:25:59] Ep 1500 | reward -0.227 | Phase 2/3 | Rolling mean: 0.121 / 1.0 | Episodes in phase: 22
|
| 82 |
[11:26:17] Periodic save at step 5,000 ...
|
| 83 |
+
[11:26:20] Periodic push done — 5 files at step 5,000
|
| 84 |
+
[11:26:24] Ep 1525 | reward +0.519 | Phase 2/3 | Rolling mean: 0.336 / 1.0 | Episodes in phase: 47
|
| 85 |
+
[11:26:31] Ep 1550 | reward -1.655 | Phase 2/3 | Rolling mean: 0.464 / 1.0 | Episodes in phase: 72
|
| 86 |
+
[11:27:44] Ep 1575 | reward +1.494 | Phase 2/3 | Rolling mean: 0.580 / 1.0 | Episodes in phase: 97
|
| 87 |
+
[11:27:51] Ep 1600 | reward +1.042 | Phase 2/3 | Rolling mean: 0.682 / 1.0 | Episodes in phase: 122
|
| 88 |
+
[11:28:15] Ep 1625 | reward +0.742 | Phase 2/3 | Rolling mean: 0.627 / 1.0 | Episodes in phase: 147
|
| 89 |
+
[11:28:21] Ep 1650 | reward +1.428 | Phase 2/3 | Rolling mean: 0.635 / 1.0 | Episodes in phase: 172
|
| 90 |
+
[11:28:28] Ep 1675 | reward +0.336 | Phase 2/3 | Rolling mean: 0.634 / 1.0 | Episodes in phase: 197
|
| 91 |
+
[11:28:33] Ep 1700 | reward +4.152 | Phase 2/3 | Rolling mean: 0.681 / 1.0 | Episodes in phase: 222
|
| 92 |
+
[11:28:50] Ep 1725 | reward +2.964 | Phase 2/3 | Rolling mean: 0.746 / 1.0 | Episodes in phase: 247
|
| 93 |
+
[11:30:12] Ep 1750 | reward -0.352 | Phase 2/3 | Rolling mean: 0.756 / 1.0 | Episodes in phase: 272
|
| 94 |
+
[11:30:18] Ep 1775 | reward +3.482 | Phase 2/3 | Rolling mean: 0.721 / 1.0 | Episodes in phase: 297
|
| 95 |
+
[11:30:26] Ep 1800 | reward -0.045 | Phase 2/3 | Rolling mean: 0.699 / 1.0 | Episodes in phase: 322
|
| 96 |
+
[11:30:50] Ep 1825 | reward +2.169 | Phase 2/3 | Rolling mean: 0.783 / 1.0 | Episodes in phase: 347
|
| 97 |
+
[11:30:55] Ep 1850 | reward +1.839 | Phase 2/3 | Rolling mean: 0.795 / 1.0 | Episodes in phase: 372
|
| 98 |
+
[11:31:01] Ep 1875 | reward +0.765 | Phase 2/3 | Rolling mean: 0.870 / 1.0 | Episodes in phase: 397
|
| 99 |
+
[11:31:06] Ep 1900 | reward +1.146 | Phase 2/3 | Rolling mean: 0.942 / 1.0 | Episodes in phase: 422
|
| 100 |
+
[11:32:32] Ep 1925 | reward +1.780 | Phase 2/3 | Rolling mean: 0.934 / 1.0 | Episodes in phase: 447
|
| 101 |
+
[11:32:38] Ep 1950 | reward +1.365 | Phase 2/3 | Rolling mean: 1.008 / 1.0 | Episodes in phase: 472
|
| 102 |
+
[11:32:45] Ep 1975 | reward -1.427 | Phase 2/3 | Rolling mean: 1.078 / 1.0 | Episodes in phase: 497
|
| 103 |
+
[11:32:45] Ep 1978 | reward +0.503 | Phase 3/3 | Rolling mean: 0.000 / β | Episodes in phase: 0
|
| 104 |
+
[11:32:49] Ep 2000 | reward +3.096 | Phase 3/3 | Rolling mean: 0.995 / β | Episodes in phase: 22
|
| 105 |
+
[11:33:12] Ep 2025 | reward +2.753 | Phase 3/3 | Rolling mean: 0.718 / β | Episodes in phase: 47
|
| 106 |
+
[11:33:18] Ep 2050 | reward +0.487 | Phase 3/3 | Rolling mean: 0.844 / β | Episodes in phase: 72
|
| 107 |
+
[11:33:23] Ep 2075 | reward +1.654 | Phase 3/3 | Rolling mean: 0.959 / β | Episodes in phase: 97
|
| 108 |
+
[11:33:29] Ep 2100 | reward +1.654 | Phase 3/3 | Rolling mean: 0.925 / β | Episodes in phase: 122
|
| 109 |
+
[11:34:46] Ep 2125 | reward +0.516 | Phase 3/3 | Rolling mean: 0.959 / β | Episodes in phase: 147
|
| 110 |
+
[11:34:52] Ep 2150 | reward +3.792 | Phase 3/3 | Rolling mean: 0.979 / β | Episodes in phase: 172
|
| 111 |
+
[11:34:57] Ep 2175 | reward +0.073 | Phase 3/3 | Rolling mean: 0.995 / β | Episodes in phase: 197
|
| 112 |
+
[11:35:02] Ep 2200 | reward +2.109 | Phase 3/3 | Rolling mean: 1.027 / β | Episodes in phase: 222
|
| 113 |
+
[11:35:22] Ep 2225 | reward +1.313 | Phase 3/3 | Rolling mean: 1.061 / β | Episodes in phase: 247
|
| 114 |
+
[11:35:27] Ep 2250 | reward +3.740 | Phase 3/3 | Rolling mean: 1.103 / β | Episodes in phase: 272
|
| 115 |
+
[11:35:31] Ep 2275 | reward +2.000 | Phase 3/3 | Rolling mean: 1.066 / β | Episodes in phase: 297
|
| 116 |
+
[11:35:37] Ep 2300 | reward +0.179 | Phase 3/3 | Rolling mean: 1.106 / β | Episodes in phase: 322
|
| 117 |
+
[11:36:53] Ep 2325 | reward +1.694 | Phase 3/3 | Rolling mean: 1.085 / β | Episodes in phase: 347
|
| 118 |
+
[11:36:59] Ep 2350 | reward -0.421 | Phase 3/3 | Rolling mean: 1.064 / β | Episodes in phase: 372
|
| 119 |
+
[11:37:03] Ep 2375 | reward +1.838 | Phase 3/3 | Rolling mean: 1.123 / β | Episodes in phase: 397
|
| 120 |
+
[11:37:08] Ep 2400 | reward -0.246 | Phase 3/3 | Rolling mean: 1.117 / β | Episodes in phase: 422
|
| 121 |
+
[11:37:26] Ep 2425 | reward +3.134 | Phase 3/3 | Rolling mean: 1.167 / β | Episodes in phase: 447
|
| 122 |
+
[11:37:31] Ep 2450 | reward -0.659 | Phase 3/3 | Rolling mean: 1.173 / β | Episodes in phase: 472
|
| 123 |
+
[11:37:36] Ep 2475 | reward +2.264 | Phase 3/3 | Rolling mean: 1.210 / β | Episodes in phase: 497
|
| 124 |
+
[11:37:40] Ep 2500 | reward +0.612 | Phase 3/3 | Rolling mean: 1.224 / β | Episodes in phase: 522
|
| 125 |
+
[11:37:59] Ep 2525 | reward +0.474 | Phase 3/3 | Rolling mean: 1.223 / β | Episodes in phase: 547
|
| 126 |
+
[11:38:36] Ep 2550 | reward -0.258 | Phase 3/3 | Rolling mean: 1.246 / β | Episodes in phase: 572
|
| 127 |
+
[11:38:40] Ep 2575 | reward +2.700 | Phase 3/3 | Rolling mean: 1.239 / β | Episodes in phase: 597
|
| 128 |
+
[11:38:45] Ep 2600 | reward +1.871 | Phase 3/3 | Rolling mean: 1.240 / β | Episodes in phase: 622
|
| 129 |
+
[11:39:09] Ep 2625 | reward +1.806 | Phase 3/3 | Rolling mean: 1.325 / β | Episodes in phase: 647
|
| 130 |
+
[11:39:13] Ep 2650 | reward +1.485 | Phase 3/3 | Rolling mean: 1.266 / β | Episodes in phase: 672
|
| 131 |
+
[11:39:17] Ep 2675 | reward +0.647 | Phase 3/3 | Rolling mean: 1.290 / β | Episodes in phase: 697
|
| 132 |
+
[11:39:21] Ep 2700 | reward -0.045 | Phase 3/3 | Rolling mean: 1.263 / β | Episodes in phase: 722
|
| 133 |
+
[11:39:41] Ep 2725 | reward -0.226 | Phase 3/3 | Rolling mean: 1.311 / β | Episodes in phase: 747
|
| 134 |
+
[11:39:44] Ep 2750 | reward +0.837 | Phase 3/3 | Rolling mean: 1.374 / β | Episodes in phase: 772
|
| 135 |
+
[11:39:49] Ep 2775 | reward +1.761 | Phase 3/3 | Rolling mean: 1.325 / β | Episodes in phase: 797
|
| 136 |
+
[11:40:32] Ep 2800 | reward +0.013 | Phase 3/3 | Rolling mean: 1.313 / β | Episodes in phase: 822
|
| 137 |
+
[11:40:51] Ep 2825 | reward +0.972 | Phase 3/3 | Rolling mean: 1.223 / β | Episodes in phase: 847
|
| 138 |
+
[11:40:56] Ep 2850 | reward +0.477 | Phase 3/3 | Rolling mean: 1.272 / β | Episodes in phase: 872
|
| 139 |
+
[11:41:00] Ep 2875 | reward +1.260 | Phase 3/3 | Rolling mean: 1.281 / β | Episodes in phase: 897
|
| 140 |
+
[11:41:04] Ep 2900 | reward +0.083 | Phase 3/3 | Rolling mean: 1.360 / β | Episodes in phase: 922
|
| 141 |
+
[11:41:23] Ep 2925 | reward +0.015 | Phase 3/3 | Rolling mean: 1.454 / β | Episodes in phase: 947
|
| 142 |
+
[11:41:29] Ep 2950 | reward +1.500 | Phase 3/3 | Rolling mean: 1.414 / β | Episodes in phase: 972
|
| 143 |
+
[11:41:33] Ep 2975 | reward +3.524 | Phase 3/3 | Rolling mean: 1.559 / β | Episodes in phase: 997
|
| 144 |
+
[11:41:38] Ep 3000 | reward +1.860 | Phase 3/3 | Rolling mean: 1.650 / β | Episodes in phase: 1022
|
| 145 |
+
[11:42:41] Ep 3025 | reward +1.604 | Phase 3/3 | Rolling mean: 1.644 / β | Episodes in phase: 1047
|
| 146 |
+
[11:42:46] Ep 3050 | reward +1.799 | Phase 3/3 | Rolling mean: 1.636 / β | Episodes in phase: 1072
|
| 147 |
+
[11:42:50] Ep 3075 | reward +3.127 | Phase 3/3 | Rolling mean: 1.658 / β | Episodes in phase: 1097
|
| 148 |
+
[11:42:54] Ep 3100 | reward +0.200 | Phase 3/3 | Rolling mean: 1.640 / β | Episodes in phase: 1122
|
| 149 |
+
[11:43:12] Ep 3125 | reward +0.282 | Phase 3/3 | Rolling mean: 1.549 / β | Episodes in phase: 1147
|
| 150 |
+
[11:43:16] Ep 3150 | reward +0.694 | Phase 3/3 | Rolling mean: 1.551 / β | Episodes in phase: 1172
|
| 151 |
+
[11:43:21] Ep 3175 | reward +1.753 | Phase 3/3 | Rolling mean: 1.503 / β | Episodes in phase: 1197
|
| 152 |
+
[11:43:24] Ep 3200 | reward +1.497 | Phase 3/3 | Rolling mean: 1.392 / β | Episodes in phase: 1222
|
| 153 |
+
[11:43:43] Ep 3225 | reward +0.607 | Phase 3/3 | Rolling mean: 1.443 / β | Episodes in phase: 1247
|
| 154 |
+
[11:44:25] Ep 3250 | reward +2.374 | Phase 3/3 | Rolling mean: 1.528 / β | Episodes in phase: 1272
|
| 155 |
+
[11:44:30] Ep 3275 | reward +1.900 | Phase 3/3 | Rolling mean: 1.483 / β | Episodes in phase: 1297
|
| 156 |
+
[11:44:33] Ep 3300 | reward +1.744 | Phase 3/3 | Rolling mean: 1.515 / β | Episodes in phase: 1322
|
| 157 |
+
[11:44:53] Ep 3325 | reward +1.466 | Phase 3/3 | Rolling mean: 1.504 / β | Episodes in phase: 1347
|
| 158 |
+
[11:44:57] Ep 3350 | reward +1.019 | Phase 3/3 | Rolling mean: 1.499 / β | Episodes in phase: 1372
|
| 159 |
+
[11:45:00] Ep 3375 | reward -0.277 | Phase 3/3 | Rolling mean: 1.479 / β | Episodes in phase: 1397
|
| 160 |
+
[11:45:05] Ep 3400 | reward +2.236 | Phase 3/3 | Rolling mean: 1.526 / β | Episodes in phase: 1422
|
| 161 |
+
[11:45:25] Ep 3425 | reward +0.735 | Phase 3/3 | Rolling mean: 1.572 / β | Episodes in phase: 1447
|
| 162 |
+
[11:45:28] Ep 3450 | reward +2.571 | Phase 3/3 | Rolling mean: 1.515 / β | Episodes in phase: 1472
|
| 163 |
+
[11:45:32] Ep 3475 | reward -2.395 | Phase 3/3 | Rolling mean: 1.490 / β | Episodes in phase: 1497
|
| 164 |
+
[11:46:29] Ep 3500 | reward +1.103 | Phase 3/3 | Rolling mean: 1.466 / β | Episodes in phase: 1522
|
| 165 |
+
[11:46:49] Ep 3525 | reward +1.210 | Phase 3/3 | Rolling mean: 1.537 / β | Episodes in phase: 1547
|
| 166 |
+
[11:46:53] Ep 3550 | reward +2.704 | Phase 3/3 | Rolling mean: 1.597 / β | Episodes in phase: 1572
|
| 167 |
+
[11:46:57] Ep 3575 | reward +3.341 | Phase 3/3 | Rolling mean: 1.570 / β | Episodes in phase: 1597
|
| 168 |
+
[11:46:59] Periodic save at step 10,000 ...
|
vec_normalize_latest.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 166596
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89f4e32efc4ca3a0098b7ddcdf9f2208fc357b7b1a16aa828827ea4900ca2bfe
|
| 3 |
size 166596
|