diff --git a/.gitignore b/.gitignore index acde99e0805d3c9adda2d8512fb6022649977c0d..41bfd899d9eb6e035686f3244cf80b0893b17f09 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,6 @@ # Ignore the __pycache__ directory anywhere in the repository __pycache__/ -# Ignore all .txt files anywhere in the repository -*.txt # Ignore the 'runs' directory anywhere in the repository, regardless of nesting runs/ diff --git a/data/cache/sample_177.pt b/data/cache/sample_177.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1e6e0a9918b3012e2390934ea0aaf52937146c9 --- /dev/null +++ b/data/cache/sample_177.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44145f00efffa55fb6e8714820091045aef0e23991ad42fb29e33b52b7184e7e +size 8639 diff --git a/data/cache/sample_179.pt b/data/cache/sample_179.pt new file mode 100644 index 0000000000000000000000000000000000000000..4904671cddbb8c4b98c23f4cd1a0efc3e8b9dbe7 --- /dev/null +++ b/data/cache/sample_179.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e97ca9b7de7d87cda3833332b677f24fa013c2b7c86a339f4e561ea7b947975 +size 6143 diff --git a/data/cache/sample_180.pt b/data/cache/sample_180.pt new file mode 100644 index 0000000000000000000000000000000000000000..b124828cdd538444752a628133430957646cd5d6 --- /dev/null +++ b/data/cache/sample_180.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3a2233b6b5b46473d004ba5a95e142a4e3359991a27e7a9c86e8f0ef1970bc1 +size 4415 diff --git a/data/cache/sample_183.pt b/data/cache/sample_183.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ca6be7c208b805d97e91689fbfa28ea6b324e43 --- /dev/null +++ b/data/cache/sample_183.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d85e6ceb562c9dccb0725fcef1e6ee4bee9f8185a5cad3babadbfb854afa9ea +size 22079 diff --git a/data/cache/sample_184.pt b/data/cache/sample_184.pt new file mode 100644 index 0000000000000000000000000000000000000000..35a5923f492f1fdd0287beee2da931ab77850488 --- /dev/null +++ b/data/cache/sample_184.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b01753da3836dc6c8eec6fbb54ef3cb9ac03203d83c5de16bb964b760ec2608 +size 50047 diff --git a/data/cache/sample_185.pt b/data/cache/sample_185.pt new file mode 100644 index 0000000000000000000000000000000000000000..1cf5e6589f386536a4d68c67d6dc573083607b71 --- /dev/null +++ b/data/cache/sample_185.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e75db11fe0d929ea7c8e0867496c6797d32b564dfefaaa6677909affa297177e +size 11903 diff --git a/data/cache/sample_186.pt b/data/cache/sample_186.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1eccf3f66fc691591928ce195374e6481fb6653 --- /dev/null +++ b/data/cache/sample_186.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bd7e868b619f5cd216c0e52b3237d88945371927877d4fda0b3c45caba042c5 +size 10815 diff --git a/data/cache/sample_187.pt b/data/cache/sample_187.pt new file mode 100644 index 0000000000000000000000000000000000000000..f35a8ec45a13bd95c5ccfda3c051e8aebe1895a0 --- /dev/null +++ b/data/cache/sample_187.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d14263efa517983b7b76993bfc74368351a301dbcdf8f2b586e8f17899065fa +size 4415 diff --git a/data/cache/sample_188.pt b/data/cache/sample_188.pt new file mode 100644 index 0000000000000000000000000000000000000000..0646eb249c78615db916b53a361520461eac9b18 --- /dev/null +++ b/data/cache/sample_188.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c53cef04b5853db1b40736770b8952957957e818c4cb75b8423be38e77fbc1fd +size 81343 diff --git a/data/cache/sample_189.pt b/data/cache/sample_189.pt new file mode 100644 index 0000000000000000000000000000000000000000..00411862341a0805b9366d6fbfec615d39efd4cb --- /dev/null +++ b/data/cache/sample_189.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c50ae2bab235ada474997a851a5b6aaa8c1c744a626ff4a20f50212dc650809 +size 92223 diff --git a/data/cache/sample_190.pt b/data/cache/sample_190.pt new file mode 100644 index 0000000000000000000000000000000000000000..e52df4fe80b0bfd515bbd367ae16a9c31b331d0a --- /dev/null +++ b/data/cache/sample_190.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51c18f3dc69f58ccc800de4d671ec9e94f4ed90b1c7566d35e51faf77b979af0 +size 3391 diff --git a/data/cache/sample_191.pt b/data/cache/sample_191.pt new file mode 100644 index 0000000000000000000000000000000000000000..cae6896065d715bf5ff438b57074c42f4cf66a86 --- /dev/null +++ b/data/cache/sample_191.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a0d2c288391920f4a338dee6f5485a8362585233db91fca476c41e923a9cad7 +size 11839 diff --git a/data/cache/sample_192.pt b/data/cache/sample_192.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c4b9ddb33201f59f23d26f67bf812bb0c0d43e8 --- /dev/null +++ b/data/cache/sample_192.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2712d55a09c42675c6d01591205b7e7dde6e9095b3a78361d09d1ac7ab85a09c +size 5503 diff --git a/data/cache/sample_193.pt b/data/cache/sample_193.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c46640349d8c519b481b4ec86662093f0c55e66 --- /dev/null +++ b/data/cache/sample_193.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c567d2ca81b8a8154b59f890d0ebad1c9b780ea8e1a2cce6fa039429d31bdedc +size 9727 diff --git a/data/cache/sample_194.pt b/data/cache/sample_194.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0ca9e395379f880131111ddd2d36f48ff6fc290 --- /dev/null +++ b/data/cache/sample_194.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4da9733cfc05d09509034b58b26fc4eb818c4335d40b80ad3ee911f82e27cdb8 +size 162943 diff --git a/data/cache/sample_196.pt b/data/cache/sample_196.pt new file mode 100644 index 0000000000000000000000000000000000000000..db4d52a74b70506c8588f23a2c7323345b618c4d --- /dev/null +++ b/data/cache/sample_196.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac255544b2e2699e84ba88d73183a1fe0f4fc9f8e46bee229a6798f02b105017 +size 6655 diff --git a/data/cache/sample_197.pt b/data/cache/sample_197.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2f2fe9cca51663e23f5d250bfd689b83ce4e9c3 --- /dev/null +++ b/data/cache/sample_197.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b97a66734f848bffa942f0a21e255130cffad9fe26924910b4c801b806e3875 +size 5503 diff --git a/data/cache/sample_198.pt b/data/cache/sample_198.pt new file mode 100644 index 0000000000000000000000000000000000000000..27823fd4cf587cfacc06f58eb5b3a950dc4376e2 --- /dev/null +++ b/data/cache/sample_198.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5b5b8ee2ecd0ecb96669401ae26118321655e524202c44a3c3fb0576b29a6f6 +size 14783 diff --git a/data/cache/sample_200.pt b/data/cache/sample_200.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f59db164412ea32fc7903e3514af71621e21ae1 --- /dev/null +++ b/data/cache/sample_200.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d602f248b2dcbde3fe2c255b80c0fc1f106b5df3ec4cd6c3a86464bc8f39a4f1 +size 6527 diff --git a/data/cache/sample_201.pt b/data/cache/sample_201.pt new file mode 100644 index 0000000000000000000000000000000000000000..5db18fad73e76668d26ec05d3169b1e1e73d80ba --- /dev/null +++ b/data/cache/sample_201.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aed404f5823d98f1c8776e87b48e9884428fc835e834366b6b37eed8e60541e +size 5631 diff --git a/data/cache/sample_202.pt b/data/cache/sample_202.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbcbd7dcf386f9fe1ffa90432bb709c297d7aae8 --- /dev/null +++ b/data/cache/sample_202.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6599676fe1deac7ff46eb9a8bffa17aa47f7776dead13371d6268c83eb270a29 +size 3391 diff --git a/data/cache/sample_203.pt b/data/cache/sample_203.pt new file mode 100644 index 0000000000000000000000000000000000000000..524776a7e1d583a3dfaca0fbd9b6491a3405cc73 --- /dev/null +++ b/data/cache/sample_203.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b22a8cfaff3298c31d28760644ca21a9de83d6b32a12b08a455777fd36c6f12 +size 337471 diff --git a/data/cache/sample_204.pt b/data/cache/sample_204.pt new file mode 100644 index 0000000000000000000000000000000000000000..be58bbbd327b3d12d3765d04a2ced1f9e64ed063 --- /dev/null +++ b/data/cache/sample_204.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2faf9f019825a7dc87b14c714e9f82958fe98ea697e956344e1eef83605eb1f +size 44351 diff --git a/data/cache/sample_206.pt b/data/cache/sample_206.pt new file mode 100644 index 0000000000000000000000000000000000000000..a151231e9dcedf6b5802ed06429bdce3f5ec0981 --- /dev/null +++ b/data/cache/sample_206.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18ff056692348a616f90afb6adcb4b6804d637a9e9fbc6a5a43135059daeed10 +size 7679 diff --git a/data/cache/sample_209.pt b/data/cache/sample_209.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d8b35e51722b2796c84087857e084069d4ad137 --- /dev/null +++ b/data/cache/sample_209.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2aa75c80ce394656ac99adff8f0fea29c2c128271e16b24c567377d192f3f7c +size 61503 diff --git a/data/cache/sample_210.pt b/data/cache/sample_210.pt new file mode 100644 index 0000000000000000000000000000000000000000..766e76b93d5729095045b827eaffefb541547e8d --- /dev/null +++ b/data/cache/sample_210.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e1660624b7ebce6de3e98e38bec3b989a7488ae23618a0e37bab2152ca8633b +size 7487 diff --git a/data/cache/sample_211.pt b/data/cache/sample_211.pt new file mode 100644 index 0000000000000000000000000000000000000000..85fb10f63e8dd37a3524eb8d181bed17f177ff8b --- /dev/null +++ b/data/cache/sample_211.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e025a83adc4e7b4af7146e4f8416c301d8a80f0675ea846229c12efb1688ad8 +size 7807 diff --git a/data/cache/sample_212.pt b/data/cache/sample_212.pt new file mode 100644 index 0000000000000000000000000000000000000000..935be839533561c7d1cdc4f1bb2c750ddd2dd085 --- /dev/null +++ b/data/cache/sample_212.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4357154264b8b2b6fdd706db4b3c882849dfdd315886bed43f812b07a63540e3 +size 3391 diff --git a/data/cache/sample_213.pt b/data/cache/sample_213.pt new file mode 100644 index 0000000000000000000000000000000000000000..c061d1a039d43d67258a7d3673be6dd3295cdb4b --- /dev/null +++ b/data/cache/sample_213.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:254defc0a47a64a8125badb65dbbbcf143667ba48ba77d191f85114c09977232 +size 14847 diff --git a/data/cache/sample_214.pt b/data/cache/sample_214.pt new file mode 100644 index 0000000000000000000000000000000000000000..3446ae78c1f47eef3fc11a387da889c78f24f5a6 --- /dev/null +++ b/data/cache/sample_214.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e81145ec5d672f1099df82ce8019a38800a019f00f3733870b3028029293e95 +size 295295 diff --git a/data/cache/sample_215.pt b/data/cache/sample_215.pt new file mode 100644 index 0000000000000000000000000000000000000000..32b34cd87bffbc69ffd60f275104f2f69a9fdc1a --- /dev/null +++ b/data/cache/sample_215.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0817f3e60f93d45e5dd1301dac8618a148738a928a4177e5d1cb053f7d4a7661 +size 11967 diff --git a/data/cache/sample_217.pt b/data/cache/sample_217.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b309cb9637181a84ba7ff5d6e312450db5917cf --- /dev/null +++ b/data/cache/sample_217.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3d2c2dde2f20e3b6d1589688223de6d10095701c0c6f806883156485c23b22a +size 19007 diff --git a/data/cache/sample_218.pt b/data/cache/sample_218.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9854d81ffdf39bf7d3aebceaf47453346a43db7 --- /dev/null +++ b/data/cache/sample_218.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3473e7338c931a66c3b5b96a0a1abba5f9bd8384621bfcfd23a24a81f1d5823 +size 9983 diff --git a/data/cache/sample_219.pt b/data/cache/sample_219.pt new file mode 100644 index 0000000000000000000000000000000000000000..610bca89b3643c3764f0cfdc74783704f5679590 --- /dev/null +++ b/data/cache/sample_219.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ea7e8317192f6a39abeb3b3e255d85808cc1d480a4aa8b472b306281d105779 +size 4415 diff --git a/data/cache/sample_220.pt b/data/cache/sample_220.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b0be486f493f7439da44af9738121cdd93f0967 --- /dev/null +++ b/data/cache/sample_220.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bd48ef7f8fb619c84e81239da87688f8c97c87c293a154f990b5f137dfb41fd +size 165375 diff --git a/data/cache/sample_221.pt b/data/cache/sample_221.pt new file mode 100644 index 0000000000000000000000000000000000000000..de3da7cd5418b56cd80897b895794ba2bddf5a42 --- /dev/null +++ b/data/cache/sample_221.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78060396345180546259d70a5b162edfecd3483ec300d0fa81f8399896e979c2 +size 4415 diff --git a/data/cache/sample_222.pt b/data/cache/sample_222.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f2e5bb26ad85cf5a67e71b800c20349cd31850c --- /dev/null +++ b/data/cache/sample_222.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9504c3654a40f71b1eacece49bad25e1bbe08f4b0a1088499436f4295ba1d79 +size 4479 diff --git a/data/cache/sample_223.pt b/data/cache/sample_223.pt new file mode 100644 index 0000000000000000000000000000000000000000..c05deb7267d928ea3952aa5fc2d842ae4adf41ee --- /dev/null +++ b/data/cache/sample_223.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b34f83faf0f544c26f8b1a140eaa014522db9b4542a63d07798f4774c6be6f32 +size 10559 diff --git a/data/cache/sample_225.pt b/data/cache/sample_225.pt new file mode 100644 index 0000000000000000000000000000000000000000..d687632af996f1a868ef8ab4a92e35c3cb13f8ca --- /dev/null +++ b/data/cache/sample_225.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fac5bd6eb629ee6ec39cbcbb0f64f3634699c251000f599e46da9c8023a832b6 +size 13183 diff --git a/data/cache/sample_227.pt b/data/cache/sample_227.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea6a9b86a976fb096774f9b3f6bbc73ba902d589 --- /dev/null +++ b/data/cache/sample_227.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2250cbf1a92ee724201f5084284fe5e4a4814bb95b7046b3cbbaa9f54e4261a3 +size 13567 diff --git a/data/cache/sample_228.pt b/data/cache/sample_228.pt new file mode 100644 index 0000000000000000000000000000000000000000..7fb460fd0d1c0fab2b8f87b4a63121355205c9fb --- /dev/null +++ b/data/cache/sample_228.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f9dab8d9b7fbf46370c552cbb1874cca6a91698f03ee361e180fc541ed4e4c2 +size 6591 diff --git a/data/cache/sample_229.pt b/data/cache/sample_229.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1c8faa10dbcc05f7ad1573ab0e58c36ee08f9cf --- /dev/null +++ b/data/cache/sample_229.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3a36f1c8eba443ae04c82186bf2ab9e096cac70959561125bdc56f77180dd88 +size 71743 diff --git a/data/cache/sample_230.pt b/data/cache/sample_230.pt new file mode 100644 index 0000000000000000000000000000000000000000..682ae383bb625591885877c2aa1cd097e3e13f74 --- /dev/null +++ b/data/cache/sample_230.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c6872a94e3e2fa8f7b7cc3a9573fbe9c38a73b71f218a2bc7bcb3b05948ce7c +size 77631 diff --git a/data/cache/sample_231.pt b/data/cache/sample_231.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d59edcbd245b2bf7388f736dde73fbc1e2d3308 --- /dev/null +++ b/data/cache/sample_231.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43ad508162cc9d03e3585fca13bd04df212e2c68fa816beb11272d3c5d138179 +size 9727 diff --git a/data/cache/sample_232.pt b/data/cache/sample_232.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac8aaec0475f013634d862a8f7855b95275837f6 --- /dev/null +++ b/data/cache/sample_232.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85121b8aea689f6735d14ed67f0c96b694538eeb85754d4f3931d1c1832aefad +size 7679 diff --git a/data/cache/sample_235.pt b/data/cache/sample_235.pt new file mode 100644 index 0000000000000000000000000000000000000000..6dc1f805e607c2afa9532ba990de9ddb8a9a18c0 --- /dev/null +++ b/data/cache/sample_235.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:294179b6839d944d5515cb1d6500662eb5c8dd6fd5462379d632b8031ba98151 +size 40831 diff --git a/data/cache/sample_236.pt b/data/cache/sample_236.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc086773fd71e8a6730293d06c5dcd3b0b11a482 --- /dev/null +++ b/data/cache/sample_236.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba64935eeeffe290a108a28cea19105a346731bc941df964916fde5d7578dbe3 +size 476991 diff --git a/data/cache/sample_237.pt b/data/cache/sample_237.pt new file mode 100644 index 0000000000000000000000000000000000000000..095148bf7db54500c020f7acc5c4354d6214040b --- /dev/null +++ b/data/cache/sample_237.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d8e431f4bd0198d16114c7eefed0cfbefaa5a22e66e3a3887e157d3783b0ced +size 28415 diff --git a/data/cache/sample_238.pt b/data/cache/sample_238.pt new file mode 100644 index 0000000000000000000000000000000000000000..0786d68844c6a235cbcadd1e04c3ed18d7b5cbe8 --- /dev/null +++ b/data/cache/sample_238.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a838cc6124110ed65634d934726c9f1760e53aa80bfe63883a44946b692facea +size 9727 diff --git a/data/cache/sample_239.pt b/data/cache/sample_239.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad41753b6f941754fe79b3824566327b497560d7 --- /dev/null +++ b/data/cache/sample_239.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85894fbe2535ec47f6e8323c437497539f835ab72d9b1d13e962f70ba85d7a17 +size 8191 diff --git a/data/cache/sample_241.pt b/data/cache/sample_241.pt new file mode 100644 index 0000000000000000000000000000000000000000..1333cfb02e06163f080b4a614299047237f0306f --- /dev/null +++ b/data/cache/sample_241.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebe714ca7ce6c42dbde0b353a263a30d605fedb55bb5a9297a4013cadb876e0e +size 4479 diff --git a/data/cache/sample_242.pt b/data/cache/sample_242.pt new file mode 100644 index 0000000000000000000000000000000000000000..47ee5d80f7252294b02577e079e62ee5357e6085 --- /dev/null +++ b/data/cache/sample_242.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:519c64e26833e1e7ea4bce7bba733de9ce1bc21d81120d15ba2b6868cbacdb7f +size 4415 diff --git a/data/cache/sample_243.pt b/data/cache/sample_243.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd2cc91d574b1be9f89b4bc4736bebacd303743e --- /dev/null +++ b/data/cache/sample_243.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e02c504231c55c6c117919c5f6e839aba065e34b7656292c3a435d6adeaf9240 +size 30655 diff --git a/data/cache/sample_244.pt b/data/cache/sample_244.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5026a33ef4626e8e4a162044aef84a51350afa1 --- /dev/null +++ b/data/cache/sample_244.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f10d7dd30783768ec0f6d54dda11efca02f3935fd0537aa2b35c54b32dd9c9d5 +size 9151 diff --git a/data/cache/sample_245.pt b/data/cache/sample_245.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e8e390b2575786e8c02ce82dfbdb67bba2bfd7f --- /dev/null +++ b/data/cache/sample_245.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a08cedd430ce30f5584a301af7e8bee0210c3046dc118589a0475e400e9ca81 +size 46143 diff --git a/data/cache/sample_247.pt b/data/cache/sample_247.pt new file mode 100644 index 0000000000000000000000000000000000000000..030c73b25d6c55db474de42501439bca4f529b26 --- /dev/null +++ b/data/cache/sample_247.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e6acd215912a2ad69fde1c143edfca2f3f9fd9247d720dc12ea833ea7c91a10 +size 5759 diff --git a/data/cache/sample_248.pt b/data/cache/sample_248.pt new file mode 100644 index 0000000000000000000000000000000000000000..9414fdadd2f51ff21ee7bb4bd6cee7aa054c4cdf --- /dev/null +++ b/data/cache/sample_248.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:809b37cbbff3d80d78ac82a3b2d3a99ffbd770957192d822938bd9e991fc0252 +size 27775 diff --git a/data/cache/sample_249.pt b/data/cache/sample_249.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea8a83914e325763f17cdb00c5ac3e70a773e7d9 --- /dev/null +++ b/data/cache/sample_249.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c25dbf52fa4b7d411c561304fcab2dc093209a3d7fbe0f767c1ca385b1b5c4f1 +size 8127 diff --git a/data/cache/sample_250.pt b/data/cache/sample_250.pt new file mode 100644 index 0000000000000000000000000000000000000000..3adf4a1ad3c98ee03eb395a499daf265d9affe31 --- /dev/null +++ b/data/cache/sample_250.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0fbbbd142c3a38efa868e5c1a6e70b6d415e9979a1f6fc420c5745b9d1483ed +size 8127 diff --git a/data/cache/sample_253.pt b/data/cache/sample_253.pt new file mode 100644 index 0000000000000000000000000000000000000000..cefc932c162a76df446b86279650c6654de5ceb8 --- /dev/null +++ b/data/cache/sample_253.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df6fab45a17be652502e43e4452a4246d4b5c462b31154753631322e3f8d6504 +size 411583 diff --git a/data/cache/sample_254.pt b/data/cache/sample_254.pt new file mode 100644 index 0000000000000000000000000000000000000000..0352d4352478ac220981203e1df107c12593394c --- /dev/null +++ b/data/cache/sample_254.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fbdc95a31c2fb54350ef6b7b8d2e3602cffc770eff081a4aeec9192149b0bec +size 6015 diff --git a/data/cache/sample_255.pt b/data/cache/sample_255.pt new file mode 100644 index 0000000000000000000000000000000000000000..1198f75c46cd875ef4e7d3f32f755fcffadd175e --- /dev/null +++ b/data/cache/sample_255.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:908d96676063dba2c76d56a15a843f01d4905d75c5ac10a14bb7b8dccbe77280 +size 4991 diff --git a/data/cache/sample_256.pt b/data/cache/sample_256.pt new file mode 100644 index 0000000000000000000000000000000000000000..1487fb447bd9dd859a1bd4ca9b58978cc624cf33 --- /dev/null +++ b/data/cache/sample_256.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37fc33d1d7a3ba6c73bd0bc27ec22a8696dc6322e8e2e0cf44c5f1d8b256a4c8 +size 12607 diff --git a/data/cache/sample_257.pt b/data/cache/sample_257.pt new file mode 100644 index 0000000000000000000000000000000000000000..64fc4f5b6abc2709d57bce155a05e661dd4ceda0 --- /dev/null +++ b/data/cache/sample_257.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb143ab629551e3567077cfb09742696eb812d3fc7cf85a623eee2776ce669df +size 7231 diff --git a/data/cache/sample_261.pt b/data/cache/sample_261.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d2ec6b8a7d5a0b85de9693534083f34c85766d0 --- /dev/null +++ b/data/cache/sample_261.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3370e8a2b302e1a1386460a7d94e1980576bdf5e0573659ab95e3769a4ff3089 +size 8639 diff --git a/data/cache/sample_262.pt b/data/cache/sample_262.pt new file mode 100644 index 0000000000000000000000000000000000000000..658a75499973198866f301b3afa65b2636b41f10 --- /dev/null +++ b/data/cache/sample_262.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a23187e36b87e38e874a2a0235db2419edba8250b99e660d4597db69d80268 +size 42687 diff --git a/data/cache/sample_263.pt b/data/cache/sample_263.pt new file mode 100644 index 0000000000000000000000000000000000000000..da8721f92a87fd354d0292ece86250eef2e904e7 --- /dev/null +++ b/data/cache/sample_263.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80d0734bc40a3c2c9be4d95da3ff38899d509040418dc56c33536359724a6451 +size 51583 diff --git a/data/cache/sample_264.pt b/data/cache/sample_264.pt new file mode 100644 index 0000000000000000000000000000000000000000..a374d6f37973c1f8f0f1cf7e42bb64336592f945 --- /dev/null +++ b/data/cache/sample_264.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14a4b207c2bae4bae8cf69bce0d96fc8b9777d3d1c20b73ecea938a3268a78e0 +size 7103 diff --git a/data/cache/sample_265.pt b/data/cache/sample_265.pt new file mode 100644 index 0000000000000000000000000000000000000000..287bb11a5c32137b121a77279a24f5c5300e2f6e --- /dev/null +++ b/data/cache/sample_265.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2041de73c1bc6e7d0a2131e47f700fb4ccc2fdb841c901cf5857e326305132e0 +size 13055 diff --git a/data/cache/sample_266.pt b/data/cache/sample_266.pt new file mode 100644 index 0000000000000000000000000000000000000000..8dbfd5a35529a799fe0127f32a0bb5b621a5c894 --- /dev/null +++ b/data/cache/sample_266.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:895d2ac03e553f39ff337aa71485f5fe8a68b6a68d1a5310d3e00e52d6549592 +size 58559 diff --git a/data/cache/sample_268.pt b/data/cache/sample_268.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ea7528280bf76f0fea1962be12b852e0a8ae8d4 --- /dev/null +++ b/data/cache/sample_268.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b921e163f32a6ba74dccce7b0fe5586d1cfaed5961c2d4114fe7bcd7d653d616 +size 9215 diff --git a/data/cache/sample_269.pt b/data/cache/sample_269.pt new file mode 100644 index 0000000000000000000000000000000000000000..deb316d3d3780d0fa2233ea275e0bfe600674548 --- /dev/null +++ b/data/cache/sample_269.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dffdbe572dac853cfa656d8d5200ac87f8d198a338a2fabbc7049656824c0504 +size 10495 diff --git a/data/cache/sample_270.pt b/data/cache/sample_270.pt new file mode 100644 index 0000000000000000000000000000000000000000..83f5094e5ba143f0fb6fda1c73b82a06b5202fb9 --- /dev/null +++ b/data/cache/sample_270.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f080febdfd6fb0dcca40a6ad65b1815742b00aeb30cbf5c7a539dbb7015e7dc5 +size 6079 diff --git a/data/cache/sample_271.pt b/data/cache/sample_271.pt new file mode 100644 index 0000000000000000000000000000000000000000..ceac3fddf8f24c86728d63edd9a1d9d0f82ff835 --- /dev/null +++ b/data/cache/sample_271.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cad28ec17ce157d6ef63d1ecd40f740100eae16aea868c4573ddbeb817424a83 +size 12991 diff --git a/data/cache/sample_272.pt b/data/cache/sample_272.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc9147bc3296722351328a3802dead0291cc8d26 --- /dev/null +++ b/data/cache/sample_272.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b5e540f66bc50e72865cda6c09c28e1fe6592bbc41fe075741daa8934ec7301 +size 11967 diff --git a/data/cache/sample_273.pt b/data/cache/sample_273.pt new file mode 100644 index 0000000000000000000000000000000000000000..6bb10c3046cb7fee114af10142435a11b4ef1c52 --- /dev/null +++ b/data/cache/sample_273.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1051d9eae8f21074b47032453b336cdfbecd69bd7965624deb620ef14611e524 +size 45119 diff --git a/data/cache/sample_275.pt b/data/cache/sample_275.pt new file mode 100644 index 0000000000000000000000000000000000000000..99329f9e79324b442213e5907fe0d9e4dcc182cb --- /dev/null +++ b/data/cache/sample_275.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd4f9b1290977fe7c70990e2d8c87577e3c18b5afab74fdd1627d52d43bc9540 +size 5567 diff --git a/data/cache/sample_278.pt b/data/cache/sample_278.pt new file mode 100644 index 0000000000000000000000000000000000000000..90b213ff7598f05b8b7bdc28cc148933dafd736b --- /dev/null +++ b/data/cache/sample_278.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdeafb26deed273c94200481ebd7955310fc68e35fd35af88432a3cca7cbe70f +size 8127 diff --git a/data/cache/sample_279.pt b/data/cache/sample_279.pt new file mode 100644 index 0000000000000000000000000000000000000000..7be4dfb50d665dfa89d9e3c68e5f61acb0d2d4ff --- /dev/null +++ b/data/cache/sample_279.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab2a7caaae7dad6a727457ccc852d66e4f60ad3bde052735014199142f22f0f6 +size 85887 diff --git a/data/cache/sample_281.pt b/data/cache/sample_281.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f8e8e56e3f39b6c58e5228b8c8c495ecfdbdbba --- /dev/null +++ b/data/cache/sample_281.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a6afccd4b78b430077c00b86ee3632c4f5e46b855969424392bcb2dafb6e43c +size 6591 diff --git a/data/cache/sample_282.pt b/data/cache/sample_282.pt new file mode 100644 index 0000000000000000000000000000000000000000..146e1d67246b42e816cedc2154a6677d99d14271 --- /dev/null +++ b/data/cache/sample_282.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d151015c8e77495189f660fab1a00b6db538890f7ef2af6c0349f1700238a20c +size 9727 diff --git a/data/cache/sample_283.pt b/data/cache/sample_283.pt new file mode 100644 index 0000000000000000000000000000000000000000..f564f2714e69eaf01d22cb8ea6b9ad2aff49a2b2 --- /dev/null +++ b/data/cache/sample_283.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f0fcda5d032e00ee2c1ba4e25c8ce48d6cd25fced556db7f8e34e1415dbcf93 +size 6527 diff --git a/data/cache/sample_286.pt b/data/cache/sample_286.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba35065aef3d3af67fc39389fe6c8efc5b85171f --- /dev/null +++ b/data/cache/sample_286.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52712df0dad4af330f22bcb234d4725aeeee907dacc7b2de81a5be9ebf95ac75 +size 8127 diff --git a/data/cache/sample_287.pt b/data/cache/sample_287.pt new file mode 100644 index 0000000000000000000000000000000000000000..2864d9c086c34c9f6dae073a092f66e88d3d03b8 --- /dev/null +++ b/data/cache/sample_287.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c13e5159723bf1180e7723e64f11ff8ea614fa953dfc85e2c3ecb21feab1769 +size 4415 diff --git a/data/cache/sample_292.pt b/data/cache/sample_292.pt new file mode 100644 index 0000000000000000000000000000000000000000..1921063b70c3f1ea37b5bfaeec126f37eccbc8bb --- /dev/null +++ b/data/cache/sample_292.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef6802b320bce144b6f5766a7a059ccdbfd7b01d69c6cbdf8663729b81d76e0e +size 16255 diff --git a/data/cache/sample_293.pt b/data/cache/sample_293.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d6ff7917427dfec5c9d20bc385346cf95f886ed --- /dev/null +++ b/data/cache/sample_293.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12dcc5b57927f7179b3ec47a5238e5d79522c2b9591e4c111225f4ed613f7fde +size 4415 diff --git a/data/cache/sample_294.pt b/data/cache/sample_294.pt new file mode 100644 index 0000000000000000000000000000000000000000..f401f886b41e37955156649349c2f0fc9ac8e367 --- /dev/null +++ b/data/cache/sample_294.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8538dc167d4ad39a2a709da11dd6404fcb4a9ccfa44ad9cad684abba927844b2 +size 4991 diff --git a/data/cache/sample_295.pt b/data/cache/sample_295.pt new file mode 100644 index 0000000000000000000000000000000000000000..da43fbd2d298beefa9eb48bf355964fe04b35a37 --- /dev/null +++ b/data/cache/sample_295.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5865b74d7a5cc2ce2d65fb2befaed0d5a205257618256a19c2423ba32c1abf8 +size 4415 diff --git a/data/cache/sample_297.pt b/data/cache/sample_297.pt new file mode 100644 index 0000000000000000000000000000000000000000..6edbb9810fbaddc7df02a827f16702dd0d5d7867 --- /dev/null +++ b/data/cache/sample_297.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e1c5c7548638b44ea0b356d5b3a4540e5de65364ccf48912cc98a70d4badb12 +size 212927 diff --git a/data/cache/sample_298.pt b/data/cache/sample_298.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ebfaaf0eebd7d498afc5a4b38c30ba6453fe4d1 --- /dev/null +++ b/data/cache/sample_298.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32f0e0a35e58c32e75cf741baf1c746a0e662a8d60371fd1ceddd2b618401ff4 +size 9279 diff --git a/data/cache/sample_299.pt b/data/cache/sample_299.pt new file mode 100644 index 0000000000000000000000000000000000000000..3889d5aac614d88b2afd762922b13fc23798beea --- /dev/null +++ b/data/cache/sample_299.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d19c43ce8062d079a38a67e17e3b03218d7a67055921f504408af25f97da1886 +size 27135 diff --git a/data/data_fetcher.py b/data/data_fetcher.py index f1e8db53233b3a906a7126820bbce9b88324d315..b6671f7a9e352759746f6b26163275abb30a2395 100644 --- a/data/data_fetcher.py +++ b/data/data_fetcher.py @@ -1066,8 +1066,9 @@ class DataFetcher: mint_timestamp: datetime.datetime, max_horizon_seconds: int = 3600, include_wallet_data: bool = True, - include_graph: bool = True - ) -> Dict[str, Any]: + include_graph: bool = True, + min_trades: int = 0 + ) -> Optional[Dict[str, Any]]: """ Fetches ALL available data for a token up to the maximum horizon. This data is agnostic of T_cutoff and will be masked/filtered dynamically during training. @@ -1098,6 +1099,10 @@ class DataFetcher: sorted_trades = sorted(list(all_trades.values()), key=lambda x: x['timestamp']) + if len(sorted_trades) < min_trades: + print(f" SKIP: Token {token_address} has only {len(sorted_trades)} trades (min required: {min_trades}). skipping fetches.") + return None + # 3. Fetch other events transfers = self.fetch_transfers_for_token(token_address, max_limit_time, 0.0) # 0.0 means fetch all pool_creations = self.fetch_pool_creations_for_token(token_address, max_limit_time) diff --git a/data/data_loader.py b/data/data_loader.py index bc6b2e621317a9d495269f81f7bba218db233b2e..1f128c86fb4bd4498fb0e180681f1ee98b2b4dd8 100644 --- a/data/data_loader.py +++ b/data/data_loader.py @@ -987,8 +987,12 @@ class OracleDataset(Dataset): mint_timestamp=t0, max_horizon_seconds=self.max_cache_horizon_seconds, include_wallet_data=False, - include_graph=False + include_graph=False, + min_trades=25 ) + if raw_data is None: + return None + def _timestamp_to_order_value(ts_value: Any) -> float: if isinstance(ts_value, datetime.datetime): if ts_value.tzinfo is None: @@ -1052,896 +1056,6 @@ class OracleDataset(Dataset): raw_data["protocol_id"] = initial_mint_record.get("protocol") return raw_data - # Legacy full-sample caching path (unused). - - # The EmbeddingPooler is crucial for collecting unique text/images per sample - pooler = EmbeddingPooler() - - def _safe_int(value: Any) -> int: - try: - return int(value) - except (TypeError, ValueError): - return 0 - - def _timestamp_to_order_value(ts_value: Any) -> float: - if isinstance(ts_value, datetime.datetime): - if ts_value.tzinfo is None: - ts_value = ts_value.replace(tzinfo=datetime.timezone.utc) - return ts_value.timestamp() - try: - return float(ts_value) - except (TypeError, ValueError): - return 0.0 - - def _event_execution_sort_key(timestamp_value: Any, - slot: Any = 0, - transaction_index: Any = 0, - instruction_index: Any = 0, - signature: str = '') -> tuple: - return ( - _timestamp_to_order_value(timestamp_value), - _safe_int(slot), - _safe_int(transaction_index), - _safe_int(instruction_index), - signature or '' - ) - - - - # 1. Fetch anchor Mint event to establish the timeline & initial entities - # --- SIMPLIFIED: Use the mint record we already have --- - mint_event = { - 'event_type': 'Mint', - 'timestamp': int(initial_mint_record['timestamp'].timestamp()), - 'relative_ts': 0, - 'wallet_address': initial_mint_record['creator_address'], - 'token_address': token_address, - 'protocol_id': initial_mint_record.get('protocol') - } - - initial_entities = {mint_event['wallet_address']} - event_sequence_entries: List[Tuple[tuple, Dict[str, Any]]] = [] - - def _register_event(event: Dict[str, Any], sort_key: tuple): - event_sequence_entries.append((sort_key, event)) - - _register_event(mint_event, _event_execution_sort_key(mint_event['timestamp'], signature='Mint')) - - # Determine the cutoff time for all historical data fetching - # T_cutoff = datetime.datetime.fromtimestamp(event_sequence[-1]['timestamp'], tz=datetime.timezone.utc) - # --- MODIFIED: Set T_cutoff to mint timestamp + 1 day --- - T_cutoff = initial_mint_record['timestamp'] + datetime.timedelta(seconds=self.t_cutoff_seconds) - max_horizon_seconds = max(self.horizons_seconds) if self.horizons_seconds else 0 - future_trades_for_labels: List[Dict[str, Any]] = [] - if self.num_outputs > 0 and max_horizon_seconds > 0: - future_window_end = T_cutoff + datetime.timedelta(seconds=max_horizon_seconds) - future_trades_for_labels = self.fetcher.fetch_future_trades_for_token( - token_address, T_cutoff, future_window_end - ) - if not future_trades_for_labels: - print(f"INFO: Skipping token {token_address} (no future trades beyond cutoff).") - return None - - # --- NEW: Accumulate all wallets before hitting Neo4j to avoid duplicate queries --- - graph_seed_entities = set(initial_entities) - all_graph_entities: Dict[str, str] = {mint_event['wallet_address']: 'Wallet'} - all_graph_entity_addrs = set(all_graph_entities.keys()) - graph_links: Dict[str, Any] = {} - - # 3. Fetch trades and add traders to the entity set - # --- REFACTORED: Fetch trades using the new 3-part HBH system --- - early_trades, middle_trades, recent_trades = self.fetcher.fetch_trades_for_token( - token_address, T_cutoff, EVENT_COUNT_THRESHOLD_FOR_HBH, HBH_EARLY_EVENT_LIMIT, HBH_RECENT_EVENT_LIMIT - ) - def _trade_execution_sort_key(trade: Dict[str, Any]) -> tuple: - return ( - _timestamp_to_order_value(trade.get('timestamp')), - _safe_int(trade.get('slot')), - _safe_int(trade.get('transaction_index')), - _safe_int(trade.get('instruction_index')), - trade.get('signature', '') - ) - - early_trades = sorted(early_trades, key=_trade_execution_sort_key) - middle_trades = sorted(middle_trades, key=_trade_execution_sort_key) - recent_trades = sorted(recent_trades, key=_trade_execution_sort_key) - - # --- NEW: Inject special context tokens to mark HBH boundaries --- - # 'Middle' marks the start of the blurry middle window - if middle_trades: - mid_ts_val = _timestamp_to_order_value(middle_trades[0].get('timestamp')) - middle_event = { - 'event_type': 'Middle', - 'timestamp': int(mid_ts_val), - 'relative_ts': mid_ts_val - _timestamp_to_order_value(t0) - } - _register_event(middle_event, _event_execution_sort_key(mid_ts_val, signature='Middle')) - - # 'RECENT' marks the start of the high-definition recent window - if recent_trades: - rec_ts_val = _timestamp_to_order_value(recent_trades[0].get('timestamp')) - recent_event = { - 'event_type': 'RECENT', - 'timestamp': int(rec_ts_val), - 'relative_ts': rec_ts_val - _timestamp_to_order_value(t0) - } - _register_event(recent_event, _event_execution_sort_key(rec_ts_val, signature='RECENT')) - - # For now, we only process the high-definition segments for event creation, - # deduplicated in case of overlap between early/recent slices. - trade_records = [] - seen_trade_keys = set() - for trade in early_trades + recent_trades: - dedupe_key = ( - _safe_int(trade.get('slot')), - _safe_int(trade.get('transaction_index')), - _safe_int(trade.get('instruction_index')), - trade.get('signature', '') - ) - if dedupe_key in seen_trade_keys: - continue - seen_trade_keys.add(dedupe_key) - trade_records.append(trade) - - # --- NEW: Correctly detect bundles BEFORE filtering --- - # trade_records are ordered by (timestamp, slot, transaction_index, instruction_index), - # so adjacent entries that share a slot belong to the same bundle. - # We mark them in the raw record so the flag persists after filtering. - if len(trade_records) > 1: - for i in range(1, len(trade_records)): - if trade_records[i]['slot'] == trade_records[i-1]['slot']: - trade_records[i]['is_bundle'] = True - trade_records[i-1]['is_bundle'] = True - - for trade in trade_records: - trader_addr = trade['maker'] - if trader_addr not in all_graph_entity_addrs: - all_graph_entity_addrs.add(trader_addr) - all_graph_entities[trader_addr] = 'Wallet' # Trades are always made by wallets - graph_seed_entities.add(trader_addr) - - # --- REFACTORED: Fetch significant transfers, passing total supply for filtering --- - raw_total_supply = initial_mint_record.get('total_supply', 0) - base_decimals = initial_mint_record.get('token_decimals', 9) - total_supply_dec = (raw_total_supply / (10**base_decimals)) if base_decimals > 0 else raw_total_supply - - # Calculate the minimum amount to be considered a significant transfer - total_supply_dec = total_supply_dec * MIN_AMOUNT_TRANSFER_SUPPLY # 0.01% of total supply - - transfer_records = self.fetcher.fetch_transfers_for_token(token_address, T_cutoff, total_supply_dec) - for transfer in transfer_records: - src = transfer.get('source') - dst = transfer.get('destination') - if src: - all_graph_entities[src] = 'Wallet' - graph_seed_entities.add(src) - if dst: - all_graph_entities[dst] = 'Wallet' - graph_seed_entities.add(dst) - - # --- NEW: Fetch pool creation events to enrich entity set and token list --- - pool_creation_records = self.fetcher.fetch_pool_creations_for_token(token_address, T_cutoff) - pool_quote_addresses = set() - pool_metadata_by_address: Dict[str, Dict[str, Any]] = {} - for pool_record in pool_creation_records: - creator_addr = pool_record.get('creator_address') - if creator_addr: - all_graph_entities[creator_addr] = 'Wallet' - graph_seed_entities.add(creator_addr) - quote_addr = pool_record.get('quote_address') - if quote_addr: - pool_quote_addresses.add(quote_addr) - # Mark discovered quote tokens so they can be fetched later if needed - all_graph_entities.setdefault(quote_addr, 'Token') - pool_addr = pool_record.get('pool_address') - if pool_addr: - pool_metadata_by_address[pool_addr] = { - 'quote_token_address': quote_addr, - 'quote_decimals': pool_record.get('quote_decimals'), - 'base_decimals': pool_record.get('base_decimals') - } - - liquidity_change_records = self.fetcher.fetch_liquidity_changes_for_pools(list(pool_metadata_by_address.keys()), T_cutoff) - for liquidity_record in liquidity_change_records: - lp_provider = liquidity_record.get('lp_provider') - if lp_provider: - all_graph_entities[lp_provider] = 'Wallet' - graph_seed_entities.add(lp_provider) - - fee_collection_records = self.fetcher.fetch_fee_collections_for_token(token_address, T_cutoff) - burn_records = self.fetcher.fetch_burns_for_token(token_address, T_cutoff) - supply_lock_records = self.fetcher.fetch_supply_locks_for_token(token_address, T_cutoff) - migration_records = self.fetcher.fetch_migrations_for_token(token_address, T_cutoff) - # NEW: Fetch top holders to include their wallets so we can embed them - holder_records = self.fetcher.fetch_token_holders_for_snapshot(token_address, T_cutoff, limit=HOLDER_SNAPSHOT_TOP_K) - fee_related_mints = set() - for fee_record in fee_collection_records: - recipient = fee_record.get('recipient_address') - if recipient: - all_graph_entities[recipient] = 'Wallet' - graph_seed_entities.add(recipient) - mint_addr = fee_record.get('token_0_mint_address') - if mint_addr and mint_addr not in (token_address, ''): - fee_related_mints.add(mint_addr) - # Include migration pool addresses as tokens/entities if present - for mig in migration_records: - vpool = mig.get('virtual_pool_address') - paddr = mig.get('pool_address') - if vpool: - all_graph_entities.setdefault(vpool, 'Token') - if paddr: - all_graph_entities.setdefault(paddr, 'Token') - - # Include burner wallets in entity set - for burn in burn_records: - src = burn.get('source') - if src: - all_graph_entities[src] = 'Wallet' - graph_seed_entities.add(src) - # Include holder wallets in entity set for embedding availability - for rec in holder_records: - wa = rec.get('wallet_address') - if wa: - all_graph_entities[wa] = 'Wallet' - graph_seed_entities.add(wa) - # Include lockers in entity set - for lock in supply_lock_records: - sender = lock.get('sender') - recipient = lock.get('recipient') - if sender: - all_graph_entities[sender] = 'Wallet' - graph_seed_entities.add(sender) - if recipient: - all_graph_entities[recipient] = 'Wallet' - graph_seed_entities.add(recipient) - - # --- NEW: Now that all wallets are known, fetch graph links once --- - if graph_seed_entities: - fetched_graph_entities, graph_links = self.fetcher.fetch_graph_links( - list(graph_seed_entities), - T_cutoff=T_cutoff, - max_degrees=1 - ) - for addr, entity_type in fetched_graph_entities.items(): - all_graph_entities[addr] = entity_type - all_graph_entity_addrs = set(all_graph_entities.keys()) - - # 4. Fetch and process static data for the main token - tokens_to_fetch = [token_address] - for quote_addr in pool_quote_addresses: - if quote_addr and quote_addr not in tokens_to_fetch: - tokens_to_fetch.append(quote_addr) - for mint_addr in fee_related_mints: - if mint_addr and mint_addr not in tokens_to_fetch: - tokens_to_fetch.append(mint_addr) - main_metadata = {} - main_metadata[token_address] = { - 'name': initial_mint_record["token_name"], - 'symbol': initial_mint_record["token_symbol"], - 'token_uri': initial_mint_record["token_uri"], - 'protocol': initial_mint_record["protocol"], - 'total_supply': initial_mint_record["total_supply"], - 'decimals': initial_mint_record["token_decimals"], - 'address': token_address - } - - main_token_data = self._process_token_data(tokens_to_fetch, pooler, T_cutoff, main_metadata) - - # --- CRITICAL FIX: If the main token is invalid, skip this entire sample --- - if not main_token_data: - return None # The specific reason is already logged in _process_token_data - - # 5. Fetch and process data for ALL wallets discovered (from mint, graph, trades, etc.) - # --- FIXED: Correctly identify wallets using their entity type from the graph --- - wallets_to_fetch = [addr for addr, type in all_graph_entities.items() if type == 'Wallet'] - # Also include traders from trades, even if they weren't in the graph - wallets_to_fetch.extend([trade['maker'] for trade in trade_records if trade['maker'] not in wallets_to_fetch]) - wallet_data, all_token_data = self._process_wallet_data(list(set(wallets_to_fetch)), main_token_data.copy(), pooler, T_cutoff) - - # 6. Process trades into event format using the now-available wallet_data - trade_events = [] - - aggregation_trades = [] - high_def_chart_trades = [] # Early + recent windows use 1s candles - middle_chart_trades = [] # Middle window uses 30s candles - # --- FIXED: Get main token decimals once before the loop --- - main_token_info = main_token_data[token_address] - base_decimals = main_token_info.get('decimals', 6) - # --- FIXED: Get total_supply directly from the initial mint record --- - raw_total_supply = initial_mint_record.get('total_supply', 0) - total_supply_dec = (raw_total_supply / (10**base_decimals)) if base_decimals > 0 else raw_total_supply - print("SUPPLY", total_supply_dec) - - t0_timestamp = _timestamp_to_order_value(t0) - - for trade in trade_records: - # --- NEW: Filter out trades with low USD value --- - # This applies to both event creation and chart aggregation. - if trade.get('total_usd', 0.0) < self.min_trade_usd: - continue - - trade_sort_key = _trade_execution_sort_key(trade) - trade_timestamp = trade.get('timestamp') - trade_timestamp_value = _timestamp_to_order_value(trade_timestamp) - trade_timestamp_int = int(trade_timestamp_value) - # --- NEW: Determine event type with priority --- - trader_addr = trade['maker'] - trader_wallet_data = wallet_data.get(trader_addr, {}) - trader_profile = trader_wallet_data.get('profile', {}) - trader_socials = trader_wallet_data.get('socials', {}) - - KOL_NAME_KEYS = ['kolscan_name', 'cabalspy_name', 'axiom_kol_name'] - is_kol = any(trader_socials.get(key) for key in KOL_NAME_KEYS if trader_socials) - is_profitable = (trader_profile.get('stats_30d_realized_profit_pnl', 0.0) > SMART_WALLET_PNL_THRESHOLD and - trader_profile.get('stats_30d_realized_profit_usd', 0.0) > SMART_WALLET_USD_THRESHOLD) - - base_amount_dec = trade.get('base_amount', 0) / (10**base_decimals) - is_large_amount = (total_supply_dec > 0 and (base_amount_dec / total_supply_dec) > LARGE_TRADE_SUPPLY_PCT_THRESHOLD) - - if trader_addr == creator_address: - event_type = 'Deployer_Trade' - elif is_kol or is_profitable: - event_type = 'SmartWallet_Trade' - elif trade.get('total_usd', 0.0) > LARGE_TRADE_USD_THRESHOLD or is_large_amount: - event_type = 'LargeTrade' - else: - event_type = 'Trade' - - # --- NEW: Get token decimals for accurate calculations --- - quote_address = trade.get('quote_address') - quote_decimals = QUOTE_TOKEN_DECIMALS.get(quote_address, 9) # Default to 9 for SOL - - quote_amount_dec = trade.get('quote_amount', 0) / (10**quote_decimals) - - # --- NEW: Correctly calculate pre-trade balances --- - is_sell = trade.get('trade_type') == 1 - - # If it's a sell, the pre-trade base balance was higher. - pre_trade_base_balance = (trade.get('base_balance', 0.0) + base_amount_dec) if is_sell else trade.get('base_balance', 0.0) - # If it's a buy, the pre-trade quote balance was higher. - pre_trade_quote_balance = (trade.get('quote_balance', 0.0) + quote_amount_dec) if not is_sell else trade.get('quote_balance', 0.0) - - # --- NEW: Calculate percentage features with the corrected values --- - token_amount_pct = (base_amount_dec / pre_trade_base_balance) if pre_trade_base_balance > 1e-9 else 1.0 - quote_amount_pct = (quote_amount_dec / pre_trade_quote_balance) if pre_trade_quote_balance > 1e-9 else 1.0 - is_success = trade.get('success', False) - if is_success: - chart_entry = { - 'trade_direction': 1 if is_sell else 0, # 1 for sell, 0 for buy, - 'price_usd': trade.get('price_usd', 0.0), - 'timestamp': trade_timestamp_int, - 'sort_key': trade_sort_key, - } - aggregation_trades.append(chart_entry) - high_def_chart_trades.append(chart_entry.copy()) - # --- NEW: Calculate token amount as a percentage of total supply --- - token_amount_pct_of_supply = (base_amount_dec / total_supply_dec) if total_supply_dec > 0 else 0.0 - trade_event = { - 'event_type': event_type, - 'timestamp': trade_timestamp_int, - 'relative_ts': trade_timestamp_value - t0_timestamp, - 'wallet_address': trade['maker'], - 'token_address': token_address, - 'trade_direction': 1 if is_sell else 0, # 1 for sell, 0 for buy - 'sol_amount': trade.get('total', 0.0), # Assuming 'total' is the SOL amount - 'dex_platform_id': trade.get('platform', 0), - 'priority_fee': trade.get('priority_fee', 0.0), - 'mev_protection': 1 if trade.get('mev_protection', 0) > 0 else 0, # Convert to binary: 0 for False, 1 for True - # --- FIXED: Use the new, correct percentage calculations --- - 'token_amount_pct_of_holding': token_amount_pct, - 'quote_amount_pct_of_holding': quote_amount_pct, - 'slippage': trade.get('slippage', 0.0), - 'token_amount_pct_to_total_supply': token_amount_pct_of_supply, # FIXED: Replaced price_impact - 'success': is_success, - 'success': is_success, - 'is_bundle': trade.get('is_bundle', False), # Use pre-calculated flag - 'total_usd': trade.get('total_usd', 0.0) - } - trade_events.append(trade_event) - _register_event(trade_event, trade_sort_key) - - for trade in middle_trades: - # --- NEW: Filter out trades with low USD value from chart aggregation --- - if trade.get('total_usd', 0.0) < self.min_trade_usd: - continue - - # --- NEW: Correctly calculate pre-trade balances --- - is_sell = trade.get('trade_type') == 1 - - chart_entry = { - 'trade_direction': 1 if is_sell else 0, # 1 for sell, 0 for buy, - 'price_usd': trade.get('price_usd', 0.0), - 'timestamp': int(_timestamp_to_order_value(trade.get('timestamp'))), - 'sort_key': _trade_execution_sort_key(trade), - } - aggregation_trades.append(chart_entry) - middle_chart_trades.append(chart_entry.copy()) - - def _finalize_chart_trade_list(trade_list: List[Dict[str, Any]]): - trade_list.sort(key=lambda x: x['sort_key']) - for entry in trade_list: - entry.pop('sort_key', None) - - _finalize_chart_trade_list(aggregation_trades) - _finalize_chart_trade_list(high_def_chart_trades) - _finalize_chart_trade_list(middle_chart_trades) - - # --- NEW: Debugging log for all trades used in chart generation --- - print(f"\n[DEBUG] Total aggregated trades for OHLC: {len(aggregation_trades)}") - if aggregation_trades: - print("[DEBUG] First 5 aggregated trades:", aggregation_trades[:5]) - - HIGH_DEF_INTERVAL = ("1s", 1) - MIDDLE_INTERVAL = ("30s", 30) - - def _emit_chart_segments(trades: List[Dict[str, Any]], interval: tuple, signature_prefix: str): - if not trades: - return [] - interval_label, interval_seconds = interval - ohlc_series = self._generate_ohlc(trades, T_cutoff, interval_seconds) - print(f"[DEBUG] Generated OHLC series ({interval_label}) with {len(ohlc_series)} candles. First 5: {ohlc_series[:5]}") - emitted_events = [] - for idx in range(0, len(ohlc_series), OHLC_SEQ_LEN): - segment = ohlc_series[idx:idx + OHLC_SEQ_LEN] - if not segment: - continue - last_ts = segment[-1][0] - opens_raw = [s[1] for s in segment] - closes_raw = [s[2] for s in segment] - chart_event = { - 'event_type': 'Chart_Segment', - 'timestamp': last_ts, - 'relative_ts': last_ts - t0_timestamp, - 'opens': self._normalize_price_series(opens_raw), - 'closes': self._normalize_price_series(closes_raw), - 'i': interval_label - } - emitted_events.append(chart_event) - _register_event(chart_event, _event_execution_sort_key(last_ts, signature=f"{signature_prefix}-{idx}")) - return emitted_events - - # --- NEW: Generate Chart_Segment events from aggregated trades --- - chart_events = [] - chart_events.extend(_emit_chart_segments(high_def_chart_trades, HIGH_DEF_INTERVAL, "chart-hd")) - chart_events.extend(_emit_chart_segments(middle_chart_trades, MIDDLE_INTERVAL, "chart-mid")) - - # --- NEW: Convert pool creation records into structured events --- - SOL_MINT_ADDRESS = 'So11111111111111111111111111111111111111112' - - def _convert_amount_with_decimals(raw_amount: Any, mint_addr: Optional[str]) -> float: - if raw_amount is None: - return 0.0 - try: - amount_float = float(raw_amount) - except (TypeError, ValueError): - return 0.0 - decimals_value = None - if mint_addr == SOL_MINT_ADDRESS: - decimals_value = QUOTE_TOKEN_DECIMALS.get(SOL_MINT_ADDRESS, 9) - elif mint_addr: - token_info = all_token_data.get(mint_addr) or main_token_data.get(mint_addr) - if token_info: - decimals_value = token_info.get('decimals') - if decimals_value is None: - return amount_float - try: - decimals_int = max(int(decimals_value), 0) - except (TypeError, ValueError): - decimals_int = 0 - if decimals_int <= 0: - return amount_float - if mint_addr == SOL_MINT_ADDRESS: - should_scale = abs(amount_float) >= 1e5 - else: - should_scale = abs(amount_float) >= (10 ** decimals_int) - return amount_float / (10 ** decimals_int) if should_scale else amount_float - - pool_created_events = [] - for pool_record in pool_creation_records: - pool_ts_value = _timestamp_to_order_value(pool_record.get('timestamp')) - pool_timestamp_int = int(pool_ts_value) - - quote_token_address = pool_record.get('quote_address') - - base_liquidity_raw = pool_record.get('initial_base_liquidity') - base_decimals_override = pool_record.get('base_decimals') - if base_decimals_override is None: - base_decimals_override = main_token_info.get('decimals', base_decimals) - base_decimals_value = int(base_decimals_override) if base_decimals_override is not None else int(base_decimals) - base_amount_dec = _convert_amount_with_decimals(base_liquidity_raw, token_address) - - quote_liquidity_raw = pool_record.get('initial_quote_liquidity') - quote_decimals_override = pool_record.get('quote_decimals') - if quote_decimals_override is None: - quote_token_info = main_token_data.get(quote_token_address, {}) - quote_decimals_override = quote_token_info.get('decimals', QUOTE_TOKEN_DECIMALS.get(quote_token_address, 9)) - if quote_decimals_override is None: - quote_decimals_override = 9 - quote_decimals_value = int(quote_decimals_override) - quote_amount_dec = _convert_amount_with_decimals(quote_liquidity_raw, quote_token_address) - - protocol_raw = pool_record.get('protocol') - protocol_id = protocol_raw if isinstance(protocol_raw, int) and 0 <= protocol_raw < vocab.NUM_PROTOCOLS else vocab.PROTOCOL_TO_ID.get('Unknown', 0) - - pool_event = { - 'event_type': 'PoolCreated', - 'timestamp': pool_timestamp_int, - 'relative_ts': pool_ts_value - t0_timestamp, - 'wallet_address': pool_record.get('creator_address'), - 'token_address': token_address, - 'protocol_id': protocol_id, - 'quote_token_address': quote_token_address, - 'base_amount': base_amount_dec, - 'quote_amount': quote_amount_dec, - 'priority_fee': pool_record.get('priority_fee', 0.0), - } - pool_created_events.append(pool_event) - pool_sort_key = _event_execution_sort_key( - pool_ts_value, - slot=pool_record.get('slot'), - transaction_index=0, - instruction_index=0, - signature=pool_record.get('signature', '') - ) - _register_event(pool_event, pool_sort_key) - - # --- NEW: Convert liquidity change records into structured events --- - liquidity_change_events = [] - for liquidity_record in liquidity_change_records: - pool_address = liquidity_record.get('pool_address') - pool_meta = pool_metadata_by_address.get(pool_address, {}) - quote_token_address = pool_meta.get('quote_token_address') - - quote_decimals_override = pool_meta.get('quote_decimals') - if quote_decimals_override is None: - quote_token_info = main_token_data.get(quote_token_address, {}) - quote_decimals_override = quote_token_info.get('decimals', QUOTE_TOKEN_DECIMALS.get(quote_token_address, 9)) - if quote_decimals_override is None: - quote_decimals_override = 9 - - quote_amount_raw = liquidity_record.get('quote_amount', 0) - quote_decimals_value = int(quote_decimals_override) - quote_amount_dec = _convert_amount_with_decimals(quote_amount_raw, quote_token_address) - - liquidity_ts_value = _timestamp_to_order_value(liquidity_record.get('timestamp')) - liquidity_timestamp_int = int(liquidity_ts_value) - - protocol_raw = liquidity_record.get('protocol') - protocol_id = protocol_raw if isinstance(protocol_raw, int) and 0 <= protocol_raw < vocab.NUM_PROTOCOLS else vocab.PROTOCOL_TO_ID.get('Unknown', 0) - change_type_id = int(liquidity_record.get('change_type', 0) or 0) - - liquidity_event = { - 'event_type': 'LiquidityChange', - 'timestamp': liquidity_timestamp_int, - 'relative_ts': liquidity_ts_value - t0_timestamp, - 'wallet_address': liquidity_record.get('lp_provider'), - 'token_address': token_address, - 'protocol_id': protocol_id, - 'quote_token_address': quote_token_address, - 'change_type_id': change_type_id, - 'quote_amount': quote_amount_dec, - 'priority_fee': liquidity_record.get('priority_fee', 0.0), - 'success': liquidity_record.get('success', False) - } - - if quote_token_address: - liquidity_change_events.append(liquidity_event) - liquidity_sort_key = _event_execution_sort_key( - liquidity_ts_value, - slot=liquidity_record.get('slot'), - transaction_index=0, - instruction_index=0, - signature=liquidity_record.get('signature', '') - ) - _register_event(liquidity_event, liquidity_sort_key) - - # --- NEW: Convert fee collection records into structured events --- - fee_collected_events = [] - for fee_record in fee_collection_records: - fee_ts_value = _timestamp_to_order_value(fee_record.get('timestamp')) - fee_timestamp_int = int(fee_ts_value) - - token0_mint = fee_record.get('token_0_mint_address') - token1_mint = fee_record.get('token_1_mint_address') - token0_amount_raw = fee_record.get('token_0_amount') - token1_amount_raw = fee_record.get('token_1_amount') - - sol_amount = 0.0 - if token0_mint == SOL_MINT_ADDRESS: - sol_amount = _convert_amount_with_decimals(token0_amount_raw, SOL_MINT_ADDRESS) - elif token1_mint == SOL_MINT_ADDRESS: - sol_amount = _convert_amount_with_decimals(token1_amount_raw, SOL_MINT_ADDRESS) - - # Skip if both amounts are zero and no meaningful wallet - recipient_addr = fee_record.get('recipient_address') - if not recipient_addr: - continue - - fee_event = { - 'event_type': 'FeeCollected', - 'timestamp': fee_timestamp_int, - 'relative_ts': fee_ts_value - t0_timestamp, - 'wallet_address': recipient_addr, - 'token_address': token_address, - 'sol_amount': sol_amount, - 'priority_fee': fee_record.get('priority_fee', 0.0), - 'protocol_id': fee_record.get('protocol', 0), - 'success': fee_record.get('success', False), - } - - fee_collected_events.append(fee_event) - fee_sort_key = _event_execution_sort_key( - fee_ts_value, - slot=fee_record.get('slot'), - transaction_index=0, - instruction_index=0, - signature=fee_record.get('signature', '') - ) - _register_event(fee_event, fee_sort_key) - - # --- NEW: Convert burn records into structured TokenBurn events --- - token_burn_events = [] - for burn in burn_records: - burn_ts_value = _timestamp_to_order_value(burn.get('timestamp')) - burn_timestamp_int = int(burn_ts_value) - - amount_dec = burn.get('amount_decimal') - if amount_dec is None: - raw_amount = burn.get('amount', 0) - try: - raw_amount = float(raw_amount) - except (TypeError, ValueError): - raw_amount = 0.0 - amount_dec = raw_amount / (10**base_decimals) if base_decimals and base_decimals > 0 else raw_amount - - pct_of_supply = (amount_dec / total_supply_dec) if total_supply_dec and total_supply_dec > 0 else 0.0 - - burn_event = { - 'event_type': 'TokenBurn', - 'timestamp': burn_timestamp_int, - 'relative_ts': burn_ts_value - t0_timestamp, - 'wallet_address': burn.get('source'), - 'token_address': token_address, - 'amount_pct_of_total_supply': pct_of_supply, - 'amount_tokens_burned': amount_dec, - 'priority_fee': burn.get('priority_fee', 0.0), - 'success': burn.get('success', False), - } - token_burn_events.append(burn_event) - burn_sort_key = _event_execution_sort_key( - burn_ts_value, - slot=burn.get('slot'), - transaction_index=0, - instruction_index=0, - signature=burn.get('signature', '') - ) - _register_event(burn_event, burn_sort_key) - - # --- NEW: Convert migrations into Migrated events --- - for mig in migration_records: - mig_ts_value = _timestamp_to_order_value(mig.get('timestamp')) - mig_timestamp_int = int(mig_ts_value) - prot_raw = mig.get('protocol', 0) - protocol_id = prot_raw if isinstance(prot_raw, int) and 0 <= prot_raw < vocab.NUM_PROTOCOLS else vocab.PROTOCOL_TO_ID.get('Unknown', 0) - mig_event = { - 'event_type': 'Migrated', - 'timestamp': mig_timestamp_int, - 'relative_ts': mig_ts_value - t0_timestamp, - 'protocol_id': protocol_id, - } - mig_sort_key = _event_execution_sort_key( - mig_ts_value, - slot=mig.get('slot'), - transaction_index=0, - instruction_index=0, - signature=mig.get('signature', '') - ) - _register_event(mig_event, mig_sort_key) - - # NOTE: HolderSnapshot events are generated per-snapshot time inside _generate_onchain_snapshots - - # --- NEW: Convert supply lock records into structured SupplyLock events --- - supply_lock_events = [] - for lock in supply_lock_records: - lock_ts_value = _timestamp_to_order_value(lock.get('timestamp')) - lock_timestamp_int = int(lock_ts_value) - - # total_locked_amount is Float64, typically already decimal-scaled - raw_locked = lock.get('total_locked_amount', 0.0) - try: - locked_amount = float(raw_locked) - except (TypeError, ValueError): - locked_amount = 0.0 - - pct_of_supply = (locked_amount / total_supply_dec) if total_supply_dec and total_supply_dec > 0 else 0.0 - - final_unlock_ts = lock.get('final_unlock_timestamp') or 0 - try: - final_unlock_ts = int(final_unlock_ts) - except (TypeError, ValueError): - final_unlock_ts = 0 - lock_duration = max(0, final_unlock_ts - lock_timestamp_int) - - lock_event = { - 'event_type': 'SupplyLock', - 'timestamp': lock_timestamp_int, - 'relative_ts': lock_ts_value - t0_timestamp, - 'wallet_address': lock.get('sender'), - 'token_address': token_address, - 'amount_pct_of_total_supply': pct_of_supply, - 'lock_duration': float(lock_duration), - 'priority_fee': lock.get('priority_fee', 0.0), - 'success': lock.get('success', False), - } - supply_lock_events.append(lock_event) - lock_sort_key = _event_execution_sort_key( - lock_ts_value, - slot=lock.get('slot'), - transaction_index=0, - instruction_index=0, - signature=lock.get('signature', '') - ) - _register_event(lock_event, lock_sort_key) - - # --- NEW: Process transfer events with strict validation --- - transfer_events = [] - for transfer in transfer_records: - print("BOMBOCLAT TRANSFER", transfer) - # --- VALIDATION: Ensure the destination wallet has a valid profile --- - if transfer['destination'] not in wallet_data: - print(f"INFO: Skipping transfer event {transfer['signature']} because destination wallet {transfer['destination']} has no profile.") - continue - - # Calculate features - token_amount = transfer.get('amount_decimal', 0.0) - pct_of_supply = (token_amount / total_supply_dec) if total_supply_dec > 0 else 0.0 - - # Reconstruct pre-transfer balance of the source wallet - pre_transfer_source_balance = transfer.get('source_balance', 0.0) + token_amount - pct_of_holding = (token_amount / pre_transfer_source_balance) if pre_transfer_source_balance > 1e-9 else 1.0 - - # --- NEW: Classify LargeTransfer based on supply percentage --- - if pct_of_supply > LARGE_TRANSFER_SUPPLY_PCT_THRESHOLD: - event_type = 'LargeTransfer' - else: - event_type = 'Transfer' - - transfer_ts_value = _timestamp_to_order_value(transfer.get('timestamp')) - transfer_event = { - 'event_type': event_type, - 'timestamp': int(transfer_ts_value), - 'relative_ts': transfer_ts_value - t0_timestamp, - 'wallet_address': transfer['source'], - 'destination_wallet_address': transfer['destination'], - 'token_address': token_address, - 'token_amount': token_amount, - 'transfer_pct_of_total_supply': pct_of_supply, - 'transfer_pct_of_holding': pct_of_holding, - 'priority_fee': transfer.get('priority_fee', 0.0) - } - transfer_events.append(transfer_event) - transfer_sort_key = _event_execution_sort_key( - transfer_ts_value, - slot=transfer.get('slot'), - transaction_index=transfer.get('transaction_index'), - instruction_index=transfer.get('instruction_index'), - signature=transfer.get('signature', '') - ) - _register_event(transfer_event, transfer_sort_key) - - # --- NEW: Bundle detection moved to before trade_events generation to avoid index errors --- - # (See lines ~906) - - - # Generate OnChain_Snapshot events using helper - print(f"[DEBUG-TRACE] Calling _generate_onchain_snapshots for {token_address}") - self._generate_onchain_snapshots( - token_address=token_address, - t0_timestamp=t0_timestamp, - T_cutoff=T_cutoff, - interval_sec=HOLDER_SNAPSHOT_INTERVAL_SEC, - trade_events=trade_events, - transfer_events=transfer_events, - aggregation_trades=aggregation_trades, - wallet_data=wallet_data, - total_supply_dec=total_supply_dec, - _register_event_fn=_register_event - ) - - # 7. TODO: Fetch social events (tweets, replies, etc.) for all discovered wallets - # - Query tables like 'x_posts', 'pump_replies'. - # - Use the pooler to get indices for text and media. - - # Sort the combined event sequence by precise execution order - event_sequence_entries.sort(key=lambda entry: entry[0]) - event_sequence = [event for _, event in event_sequence_entries] - - anchor_timestamp_int = int(_timestamp_to_order_value(T_cutoff)) - anchor_price = None - print(f"[DEBUG-TRACE] Calculating anchor price. aggregation_trades len: {len(aggregation_trades)}") - if aggregation_trades: - for trade in reversed(aggregation_trades): - price_val = trade.get('price_usd') - if price_val is not None: - anchor_price = float(price_val) - break - if self.num_outputs > 0 and anchor_price is None: - print(f"INFO: Skipping token {token_address} (no pre-cutoff price for labeling).") - return None - - future_price_series: List[Tuple[int, float]] = [] - if (self.num_outputs > 0 and max_horizon_seconds > 0 and - anchor_price is not None): - timeline = [(anchor_timestamp_int, anchor_price)] - for trade in future_trades_for_labels: - price_val = trade.get('price_usd') - if price_val is None: - continue - ts_int = int(_timestamp_to_order_value(trade.get('timestamp'))) - if ts_int <= timeline[-1][0]: - continue - timeline.append((ts_int, float(price_val))) - if len(timeline) > 1: - future_price_series = timeline - - debug_label_entries: List[Dict[str, Any]] = [] - if self.num_outputs > 0: - print(f"[DEBUG-TRACE] Calling _compute_future_return_labels. Num outputs: {self.num_outputs}") - labels_tensor, labels_mask_tensor, debug_label_entries = self._compute_future_return_labels( - anchor_price, anchor_timestamp_int, future_price_series - ) - if labels_mask_tensor.sum() == 0: - print(f"INFO: Skipping token {token_address} (no valid horizons in future).") - return None - print("\n[Label Debug]") - for entry in debug_label_entries: - print(f" Horizon {entry['horizon']}s -> target_ts={entry['target_ts']}, " - f"future_price={entry['future_price']}, return={entry['return']:.6f}, " - f"mask={int(entry['mask'])}") - else: - labels_tensor = torch.zeros(0) - labels_mask_tensor = torch.zeros(0) - - # For now, we'll return the item with mint and trade events - item = { - 'event_sequence': event_sequence, - 'wallets': wallet_data, - 'tokens': all_token_data, # FIXED: Use the comprehensive token data - 'graph_links': graph_links, # NEW: Add the fetched graph links - 'embedding_pooler': pooler, - 'labels': labels_tensor, - 'labels_mask': labels_mask_tensor} - - # --- NEW: Comprehensive logging before returning the item --- - print("\n--- Dataset Item Generation Summary ---") - print(f"Token Address: {token_address}" - ) - print(f"\n[Event Sequence] ({len(item['event_sequence'])} events):") - for i, event in enumerate(item['event_sequence']): - print(f" - Event {i}: {event}") - - print(f"\n[Wallets] ({len(item['wallets'])} wallets):") - for i, (addr, data) in enumerate(item['wallets'].items()): - print(f" - Wallet {addr}:") - print(f" - Profile: {data.get('profile', {})}") - print(f" - Socials: {data.get('socials', {})}") - - print(f"\n[Tokens] ({len(item['tokens'])} tokens):") - for addr, data in item['tokens'].items(): - print(f" - Token {addr}: {data}") - - if self.num_outputs > 0: - print(f"\n[Labels]") - for h_idx, horizon in enumerate(self.horizons_seconds): - offset = h_idx * len(self.quantiles) - values = item['labels'][offset:offset + len(self.quantiles)] - masks = item['labels_mask'][offset:offset + len(self.quantiles)] - print(f" Horizon {horizon}s:") - for q_idx, quantile in enumerate(self.quantiles): - print(f" q={quantile:.2f}: value={values[q_idx]:.6f}, mask={masks[q_idx]:.0f}") - - print("--- End Summary ---\n") - def _generate_dataset_item(self, token_address: str, t0: datetime.datetime, diff --git a/log.log b/log.log index 82b7e3278f833e660e1b1dc7ace4a70f2f0f887d..177a070bba0bce619166ddc2e89a15b4fe0b580b 100644 --- a/log.log +++ b/log.log @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c575df22cf9941bdf678f1fe6167a6e8279ca54cb2e2c96037cd0c86e2ac59e7 -size 53544 +oid sha256:49fbbad7f8b64bdc8c2c12b853433e6ec1e67615288a42112086254d96e0acf5 +size 3985 diff --git a/pre_cache.sh b/pre_cache.sh index fdf8e8ba06f9bd4d458802352a5c69adfd78d5d5..279ec4a21a341313be2d144c71fb1138c89753ea 100644 --- a/pre_cache.sh +++ b/pre_cache.sh @@ -2,14 +2,11 @@ # Pre-caches the dataset for training # Usage: ./pre_cache.sh [max_samples] -MAX_SAMPLES=${1:-1000} +MAX_SAMPLES=${1:-100} echo "Starting dataset caching..." python3 scripts/cache_dataset.py \ --max_samples $MAX_SAMPLES \ - --t_cutoff_seconds 300 \ - --start_date "2024-01-01" \ - --ohlc_stats_path "/workspace/apollo/data/ohlc_stats.npz" \ - --min_trade_usd 10.0 + --ohlc_stats_path "/workspace/apollo/data/ohlc_stats.npz" echo "Done!" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..144a8d0df702722272c33fafea8fab9072a0e9a4 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,15 @@ +pandas +pyarrow +clickhouse-connect +tqdm +PyYaml +datasets +transformers +huggingface_hub +decord +#pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cu126 +clickhouse-driver +neo4j +tensorboard +accelerate +python-dotenv \ No newline at end of file diff --git a/scripts/cache_dataset.py b/scripts/cache_dataset.py index 131e8a30285d6fc6599194a7f6b673ffe80b36c1..f833ffeca55cf729b24d49500d4265212d523ceb 100644 --- a/scripts/cache_dataset.py +++ b/scripts/cache_dataset.py @@ -37,9 +37,9 @@ def main(): parser = argparse.ArgumentParser(description="Pre-cache dataset samples.") parser.add_argument("--max_samples", type=int, default=100, help="Number of samples to cache.") parser.add_argument("--t_cutoff_seconds", type=int, default=60, help="Deprecated; cutoff is randomized at training time.") - parser.add_argument("--start_date", type=str, default="2024-01-01", help="Start date for filtering mints (YYYY-MM-DD).") + parser.add_argument("--start_date", type=str, default=None, help="Start date for filtering mints (YYYY-MM-DD).") parser.add_argument("--ohlc_stats_path", type=str, default=None, help="Path to OHLC stats JSON.") - parser.add_argument("--min_trade_usd", type=float, default=10.0, help="Minimum trade USD value.") + parser.add_argument("--min_trade_usd", type=float, default=0.0, help="Minimum trade USD value.") args = parser.parse_args() @@ -47,7 +47,9 @@ def main(): output_dir = Path(CACHE_DIR) output_dir.mkdir(parents=True, exist_ok=True) - start_date_dt = datetime.datetime.strptime(args.start_date, "%Y-%m-%d").replace(tzinfo=datetime.timezone.utc) + start_date_dt = None + if args.start_date: + start_date_dt = datetime.datetime.strptime(args.start_date, "%Y-%m-%d").replace(tzinfo=datetime.timezone.utc) # --- 1. Set up database connections --- try: