GitHub Actions commited on
Commit Β·
74b343e
1
Parent(s): 25f0954
Sync from GitHub: 98fcc333e64d341f1b2205dcb535e6f4a4f81eef
Browse filesThis view is limited to 50 files because it contains too many changes. Β
See raw diff
- app.py +109 -64
- hf_space/data/loader.py +8 -26
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +11 -8
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +1 -2
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +184 -191
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +88 -49
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +31 -39
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py +119 -88
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +51 -62
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py +101 -108
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +68 -51
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py +96 -76
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/data/__init__.py +1 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +12 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/__init__.py +1 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py +215 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +110 -14
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py +273 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes +35 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile +20 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md +19 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt +3 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/src/streamlit_app.py +40 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt +29 -3
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py +199 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py +167 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py +1 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py +150 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/backtest.py +193 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/conviction.py +93 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py +229 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/charts.py +144 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/calendar.py +91 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/__init__.py +1 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py +1 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py +1 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py +1 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py +1 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/models/__init__.py +1 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py +1 -1
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py +1 -1
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py +1 -1
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py +1 -1
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py +1 -1
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py +216 -0
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py +67 -93
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py +28 -109
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py +4 -1
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py +31 -101
- hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py +43 -0
app.py
CHANGED
|
@@ -8,6 +8,7 @@ P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES
|
|
| 8 |
- Ann. Return compared vs SPY in metrics row
|
| 9 |
- Max Daily DD shows date it occurred
|
| 10 |
- Conviction panel: compact ETF probability list
|
|
|
|
| 11 |
"""
|
| 12 |
|
| 13 |
import os
|
|
@@ -32,6 +33,7 @@ from ui.components import (
|
|
| 32 |
show_metrics_row, show_comparison_table, show_audit_trail,
|
| 33 |
show_all_signals_panel,
|
| 34 |
)
|
|
|
|
| 35 |
|
| 36 |
st.set_page_config(page_title="P2-ETF-CNN-LSTM", page_icon="π§ ", layout="wide")
|
| 37 |
|
|
@@ -43,6 +45,8 @@ for key, default in [
|
|
| 43 |
("test_dates", None), ("test_slice", None), ("optimal_lookback", None),
|
| 44 |
("df_for_chart", None), ("tbill_rate", None), ("target_etfs", None),
|
| 45 |
("from_cache", False),
|
|
|
|
|
|
|
| 46 |
]:
|
| 47 |
if key not in st.session_state:
|
| 48 |
st.session_state[key] = default
|
|
@@ -68,7 +72,6 @@ with st.sidebar:
|
|
| 68 |
# ββ Title βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 69 |
st.title("π§ P2-ETF-CNN-LSTM")
|
| 70 |
st.caption("Approach 1: Wavelet Β· Approach 2: Regime-Conditioned Β· Approach 3: Multi-Scale Parallel")
|
| 71 |
-
st.caption("Winner selected by highest raw annualised return on out-of-sample test set.")
|
| 72 |
|
| 73 |
if not HF_TOKEN:
|
| 74 |
st.error("β HF_TOKEN secret not found.")
|
|
@@ -83,6 +86,7 @@ if df_raw.empty:
|
|
| 83 |
|
| 84 |
freshness = check_data_freshness(df_raw)
|
| 85 |
show_freshness_status(freshness)
|
|
|
|
| 86 |
|
| 87 |
# ββ Dataset info sidebar ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 88 |
with st.sidebar:
|
|
@@ -112,7 +116,7 @@ if run_button:
|
|
| 112 |
st.stop()
|
| 113 |
|
| 114 |
n_etfs = len(target_etfs)
|
| 115 |
-
n_classes = n_etfs
|
| 116 |
|
| 117 |
st.info(
|
| 118 |
f"π― **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} Β· "
|
|
@@ -120,7 +124,6 @@ if run_button:
|
|
| 120 |
f"**T-bill:** {tbill_rate*100:.2f}%"
|
| 121 |
)
|
| 122 |
|
| 123 |
-
# ββ Raw arrays ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 124 |
X_raw = df[input_features].values.astype(np.float32)
|
| 125 |
y_raw = np.clip(df[target_etfs].values.astype(np.float32), -0.5, 0.5)
|
| 126 |
|
|
@@ -133,8 +136,6 @@ if run_button:
|
|
| 133 |
if mask.any():
|
| 134 |
y_raw[mask, j] = 0.0
|
| 135 |
|
| 136 |
-
last_date_str = str(freshness.get("last_date_in_data", "unknown"))
|
| 137 |
-
|
| 138 |
# ββ Auto-select lookback ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 139 |
lb_key = make_cache_key(last_date_str, start_yr, fee_bps, int(epochs),
|
| 140 |
split_option, False, 0)
|
|
@@ -186,12 +187,11 @@ if run_button:
|
|
| 186 |
results, trained_info = {}, {}
|
| 187 |
progress = st.progress(0, text="Training Approach 1...")
|
| 188 |
|
| 189 |
-
for approach, train_fn, predict_fn
|
| 190 |
("Approach 1",
|
| 191 |
lambda: train_approach1(X_train_s, y_train_l, X_val_s, y_val_l,
|
| 192 |
n_classes=n_classes, epochs=int(epochs)),
|
| 193 |
-
lambda m: predict_approach1(m[0], X_test_s),
|
| 194 |
-
None),
|
| 195 |
("Approach 2",
|
| 196 |
lambda: train_approach2(X_train_s, y_train_l, X_val_s, y_val_l,
|
| 197 |
X_flat_all=X_raw, feature_names=input_features,
|
|
@@ -199,13 +199,11 @@ if run_button:
|
|
| 199 |
val_size=val_size, n_classes=n_classes,
|
| 200 |
epochs=int(epochs)),
|
| 201 |
lambda m: predict_approach2(m[0], X_test_s, X_raw, m[3], m[2],
|
| 202 |
-
lookback, train_size, val_size),
|
| 203 |
-
None),
|
| 204 |
("Approach 3",
|
| 205 |
lambda: train_approach3(X_train_s, y_train_l, X_val_s, y_val_l,
|
| 206 |
n_classes=n_classes, epochs=int(epochs)),
|
| 207 |
-
lambda m: predict_approach3(m[0], X_test_s),
|
| 208 |
-
None),
|
| 209 |
]:
|
| 210 |
try:
|
| 211 |
model_out = train_fn()
|
|
@@ -229,7 +227,6 @@ if run_button:
|
|
| 229 |
"test_dates": list(test_dates), "test_slice": test_slice,
|
| 230 |
})
|
| 231 |
|
| 232 |
-
# ββ Persist to session state ββββββββββββββββββββββββββββββββββββββββββββββ
|
| 233 |
st.session_state.update({
|
| 234 |
"results": results, "trained_info": trained_info,
|
| 235 |
"test_dates": test_dates, "test_slice": test_slice,
|
|
@@ -238,68 +235,116 @@ if run_button:
|
|
| 238 |
"output_ready": True,
|
| 239 |
})
|
| 240 |
|
| 241 |
-
# ββ
|
| 242 |
-
|
| 243 |
-
st.info("π Configure parameters and click **π Run All 3 Approaches**.")
|
| 244 |
-
st.stop()
|
| 245 |
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
target_etfs = st.session_state.target_etfs
|
| 254 |
|
| 255 |
-
|
| 256 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
st.stop()
|
| 261 |
|
| 262 |
-
if
|
| 263 |
-
|
|
|
|
| 264 |
|
| 265 |
-
|
| 266 |
-
st.divider()
|
| 267 |
|
| 268 |
-
|
|
|
|
| 269 |
|
| 270 |
-
|
| 271 |
-
conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash=False)
|
| 272 |
-
show_conviction_panel(conviction)
|
| 273 |
|
| 274 |
-
|
|
|
|
|
|
|
| 275 |
|
| 276 |
-
|
| 277 |
-
name: {"signal": res["next_signal"],
|
| 278 |
-
"proba": trained_info[name]["proba"][-1],
|
| 279 |
-
"is_winner": name == winner_name}
|
| 280 |
-
for name, res in results.items() if res is not None
|
| 281 |
-
}
|
| 282 |
-
show_all_signals_panel(all_signals, target_etfs, False, next_date, optimal_lookback)
|
| 283 |
|
| 284 |
-
|
| 285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
|
| 297 |
-
|
|
|
|
|
|
|
| 298 |
|
| 299 |
-
st.divider()
|
| 300 |
-
st.subheader("
|
| 301 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
- Ann. Return compared vs SPY in metrics row
|
| 9 |
- Max Daily DD shows date it occurred
|
| 10 |
- Conviction panel: compact ETF probability list
|
| 11 |
+
- [NEW] Multi-Year Sweep tab: runs 8 start years, vote tally + comparison table
|
| 12 |
"""
|
| 13 |
|
| 14 |
import os
|
|
|
|
| 33 |
show_metrics_row, show_comparison_table, show_audit_trail,
|
| 34 |
show_all_signals_panel,
|
| 35 |
)
|
| 36 |
+
from ui.multiyear import run_multiyear_sweep, show_multiyear_results
|
| 37 |
|
| 38 |
st.set_page_config(page_title="P2-ETF-CNN-LSTM", page_icon="π§ ", layout="wide")
|
| 39 |
|
|
|
|
| 45 |
("test_dates", None), ("test_slice", None), ("optimal_lookback", None),
|
| 46 |
("df_for_chart", None), ("tbill_rate", None), ("target_etfs", None),
|
| 47 |
("from_cache", False),
|
| 48 |
+
# Multi-year sweep state
|
| 49 |
+
("multiyear_ready", False), ("multiyear_results", None),
|
| 50 |
]:
|
| 51 |
if key not in st.session_state:
|
| 52 |
st.session_state[key] = default
|
|
|
|
| 72 |
# ββ Title βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 73 |
st.title("π§ P2-ETF-CNN-LSTM")
|
| 74 |
st.caption("Approach 1: Wavelet Β· Approach 2: Regime-Conditioned Β· Approach 3: Multi-Scale Parallel")
|
|
|
|
| 75 |
|
| 76 |
if not HF_TOKEN:
|
| 77 |
st.error("β HF_TOKEN secret not found.")
|
|
|
|
| 86 |
|
| 87 |
freshness = check_data_freshness(df_raw)
|
| 88 |
show_freshness_status(freshness)
|
| 89 |
+
last_date_str = str(freshness.get("last_date_in_data", "unknown"))
|
| 90 |
|
| 91 |
# ββ Dataset info sidebar ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 92 |
with st.sidebar:
|
|
|
|
| 116 |
st.stop()
|
| 117 |
|
| 118 |
n_etfs = len(target_etfs)
|
| 119 |
+
n_classes = n_etfs
|
| 120 |
|
| 121 |
st.info(
|
| 122 |
f"π― **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} Β· "
|
|
|
|
| 124 |
f"**T-bill:** {tbill_rate*100:.2f}%"
|
| 125 |
)
|
| 126 |
|
|
|
|
| 127 |
X_raw = df[input_features].values.astype(np.float32)
|
| 128 |
y_raw = np.clip(df[target_etfs].values.astype(np.float32), -0.5, 0.5)
|
| 129 |
|
|
|
|
| 136 |
if mask.any():
|
| 137 |
y_raw[mask, j] = 0.0
|
| 138 |
|
|
|
|
|
|
|
| 139 |
# ββ Auto-select lookback ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 140 |
lb_key = make_cache_key(last_date_str, start_yr, fee_bps, int(epochs),
|
| 141 |
split_option, False, 0)
|
|
|
|
| 187 |
results, trained_info = {}, {}
|
| 188 |
progress = st.progress(0, text="Training Approach 1...")
|
| 189 |
|
| 190 |
+
for approach, train_fn, predict_fn in [
|
| 191 |
("Approach 1",
|
| 192 |
lambda: train_approach1(X_train_s, y_train_l, X_val_s, y_val_l,
|
| 193 |
n_classes=n_classes, epochs=int(epochs)),
|
| 194 |
+
lambda m: predict_approach1(m[0], X_test_s)),
|
|
|
|
| 195 |
("Approach 2",
|
| 196 |
lambda: train_approach2(X_train_s, y_train_l, X_val_s, y_val_l,
|
| 197 |
X_flat_all=X_raw, feature_names=input_features,
|
|
|
|
| 199 |
val_size=val_size, n_classes=n_classes,
|
| 200 |
epochs=int(epochs)),
|
| 201 |
lambda m: predict_approach2(m[0], X_test_s, X_raw, m[3], m[2],
|
| 202 |
+
lookback, train_size, val_size)),
|
|
|
|
| 203 |
("Approach 3",
|
| 204 |
lambda: train_approach3(X_train_s, y_train_l, X_val_s, y_val_l,
|
| 205 |
n_classes=n_classes, epochs=int(epochs)),
|
| 206 |
+
lambda m: predict_approach3(m[0], X_test_s)),
|
|
|
|
| 207 |
]:
|
| 208 |
try:
|
| 209 |
model_out = train_fn()
|
|
|
|
| 227 |
"test_dates": list(test_dates), "test_slice": test_slice,
|
| 228 |
})
|
| 229 |
|
|
|
|
| 230 |
st.session_state.update({
|
| 231 |
"results": results, "trained_info": trained_info,
|
| 232 |
"test_dates": test_dates, "test_slice": test_slice,
|
|
|
|
| 235 |
"output_ready": True,
|
| 236 |
})
|
| 237 |
|
| 238 |
+
# ββ TABS ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 239 |
+
tab_single, tab_sweep = st.tabs(["π Single-Year Results", "π Multi-Year Consensus Sweep"])
|
|
|
|
|
|
|
| 240 |
|
| 241 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 242 |
+
# TAB 1 β existing single-year output (unchanged)
|
| 243 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 244 |
+
with tab_single:
|
| 245 |
+
if not st.session_state.output_ready:
|
| 246 |
+
st.info("π Configure parameters and click **π Run All 3 Approaches**.")
|
| 247 |
+
st.stop()
|
|
|
|
| 248 |
|
| 249 |
+
results = st.session_state.results
|
| 250 |
+
trained_info = st.session_state.trained_info
|
| 251 |
+
test_dates = st.session_state.test_dates
|
| 252 |
+
test_slice = st.session_state.test_slice
|
| 253 |
+
optimal_lookback = st.session_state.optimal_lookback
|
| 254 |
+
df = st.session_state.df_for_chart
|
| 255 |
+
tbill_rate = st.session_state.tbill_rate
|
| 256 |
+
target_etfs = st.session_state.target_etfs
|
| 257 |
|
| 258 |
+
winner_name = select_winner(results)
|
| 259 |
+
winner_res = results.get(winner_name)
|
|
|
|
| 260 |
|
| 261 |
+
if winner_res is None:
|
| 262 |
+
st.error("β All approaches failed.")
|
| 263 |
+
st.stop()
|
| 264 |
|
| 265 |
+
st.caption("Winner selected by highest raw annualised return on out-of-sample test set.")
|
|
|
|
| 266 |
|
| 267 |
+
next_date = get_next_signal_date()
|
| 268 |
+
st.divider()
|
| 269 |
|
| 270 |
+
show_signal_banner(winner_res["next_signal"], next_date, winner_name)
|
|
|
|
|
|
|
| 271 |
|
| 272 |
+
winner_proba = trained_info[winner_name]["proba"]
|
| 273 |
+
conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash=False)
|
| 274 |
+
show_conviction_panel(conviction)
|
| 275 |
|
| 276 |
+
st.divider()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
|
| 278 |
+
all_signals = {
|
| 279 |
+
name: {"signal": res["next_signal"],
|
| 280 |
+
"proba": trained_info[name]["proba"][-1],
|
| 281 |
+
"is_winner": name == winner_name}
|
| 282 |
+
for name, res in results.items() if res is not None
|
| 283 |
+
}
|
| 284 |
+
show_all_signals_panel(all_signals, target_etfs, False, next_date, optimal_lookback)
|
| 285 |
|
| 286 |
+
st.divider()
|
| 287 |
+
st.subheader(f"π {winner_name} β Performance Metrics")
|
| 288 |
+
|
| 289 |
+
spy_ann = None
|
| 290 |
+
if "SPY_Ret" in df.columns:
|
| 291 |
+
spy_raw = df["SPY_Ret"].iloc[test_slice].values.copy().astype(float)
|
| 292 |
+
spy_raw = spy_raw[~np.isnan(spy_raw)]
|
| 293 |
+
spy_raw = np.clip(spy_raw, -0.5, 0.5)
|
| 294 |
+
if len(spy_raw) > 5:
|
| 295 |
+
spy_cum = np.prod(1 + spy_raw)
|
| 296 |
+
spy_ann = float(spy_cum ** (252 / len(spy_raw)) - 1)
|
| 297 |
+
|
| 298 |
+
show_metrics_row(winner_res, tbill_rate, spy_ann_return=spy_ann)
|
| 299 |
|
| 300 |
+
st.divider()
|
| 301 |
+
st.subheader("π Approach Comparison (Winner = Highest Raw Annualised Return)")
|
| 302 |
+
show_comparison_table(build_comparison_table(results, winner_name))
|
| 303 |
|
| 304 |
+
st.divider()
|
| 305 |
+
st.subheader(f"π Audit Trail β {winner_name} (Last 20 Trading Days)")
|
| 306 |
+
show_audit_trail(winner_res["audit_trail"])
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 310 |
+
# TAB 2 β Multi-Year Consensus Sweep
|
| 311 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 312 |
+
with tab_sweep:
|
| 313 |
+
st.subheader("π Multi-Year Consensus Sweep")
|
| 314 |
+
st.markdown(
|
| 315 |
+
"Runs the winner model (Approach 2 proxy) across **8 start years** "
|
| 316 |
+
"and aggregates signals into a consensus vote. "
|
| 317 |
+
"Each year uses the same fee, epochs, and split settings as the sidebar. "
|
| 318 |
+
"Results are cached β only untrained years incur compute."
|
| 319 |
+
)
|
| 320 |
|
| 321 |
+
SWEEP_YEARS = [2010, 2012, 2014, 2016, 2018, 2019, 2021, 2023]
|
| 322 |
+
|
| 323 |
+
col_l, col_r = st.columns([2, 1])
|
| 324 |
+
with col_l:
|
| 325 |
+
st.caption(f"Sweep years: {', '.join(str(y) for y in SWEEP_YEARS)}")
|
| 326 |
+
with col_r:
|
| 327 |
+
sweep_button = st.button("π Run Consensus Sweep", type="primary", use_container_width=True)
|
| 328 |
+
|
| 329 |
+
if sweep_button:
|
| 330 |
+
st.session_state.multiyear_ready = False
|
| 331 |
+
sweep_results = run_multiyear_sweep(
|
| 332 |
+
df_raw = df_raw,
|
| 333 |
+
sweep_years = SWEEP_YEARS,
|
| 334 |
+
fee_bps = fee_bps,
|
| 335 |
+
epochs = int(epochs),
|
| 336 |
+
split_option = split_option,
|
| 337 |
+
last_date_str = last_date_str,
|
| 338 |
+
train_pct = train_pct,
|
| 339 |
+
val_pct = val_pct,
|
| 340 |
+
)
|
| 341 |
+
st.session_state.multiyear_results = sweep_results
|
| 342 |
+
st.session_state.multiyear_ready = True
|
| 343 |
+
|
| 344 |
+
if st.session_state.multiyear_ready and st.session_state.multiyear_results:
|
| 345 |
+
show_multiyear_results(
|
| 346 |
+
st.session_state.multiyear_results,
|
| 347 |
+
sweep_years = SWEEP_YEARS,
|
| 348 |
+
)
|
| 349 |
+
elif not st.session_state.multiyear_ready:
|
| 350 |
+
st.info("Click **π Run Consensus Sweep** to analyse all start years at once.")
|
hf_space/data/loader.py
CHANGED
|
@@ -4,14 +4,12 @@ Loads master_data.parquet from HF Dataset.
|
|
| 4 |
Engineers rich feature set from raw price/macro columns.
|
| 5 |
No external pings β all data from HF Dataset only.
|
| 6 |
"""
|
| 7 |
-
|
| 8 |
import pandas as pd
|
| 9 |
import numpy as np
|
| 10 |
import streamlit as st
|
| 11 |
from huggingface_hub import hf_hub_download
|
| 12 |
from datetime import datetime, timedelta
|
| 13 |
import pytz
|
| 14 |
-
|
| 15 |
try:
|
| 16 |
import pandas_market_calendars as mcal
|
| 17 |
NYSE_CAL_AVAILABLE = True
|
|
@@ -20,14 +18,12 @@ except ImportError:
|
|
| 20 |
|
| 21 |
DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
|
| 22 |
PARQUET_FILE = "master_data.parquet"
|
| 23 |
-
TARGET_ETF_COLS = ["TLT", "
|
| 24 |
BENCHMARK_COLS = ["SPY", "AGG"]
|
| 25 |
TBILL_COL = "TBILL_3M"
|
| 26 |
MACRO_COLS = ["VIX", "DXY", "T10Y2Y", "IG_SPREAD", "HY_SPREAD"]
|
| 27 |
|
| 28 |
-
|
| 29 |
# ββ NYSE calendar βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 30 |
-
|
| 31 |
def get_last_nyse_trading_day(as_of=None):
|
| 32 |
est = pytz.timezone("US/Eastern")
|
| 33 |
if as_of is None:
|
|
@@ -46,9 +42,7 @@ def get_last_nyse_trading_day(as_of=None):
|
|
| 46 |
candidate -= timedelta(days=1)
|
| 47 |
return candidate
|
| 48 |
|
| 49 |
-
|
| 50 |
# ββ Data loading ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 51 |
-
|
| 52 |
@st.cache_data(ttl=3600, show_spinner=False)
|
| 53 |
def load_dataset(hf_token: str) -> pd.DataFrame:
|
| 54 |
try:
|
|
@@ -64,15 +58,13 @@ def load_dataset(hf_token: str) -> pd.DataFrame:
|
|
| 64 |
if col in df.columns:
|
| 65 |
df = df.set_index(col)
|
| 66 |
break
|
| 67 |
-
|
| 68 |
return df.sort_index()
|
| 69 |
except Exception as e:
|
| 70 |
st.error(f"β Failed to load dataset: {e}")
|
| 71 |
return pd.DataFrame()
|
| 72 |
|
| 73 |
-
|
| 74 |
# ββ Freshness check βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 75 |
-
|
| 76 |
def check_data_freshness(df: pd.DataFrame) -> dict:
|
| 77 |
if df.empty:
|
| 78 |
return {"fresh": False, "message": "Dataset is empty."}
|
|
@@ -80,16 +72,14 @@ def check_data_freshness(df: pd.DataFrame) -> dict:
|
|
| 80 |
expect = get_last_nyse_trading_day()
|
| 81 |
fresh = last >= expect
|
| 82 |
msg = (
|
| 83 |
-
f"β
Dataset up to date through
|
| 84 |
-
f"β οΈ
|
| 85 |
f"Dataset updates daily after market close."
|
| 86 |
)
|
| 87 |
return {"fresh": fresh, "last_date_in_data": last,
|
| 88 |
"expected_date": expect, "message": msg}
|
| 89 |
|
| 90 |
-
|
| 91 |
# ββ Price β returns βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 92 |
-
|
| 93 |
def _to_returns(series: pd.Series) -> pd.Series:
|
| 94 |
"""Convert price series to daily pct returns. If already returns, pass through."""
|
| 95 |
clean = series.dropna()
|
|
@@ -99,13 +89,10 @@ def _to_returns(series: pd.Series) -> pd.Series:
|
|
| 99 |
return series.pct_change()
|
| 100 |
return series # already returns
|
| 101 |
|
| 102 |
-
|
| 103 |
# ββ Feature engineering βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 104 |
-
|
| 105 |
def _engineer_features(df: pd.DataFrame, ret_cols: list) -> pd.DataFrame:
|
| 106 |
"""
|
| 107 |
Build a rich feature set from raw macro + ETF return columns.
|
| 108 |
-
|
| 109 |
Features added per ETF return:
|
| 110 |
- 1d, 5d, 21d lagged returns
|
| 111 |
- 5d, 21d rolling volatility
|
|
@@ -120,7 +107,7 @@ def _engineer_features(df: pd.DataFrame, ret_cols: list) -> pd.DataFrame:
|
|
| 120 |
- TBILL_3M as a feature (rate level)
|
| 121 |
- VIX regime flag (VIX > 25)
|
| 122 |
- Yield curve slope (already T10Y2Y)
|
| 123 |
-
- Cross-asset momentum: spread between TLT_ret and
|
| 124 |
"""
|
| 125 |
feat = pd.DataFrame(index=df.index)
|
| 126 |
|
|
@@ -154,9 +141,9 @@ def _engineer_features(df: pd.DataFrame, ret_cols: list) -> pd.DataFrame:
|
|
| 154 |
feat["TBILL_chg5"] = tbill.diff(5)
|
| 155 |
|
| 156 |
# ββ Derived cross-asset signals βββββββββββββββββββββββββββββββββββββββββββ
|
| 157 |
-
if "TLT_Ret" in df.columns and "
|
| 158 |
-
feat["
|
| 159 |
-
df["TLT_Ret"].rolling(5).sum() - df["
|
| 160 |
)
|
| 161 |
|
| 162 |
if "VIX" in df.columns:
|
|
@@ -171,13 +158,10 @@ def _engineer_features(df: pd.DataFrame, ret_cols: list) -> pd.DataFrame:
|
|
| 171 |
|
| 172 |
return feat
|
| 173 |
|
| 174 |
-
|
| 175 |
# ββ Main extraction function ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 176 |
-
|
| 177 |
def get_features_and_targets(df: pd.DataFrame):
|
| 178 |
"""
|
| 179 |
Build return columns for target ETFs and engineer a rich feature set.
|
| 180 |
-
|
| 181 |
Returns:
|
| 182 |
input_features : list[str]
|
| 183 |
target_etfs : list[str] e.g. ["TLT_Ret", ...]
|
|
@@ -240,9 +224,7 @@ def get_features_and_targets(df: pd.DataFrame):
|
|
| 240 |
|
| 241 |
return input_features, target_etfs, tbill_rate, df, col_info
|
| 242 |
|
| 243 |
-
|
| 244 |
# ββ Dataset summary βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 245 |
-
|
| 246 |
def dataset_summary(df: pd.DataFrame) -> dict:
|
| 247 |
if df.empty:
|
| 248 |
return {}
|
|
|
|
| 4 |
Engineers rich feature set from raw price/macro columns.
|
| 5 |
No external pings β all data from HF Dataset only.
|
| 6 |
"""
|
|
|
|
| 7 |
import pandas as pd
|
| 8 |
import numpy as np
|
| 9 |
import streamlit as st
|
| 10 |
from huggingface_hub import hf_hub_download
|
| 11 |
from datetime import datetime, timedelta
|
| 12 |
import pytz
|
|
|
|
| 13 |
try:
|
| 14 |
import pandas_market_calendars as mcal
|
| 15 |
NYSE_CAL_AVAILABLE = True
|
|
|
|
| 18 |
|
| 19 |
DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
|
| 20 |
PARQUET_FILE = "master_data.parquet"
|
| 21 |
+
TARGET_ETF_COLS = ["TLT", "VNQ", "SLV", "GLD", "LQD", "HYG", "VCIT"]
|
| 22 |
BENCHMARK_COLS = ["SPY", "AGG"]
|
| 23 |
TBILL_COL = "TBILL_3M"
|
| 24 |
MACRO_COLS = ["VIX", "DXY", "T10Y2Y", "IG_SPREAD", "HY_SPREAD"]
|
| 25 |
|
|
|
|
| 26 |
# ββ NYSE calendar βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 27 |
def get_last_nyse_trading_day(as_of=None):
|
| 28 |
est = pytz.timezone("US/Eastern")
|
| 29 |
if as_of is None:
|
|
|
|
| 42 |
candidate -= timedelta(days=1)
|
| 43 |
return candidate
|
| 44 |
|
|
|
|
| 45 |
# ββ Data loading ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 46 |
@st.cache_data(ttl=3600, show_spinner=False)
|
| 47 |
def load_dataset(hf_token: str) -> pd.DataFrame:
|
| 48 |
try:
|
|
|
|
| 58 |
if col in df.columns:
|
| 59 |
df = df.set_index(col)
|
| 60 |
break
|
| 61 |
+
df.index = pd.to_datetime(df.index)
|
| 62 |
return df.sort_index()
|
| 63 |
except Exception as e:
|
| 64 |
st.error(f"β Failed to load dataset: {e}")
|
| 65 |
return pd.DataFrame()
|
| 66 |
|
|
|
|
| 67 |
# ββ Freshness check βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 68 |
def check_data_freshness(df: pd.DataFrame) -> dict:
|
| 69 |
if df.empty:
|
| 70 |
return {"fresh": False, "message": "Dataset is empty."}
|
|
|
|
| 72 |
expect = get_last_nyse_trading_day()
|
| 73 |
fresh = last >= expect
|
| 74 |
msg = (
|
| 75 |
+
f"β
Dataset up to date through {last}." if fresh else
|
| 76 |
+
f"β οΈ {expect} data not yet updated. Latest: {last}. "
|
| 77 |
f"Dataset updates daily after market close."
|
| 78 |
)
|
| 79 |
return {"fresh": fresh, "last_date_in_data": last,
|
| 80 |
"expected_date": expect, "message": msg}
|
| 81 |
|
|
|
|
| 82 |
# ββ Price β returns βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 83 |
def _to_returns(series: pd.Series) -> pd.Series:
|
| 84 |
"""Convert price series to daily pct returns. If already returns, pass through."""
|
| 85 |
clean = series.dropna()
|
|
|
|
| 89 |
return series.pct_change()
|
| 90 |
return series # already returns
|
| 91 |
|
|
|
|
| 92 |
# ββ Feature engineering βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 93 |
def _engineer_features(df: pd.DataFrame, ret_cols: list) -> pd.DataFrame:
|
| 94 |
"""
|
| 95 |
Build a rich feature set from raw macro + ETF return columns.
|
|
|
|
| 96 |
Features added per ETF return:
|
| 97 |
- 1d, 5d, 21d lagged returns
|
| 98 |
- 5d, 21d rolling volatility
|
|
|
|
| 107 |
- TBILL_3M as a feature (rate level)
|
| 108 |
- VIX regime flag (VIX > 25)
|
| 109 |
- Yield curve slope (already T10Y2Y)
|
| 110 |
+
- Cross-asset momentum: spread between TLT_ret and AGG_ret
|
| 111 |
"""
|
| 112 |
feat = pd.DataFrame(index=df.index)
|
| 113 |
|
|
|
|
| 141 |
feat["TBILL_chg5"] = tbill.diff(5)
|
| 142 |
|
| 143 |
# ββ Derived cross-asset signals βββββββββββββββββββββββββββββββββββββββββββ
|
| 144 |
+
if "TLT_Ret" in df.columns and "AGG_Ret" in df.columns:
|
| 145 |
+
feat["TLT_AGG_spread_mom5"] = (
|
| 146 |
+
df["TLT_Ret"].rolling(5).sum() - df["AGG_Ret"].rolling(5).sum()
|
| 147 |
)
|
| 148 |
|
| 149 |
if "VIX" in df.columns:
|
|
|
|
| 158 |
|
| 159 |
return feat
|
| 160 |
|
|
|
|
| 161 |
# ββ Main extraction function ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 162 |
def get_features_and_targets(df: pd.DataFrame):
|
| 163 |
"""
|
| 164 |
Build return columns for target ETFs and engineer a rich feature set.
|
|
|
|
| 165 |
Returns:
|
| 166 |
input_features : list[str]
|
| 167 |
target_etfs : list[str] e.g. ["TLT_Ret", ...]
|
|
|
|
| 224 |
|
| 225 |
return input_features, target_etfs, tbill_rate, df, col_info
|
| 226 |
|
|
|
|
| 227 |
# ββ Dataset summary βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 228 |
def dataset_summary(df: pd.DataFrame) -> dict:
|
| 229 |
if df.empty:
|
| 230 |
return {}
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py
CHANGED
|
@@ -32,7 +32,6 @@ from ui.components import (
|
|
| 32 |
show_metrics_row, show_comparison_table, show_audit_trail,
|
| 33 |
show_all_signals_panel,
|
| 34 |
)
|
| 35 |
-
from ui.charts import equity_curve_chart
|
| 36 |
|
| 37 |
st.set_page_config(page_title="P2-ETF-CNN-LSTM", page_icon="π§ ", layout="wide")
|
| 38 |
|
|
@@ -169,7 +168,7 @@ if run_button:
|
|
| 169 |
st.success("β‘ Results loaded from cache β no retraining needed.")
|
| 170 |
else:
|
| 171 |
X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
|
| 172 |
-
y_labels = returns_to_labels(y_seq
|
| 173 |
|
| 174 |
(X_train, y_train_r, X_val, y_val_r,
|
| 175 |
X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
|
|
@@ -285,18 +284,22 @@ show_all_signals_panel(all_signals, target_etfs, False, next_date, optimal_lookb
|
|
| 285 |
st.divider()
|
| 286 |
st.subheader(f"π {winner_name} β Performance Metrics")
|
| 287 |
|
| 288 |
-
#
|
| 289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
show_metrics_row(winner_res, tbill_rate, spy_ann_return=spy_ann)
|
| 291 |
|
| 292 |
st.divider()
|
| 293 |
st.subheader("π Approach Comparison (Winner = Highest Raw Annualised Return)")
|
| 294 |
show_comparison_table(build_comparison_table(results, winner_name))
|
| 295 |
|
| 296 |
-
st.divider()
|
| 297 |
-
st.subheader(f"π {winner_name} vs SPY & AGG β Out-of-Sample")
|
| 298 |
-
st.plotly_chart(fig, use_container_width=True)
|
| 299 |
-
|
| 300 |
st.divider()
|
| 301 |
st.subheader(f"π Audit Trail β {winner_name} (Last 20 Trading Days)")
|
| 302 |
show_audit_trail(winner_res["audit_trail"])
|
|
|
|
| 32 |
show_metrics_row, show_comparison_table, show_audit_trail,
|
| 33 |
show_all_signals_panel,
|
| 34 |
)
|
|
|
|
| 35 |
|
| 36 |
st.set_page_config(page_title="P2-ETF-CNN-LSTM", page_icon="π§ ", layout="wide")
|
| 37 |
|
|
|
|
| 168 |
st.success("β‘ Results loaded from cache β no retraining needed.")
|
| 169 |
else:
|
| 170 |
X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
|
| 171 |
+
y_labels = returns_to_labels(y_seq)
|
| 172 |
|
| 173 |
(X_train, y_train_r, X_val, y_val_r,
|
| 174 |
X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
|
|
|
|
| 284 |
st.divider()
|
| 285 |
st.subheader(f"π {winner_name} β Performance Metrics")
|
| 286 |
|
| 287 |
+
# Compute SPY annualised return directly from raw returns for metrics comparison
|
| 288 |
+
spy_ann = None
|
| 289 |
+
if "SPY_Ret" in df.columns:
|
| 290 |
+
spy_raw = df["SPY_Ret"].iloc[test_slice].values.copy().astype(float)
|
| 291 |
+
spy_raw = spy_raw[~np.isnan(spy_raw)]
|
| 292 |
+
spy_raw = np.clip(spy_raw, -0.5, 0.5)
|
| 293 |
+
if len(spy_raw) > 5:
|
| 294 |
+
spy_cum = np.prod(1 + spy_raw)
|
| 295 |
+
spy_ann = float(spy_cum ** (252 / len(spy_raw)) - 1)
|
| 296 |
+
|
| 297 |
show_metrics_row(winner_res, tbill_rate, spy_ann_return=spy_ann)
|
| 298 |
|
| 299 |
st.divider()
|
| 300 |
st.subheader("π Approach Comparison (Winner = Highest Raw Annualised Return)")
|
| 301 |
show_comparison_table(build_comparison_table(results, winner_name))
|
| 302 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
st.divider()
|
| 304 |
st.subheader(f"π Audit Trail β {winner_name} (Last 20 Trading Days)")
|
| 305 |
show_audit_trail(winner_res["audit_trail"])
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py
CHANGED
|
@@ -148,8 +148,7 @@ if run_button:
|
|
| 148 |
with st.spinner("π Auto-selecting optimal lookback (30 / 45 / 60d)..."):
|
| 149 |
optimal_lookback = find_best_lookback(
|
| 150 |
X_raw, y_raw,
|
| 151 |
-
|
| 152 |
-
train_pct, val_pct, n_classes, False,
|
| 153 |
candidates=[30, 45, 60],
|
| 154 |
)
|
| 155 |
save_cache(f"lb_{lb_key}", {"optimal_lookback": optimal_lookback})
|
|
|
|
| 148 |
with st.spinner("π Auto-selecting optimal lookback (30 / 45 / 60d)..."):
|
| 149 |
optimal_lookback = find_best_lookback(
|
| 150 |
X_raw, y_raw,
|
| 151 |
+
train_pct, val_pct, n_classes,
|
|
|
|
| 152 |
candidates=[30, 45, 60],
|
| 153 |
)
|
| 154 |
save_cache(f"lb_{lb_key}", {"optimal_lookback": optimal_lookback})
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py
CHANGED
|
@@ -1,7 +1,13 @@
|
|
| 1 |
"""
|
| 2 |
app.py
|
| 3 |
P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
import os
|
|
@@ -32,11 +38,20 @@ st.set_page_config(page_title="P2-ETF-CNN-LSTM", page_icon="π§ ", layout="wide"
|
|
| 32 |
|
| 33 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
# ββ Sidebar βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 36 |
with st.sidebar:
|
| 37 |
st.header("βοΈ Configuration")
|
| 38 |
-
|
| 39 |
-
st.write(f"π **EST:** {now_est.strftime('%H:%M:%S')}")
|
| 40 |
st.divider()
|
| 41 |
|
| 42 |
start_yr = st.slider("π
Start Year", 2010, 2024, 2016)
|
|
@@ -47,9 +62,7 @@ with st.sidebar:
|
|
| 47 |
split_option = st.selectbox("π Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0)
|
| 48 |
train_pct, val_pct = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)}[split_option]
|
| 49 |
|
| 50 |
-
|
| 51 |
-
help="Model can select CASH (earns T-bill rate) instead of any ETF")
|
| 52 |
-
|
| 53 |
st.divider()
|
| 54 |
run_button = st.button("π Run All 3 Approaches", type="primary", use_container_width=True)
|
| 55 |
|
|
@@ -85,226 +98,206 @@ with st.sidebar:
|
|
| 85 |
st.write(f"**Macro:** {', '.join(summary['macro_found'])}")
|
| 86 |
st.write(f"**T-bill col:** {'β
' if summary['tbill_found'] else 'β'}")
|
| 87 |
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
st.
|
| 91 |
-
|
| 92 |
-
# ββ Filter by start year ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 93 |
-
df = df_raw[df_raw.index.year >= start_yr].copy()
|
| 94 |
-
st.write(f"π
**Data:** {df.index[0].strftime('%Y-%m-%d')} β {df.index[-1].strftime('%Y-%m-%d')} "
|
| 95 |
-
f"({df.index[-1].year - df.index[0].year + 1} years)")
|
| 96 |
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
except ValueError as e:
|
| 101 |
-
st.error(str(e))
|
| 102 |
-
st.stop()
|
| 103 |
|
| 104 |
-
n_etfs = len(target_etfs)
|
| 105 |
-
n_classes = n_etfs + (1 if include_cash else 0)
|
| 106 |
-
|
| 107 |
-
st.info(
|
| 108 |
-
f"π― **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} Β· "
|
| 109 |
-
f"**Features:** {len(input_features)} signals Β· "
|
| 110 |
-
f"**T-bill:** {tbill_rate*100:.2f}%"
|
| 111 |
-
)
|
| 112 |
-
|
| 113 |
-
# ββ Prepare raw arrays ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 114 |
-
X_raw = df[input_features].values.astype(np.float32)
|
| 115 |
-
y_raw = df[target_etfs].values.astype(np.float32)
|
| 116 |
-
|
| 117 |
-
for j in range(X_raw.shape[1]):
|
| 118 |
-
mask = np.isnan(X_raw[:, j])
|
| 119 |
-
if mask.any():
|
| 120 |
-
X_raw[mask, j] = np.nanmean(X_raw[:, j])
|
| 121 |
-
for j in range(y_raw.shape[1]):
|
| 122 |
-
mask = np.isnan(y_raw[:, j])
|
| 123 |
-
if mask.any():
|
| 124 |
-
y_raw[mask, j] = np.nanmean(y_raw[:, j])
|
| 125 |
-
|
| 126 |
-
# ββ Auto-select optimal lookback ββββββββββββββββββββββββββββββββββββββββββββββ
|
| 127 |
-
last_date_str = str(freshness.get("last_date_in_data", "unknown"))
|
| 128 |
-
|
| 129 |
-
# Check cache for lookback selection too
|
| 130 |
-
lb_cache_key = make_cache_key(
|
| 131 |
-
last_date_str, start_yr, fee_bps, int(epochs), split_option, include_cash, 0
|
| 132 |
-
)
|
| 133 |
-
lb_cached = load_cache(f"lb_{lb_cache_key}")
|
| 134 |
-
|
| 135 |
-
if lb_cached is not None:
|
| 136 |
-
optimal_lookback = lb_cached["optimal_lookback"]
|
| 137 |
-
st.success(f"β‘ Loaded from cache Β· Optimal lookback: **{optimal_lookback}d**")
|
| 138 |
-
else:
|
| 139 |
-
with st.spinner("π Finding optimal lookback (30 / 45 / 60d)..."):
|
| 140 |
-
def _y_labels_fn(y_seq):
|
| 141 |
-
return returns_to_labels(y_seq, include_cash=include_cash)
|
| 142 |
-
optimal_lookback = find_best_lookback(
|
| 143 |
-
X_raw, y_raw, _y_labels_fn,
|
| 144 |
-
train_pct, val_pct, n_classes, include_cash,
|
| 145 |
-
candidates=[30, 45, 60],
|
| 146 |
-
)
|
| 147 |
-
save_cache(f"lb_{lb_cache_key}", {"optimal_lookback": optimal_lookback})
|
| 148 |
-
st.success(f"π Optimal lookback: **{optimal_lookback}d** (auto-selected from 30/45/60)")
|
| 149 |
-
|
| 150 |
-
lookback = optimal_lookback
|
| 151 |
-
|
| 152 |
-
# ββ Check full model cache ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 153 |
-
cache_key = make_cache_key(last_date_str, start_yr, fee_bps, int(epochs),
|
| 154 |
-
split_option, include_cash, lookback)
|
| 155 |
-
cached_data = load_cache(cache_key)
|
| 156 |
-
from_cache = cached_data is not None
|
| 157 |
-
|
| 158 |
-
if from_cache:
|
| 159 |
-
results = cached_data["results"]
|
| 160 |
-
trained_info = cached_data["trained_info"]
|
| 161 |
-
test_dates = pd.DatetimeIndex(cached_data["test_dates"])
|
| 162 |
-
test_slice = cached_data["test_slice"]
|
| 163 |
-
st.success("β‘ Results loaded from cache β no retraining needed.")
|
| 164 |
-
else:
|
| 165 |
-
# ββ Build sequences βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 166 |
-
X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
|
| 167 |
-
y_labels = returns_to_labels(y_seq, include_cash=include_cash)
|
| 168 |
-
|
| 169 |
-
(X_train, y_train_r, X_val, y_val_r,
|
| 170 |
-
X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
|
| 171 |
-
(_, y_train_l, _, y_val_l,
|
| 172 |
-
_, _) = train_val_test_split(X_seq, y_labels, train_pct, val_pct)
|
| 173 |
-
|
| 174 |
-
X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
|
| 175 |
-
|
| 176 |
-
train_size = len(X_train)
|
| 177 |
-
val_size = len(X_val)
|
| 178 |
-
test_start = lookback + train_size + val_size
|
| 179 |
-
test_dates = df.index[test_start: test_start + len(X_test)]
|
| 180 |
-
test_slice = slice(test_start, test_start + len(X_test))
|
| 181 |
-
|
| 182 |
-
results = {}
|
| 183 |
-
trained_info = {}
|
| 184 |
-
progress = st.progress(0, text="Training Approach 1...")
|
| 185 |
-
|
| 186 |
-
# ββ Approach 1 ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 187 |
-
try:
|
| 188 |
-
model1, _, _ = train_approach1(
|
| 189 |
-
X_train_s, y_train_l, X_val_s, y_val_l,
|
| 190 |
-
n_classes=n_classes, epochs=int(epochs),
|
| 191 |
-
)
|
| 192 |
-
preds1, proba1 = predict_approach1(model1, X_test_s)
|
| 193 |
-
results["Approach 1"] = execute_strategy(
|
| 194 |
-
preds1, proba1, y_test_r, test_dates,
|
| 195 |
-
target_etfs, fee_bps, tbill_rate, include_cash,
|
| 196 |
-
)
|
| 197 |
-
trained_info["Approach 1"] = {"proba": proba1}
|
| 198 |
-
except Exception as e:
|
| 199 |
-
st.warning(f"β οΈ Approach 1 failed: {e}")
|
| 200 |
-
results["Approach 1"] = None
|
| 201 |
-
|
| 202 |
-
progress.progress(33, text="Training Approach 2...")
|
| 203 |
-
|
| 204 |
-
# ββ Approach 2 ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 205 |
try:
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
)
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
})
|
| 253 |
|
| 254 |
-
# ββ
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
winner_name = select_winner(results)
|
| 256 |
winner_res = results.get(winner_name)
|
| 257 |
|
| 258 |
if winner_res is None:
|
| 259 |
-
st.error("β All approaches failed.
|
| 260 |
st.stop()
|
| 261 |
|
|
|
|
|
|
|
|
|
|
| 262 |
next_date = get_next_signal_date()
|
| 263 |
st.divider()
|
| 264 |
|
| 265 |
-
# ββ Winner signal banner ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 266 |
show_signal_banner(winner_res["next_signal"], next_date, winner_name)
|
| 267 |
|
| 268 |
-
# ββ Conviction panel ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 269 |
winner_proba = trained_info[winner_name]["proba"]
|
| 270 |
-
conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash)
|
| 271 |
show_conviction_panel(conviction)
|
| 272 |
|
| 273 |
st.divider()
|
| 274 |
|
| 275 |
-
# ββ All models next day signals βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 276 |
all_signals = {
|
| 277 |
-
name: {
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
"is_winner": name == winner_name,
|
| 281 |
-
}
|
| 282 |
for name, res in results.items() if res is not None
|
| 283 |
}
|
| 284 |
-
show_all_signals_panel(all_signals, target_etfs,
|
| 285 |
|
| 286 |
st.divider()
|
| 287 |
-
|
| 288 |
-
# ββ Winner performance metrics ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 289 |
st.subheader(f"π {winner_name} β Performance Metrics")
|
| 290 |
-
show_metrics_row(winner_res, tbill_rate)
|
| 291 |
|
| 292 |
-
|
|
|
|
|
|
|
| 293 |
|
| 294 |
-
|
| 295 |
st.subheader("π Approach Comparison (Winner = Highest Raw Annualised Return)")
|
| 296 |
-
|
| 297 |
-
show_comparison_table(comparison_df)
|
| 298 |
|
| 299 |
st.divider()
|
| 300 |
-
|
| 301 |
-
# ββ Equity curve ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 302 |
st.subheader(f"π {winner_name} vs SPY & AGG β Out-of-Sample")
|
| 303 |
-
fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
|
| 304 |
st.plotly_chart(fig, use_container_width=True)
|
| 305 |
|
| 306 |
st.divider()
|
| 307 |
-
|
| 308 |
-
# ββ Audit trail βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 309 |
st.subheader(f"π Audit Trail β {winner_name} (Last 20 Trading Days)")
|
| 310 |
show_audit_trail(winner_res["audit_trail"])
|
|
|
|
| 1 |
"""
|
| 2 |
app.py
|
| 3 |
P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES
|
| 4 |
+
- Session state persistence (results don't vanish on rerun)
|
| 5 |
+
- Model caching keyed by data date + config params
|
| 6 |
+
- Auto-lookback (30/45/60d)
|
| 7 |
+
- CASH is a drawdown risk overlay (not a model class)
|
| 8 |
+
- Ann. Return compared vs SPY in metrics row
|
| 9 |
+
- Max Daily DD shows date it occurred
|
| 10 |
+
- Conviction panel: compact ETF probability list
|
| 11 |
"""
|
| 12 |
|
| 13 |
import os
|
|
|
|
| 38 |
|
| 39 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
| 40 |
|
| 41 |
+
# ββ Session state init ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 42 |
+
for key, default in [
|
| 43 |
+
("output_ready", False), ("results", None), ("trained_info", None),
|
| 44 |
+
("test_dates", None), ("test_slice", None), ("optimal_lookback", None),
|
| 45 |
+
("df_for_chart", None), ("tbill_rate", None), ("target_etfs", None),
|
| 46 |
+
("from_cache", False),
|
| 47 |
+
]:
|
| 48 |
+
if key not in st.session_state:
|
| 49 |
+
st.session_state[key] = default
|
| 50 |
+
|
| 51 |
# ββ Sidebar βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 52 |
with st.sidebar:
|
| 53 |
st.header("βοΈ Configuration")
|
| 54 |
+
st.write(f"π **EST:** {get_est_time().strftime('%H:%M:%S')}")
|
|
|
|
| 55 |
st.divider()
|
| 56 |
|
| 57 |
start_yr = st.slider("π
Start Year", 2010, 2024, 2016)
|
|
|
|
| 62 |
split_option = st.selectbox("π Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0)
|
| 63 |
train_pct, val_pct = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)}[split_option]
|
| 64 |
|
| 65 |
+
st.caption("π‘ CASH triggered automatically on 2-day drawdown β€ β15%")
|
|
|
|
|
|
|
| 66 |
st.divider()
|
| 67 |
run_button = st.button("π Run All 3 Approaches", type="primary", use_container_width=True)
|
| 68 |
|
|
|
|
| 98 |
st.write(f"**Macro:** {', '.join(summary['macro_found'])}")
|
| 99 |
st.write(f"**T-bill col:** {'β
' if summary['tbill_found'] else 'β'}")
|
| 100 |
|
| 101 |
+
# ββ Run button ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 102 |
+
if run_button:
|
| 103 |
+
st.session_state.output_ready = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
+
df = df_raw[df_raw.index.year >= start_yr].copy()
|
| 106 |
+
st.write(f"π
**Data:** {df.index[0].strftime('%Y-%m-%d')} β {df.index[-1].strftime('%Y-%m-%d')} "
|
| 107 |
+
f"({df.index[-1].year - df.index[0].year + 1} years)")
|
|
|
|
|
|
|
|
|
|
| 108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
try:
|
| 110 |
+
input_features, target_etfs, tbill_rate, df, _ = get_features_and_targets(df)
|
| 111 |
+
except ValueError as e:
|
| 112 |
+
st.error(str(e))
|
| 113 |
+
st.stop()
|
| 114 |
+
|
| 115 |
+
n_etfs = len(target_etfs)
|
| 116 |
+
n_classes = n_etfs # CASH is overlay only β model always picks from ETFs
|
| 117 |
+
|
| 118 |
+
st.info(
|
| 119 |
+
f"π― **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} Β· "
|
| 120 |
+
f"**Features:** {len(input_features)} signals Β· "
|
| 121 |
+
f"**T-bill:** {tbill_rate*100:.2f}%"
|
| 122 |
+
)
|
| 123 |
+
|
| 124 |
+
# ββ Raw arrays ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 125 |
+
X_raw = df[input_features].values.astype(np.float32)
|
| 126 |
+
y_raw = np.clip(df[target_etfs].values.astype(np.float32), -0.5, 0.5)
|
| 127 |
+
|
| 128 |
+
for j in range(X_raw.shape[1]):
|
| 129 |
+
mask = np.isnan(X_raw[:, j])
|
| 130 |
+
if mask.any():
|
| 131 |
+
X_raw[mask, j] = np.nanmean(X_raw[:, j])
|
| 132 |
+
for j in range(y_raw.shape[1]):
|
| 133 |
+
mask = np.isnan(y_raw[:, j])
|
| 134 |
+
if mask.any():
|
| 135 |
+
y_raw[mask, j] = 0.0
|
| 136 |
+
|
| 137 |
+
last_date_str = str(freshness.get("last_date_in_data", "unknown"))
|
| 138 |
+
|
| 139 |
+
# ββ Auto-select lookback ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 140 |
+
lb_key = make_cache_key(last_date_str, start_yr, fee_bps, int(epochs),
|
| 141 |
+
split_option, False, 0)
|
| 142 |
+
lb_cached = load_cache(f"lb_{lb_key}")
|
| 143 |
+
|
| 144 |
+
if lb_cached is not None:
|
| 145 |
+
optimal_lookback = lb_cached["optimal_lookback"]
|
| 146 |
+
st.success(f"β‘ Cache hit Β· Optimal lookback: **{optimal_lookback}d**")
|
| 147 |
+
else:
|
| 148 |
+
with st.spinner("π Auto-selecting optimal lookback (30 / 45 / 60d)..."):
|
| 149 |
+
optimal_lookback = find_best_lookback(
|
| 150 |
+
X_raw, y_raw,
|
| 151 |
+
lambda y: returns_to_labels(y, include_cash=False),
|
| 152 |
+
train_pct, val_pct, n_classes, False,
|
| 153 |
+
candidates=[30, 45, 60],
|
| 154 |
+
)
|
| 155 |
+
save_cache(f"lb_{lb_key}", {"optimal_lookback": optimal_lookback})
|
| 156 |
+
st.success(f"π Optimal lookback: **{optimal_lookback}d** (auto-selected from 30/45/60)")
|
| 157 |
+
|
| 158 |
+
lookback = optimal_lookback
|
| 159 |
+
|
| 160 |
+
# ββ Check model cache βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 161 |
+
cache_key = make_cache_key(last_date_str, start_yr, fee_bps, int(epochs),
|
| 162 |
+
split_option, False, lookback)
|
| 163 |
+
cached_data = load_cache(cache_key)
|
| 164 |
+
|
| 165 |
+
if cached_data is not None:
|
| 166 |
+
results = cached_data["results"]
|
| 167 |
+
trained_info = cached_data["trained_info"]
|
| 168 |
+
test_dates = pd.DatetimeIndex(cached_data["test_dates"])
|
| 169 |
+
test_slice = cached_data["test_slice"]
|
| 170 |
+
st.success("β‘ Results loaded from cache β no retraining needed.")
|
| 171 |
+
else:
|
| 172 |
+
X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
|
| 173 |
+
y_labels = returns_to_labels(y_seq, include_cash=False)
|
| 174 |
+
|
| 175 |
+
(X_train, y_train_r, X_val, y_val_r,
|
| 176 |
+
X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
|
| 177 |
+
(_, y_train_l, _, y_val_l,
|
| 178 |
+
_, _) = train_val_test_split(X_seq, y_labels, train_pct, val_pct)
|
| 179 |
+
|
| 180 |
+
X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
|
| 181 |
+
|
| 182 |
+
train_size = len(X_train)
|
| 183 |
+
val_size = len(X_val)
|
| 184 |
+
test_start = lookback + train_size + val_size
|
| 185 |
+
test_dates = df.index[test_start: test_start + len(X_test)]
|
| 186 |
+
test_slice = slice(test_start, test_start + len(X_test))
|
| 187 |
+
|
| 188 |
+
results, trained_info = {}, {}
|
| 189 |
+
progress = st.progress(0, text="Training Approach 1...")
|
| 190 |
+
|
| 191 |
+
for approach, train_fn, predict_fn, train_kwargs in [
|
| 192 |
+
("Approach 1",
|
| 193 |
+
lambda: train_approach1(X_train_s, y_train_l, X_val_s, y_val_l,
|
| 194 |
+
n_classes=n_classes, epochs=int(epochs)),
|
| 195 |
+
lambda m: predict_approach1(m[0], X_test_s),
|
| 196 |
+
None),
|
| 197 |
+
("Approach 2",
|
| 198 |
+
lambda: train_approach2(X_train_s, y_train_l, X_val_s, y_val_l,
|
| 199 |
+
X_flat_all=X_raw, feature_names=input_features,
|
| 200 |
+
lookback=lookback, train_size=train_size,
|
| 201 |
+
val_size=val_size, n_classes=n_classes,
|
| 202 |
+
epochs=int(epochs)),
|
| 203 |
+
lambda m: predict_approach2(m[0], X_test_s, X_raw, m[3], m[2],
|
| 204 |
+
lookback, train_size, val_size),
|
| 205 |
+
None),
|
| 206 |
+
("Approach 3",
|
| 207 |
+
lambda: train_approach3(X_train_s, y_train_l, X_val_s, y_val_l,
|
| 208 |
+
n_classes=n_classes, epochs=int(epochs)),
|
| 209 |
+
lambda m: predict_approach3(m[0], X_test_s),
|
| 210 |
+
None),
|
| 211 |
+
]:
|
| 212 |
+
try:
|
| 213 |
+
model_out = train_fn()
|
| 214 |
+
preds, proba = predict_fn(model_out)
|
| 215 |
+
results[approach] = execute_strategy(
|
| 216 |
+
preds, proba, y_test_r, test_dates,
|
| 217 |
+
target_etfs, fee_bps, tbill_rate,
|
| 218 |
+
)
|
| 219 |
+
trained_info[approach] = {"proba": proba}
|
| 220 |
+
except Exception as e:
|
| 221 |
+
st.warning(f"β οΈ {approach} failed: {e}")
|
| 222 |
+
results[approach] = None
|
| 223 |
+
|
| 224 |
+
pct = {"Approach 1": 33, "Approach 2": 66, "Approach 3": 100}[approach]
|
| 225 |
+
progress.progress(pct, text=f"{approach} done...")
|
| 226 |
+
|
| 227 |
+
progress.empty()
|
| 228 |
+
|
| 229 |
+
save_cache(cache_key, {
|
| 230 |
+
"results": results, "trained_info": trained_info,
|
| 231 |
+
"test_dates": list(test_dates), "test_slice": test_slice,
|
| 232 |
+
})
|
| 233 |
+
|
| 234 |
+
# ββ Persist to session state ββββββββββββββββββββββββββββββββββββββββββββββ
|
| 235 |
+
st.session_state.update({
|
| 236 |
+
"results": results, "trained_info": trained_info,
|
| 237 |
+
"test_dates": test_dates, "test_slice": test_slice,
|
| 238 |
+
"optimal_lookback": optimal_lookback, "df_for_chart": df,
|
| 239 |
+
"tbill_rate": tbill_rate, "target_etfs": target_etfs,
|
| 240 |
+
"output_ready": True,
|
| 241 |
})
|
| 242 |
|
| 243 |
+
# ββ Render (persists across reruns via session_state) βββββββββββββββββββββββββ
|
| 244 |
+
if not st.session_state.output_ready:
|
| 245 |
+
st.info("π Configure parameters and click **π Run All 3 Approaches**.")
|
| 246 |
+
st.stop()
|
| 247 |
+
|
| 248 |
+
results = st.session_state.results
|
| 249 |
+
trained_info = st.session_state.trained_info
|
| 250 |
+
test_dates = st.session_state.test_dates
|
| 251 |
+
test_slice = st.session_state.test_slice
|
| 252 |
+
optimal_lookback = st.session_state.optimal_lookback
|
| 253 |
+
df = st.session_state.df_for_chart
|
| 254 |
+
tbill_rate = st.session_state.tbill_rate
|
| 255 |
+
target_etfs = st.session_state.target_etfs
|
| 256 |
+
|
| 257 |
winner_name = select_winner(results)
|
| 258 |
winner_res = results.get(winner_name)
|
| 259 |
|
| 260 |
if winner_res is None:
|
| 261 |
+
st.error("β All approaches failed.")
|
| 262 |
st.stop()
|
| 263 |
|
| 264 |
+
if st.session_state.from_cache:
|
| 265 |
+
st.success("β‘ Showing cached results.")
|
| 266 |
+
|
| 267 |
next_date = get_next_signal_date()
|
| 268 |
st.divider()
|
| 269 |
|
|
|
|
| 270 |
show_signal_banner(winner_res["next_signal"], next_date, winner_name)
|
| 271 |
|
|
|
|
| 272 |
winner_proba = trained_info[winner_name]["proba"]
|
| 273 |
+
conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash=False)
|
| 274 |
show_conviction_panel(conviction)
|
| 275 |
|
| 276 |
st.divider()
|
| 277 |
|
|
|
|
| 278 |
all_signals = {
|
| 279 |
+
name: {"signal": res["next_signal"],
|
| 280 |
+
"proba": trained_info[name]["proba"][-1],
|
| 281 |
+
"is_winner": name == winner_name}
|
|
|
|
|
|
|
| 282 |
for name, res in results.items() if res is not None
|
| 283 |
}
|
| 284 |
+
show_all_signals_panel(all_signals, target_etfs, False, next_date, optimal_lookback)
|
| 285 |
|
| 286 |
st.divider()
|
|
|
|
|
|
|
| 287 |
st.subheader(f"π {winner_name} β Performance Metrics")
|
|
|
|
| 288 |
|
| 289 |
+
# Build equity curve first to get spy_ann for metrics comparison
|
| 290 |
+
fig, spy_ann = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
|
| 291 |
+
show_metrics_row(winner_res, tbill_rate, spy_ann_return=spy_ann)
|
| 292 |
|
| 293 |
+
st.divider()
|
| 294 |
st.subheader("π Approach Comparison (Winner = Highest Raw Annualised Return)")
|
| 295 |
+
show_comparison_table(build_comparison_table(results, winner_name))
|
|
|
|
| 296 |
|
| 297 |
st.divider()
|
|
|
|
|
|
|
| 298 |
st.subheader(f"π {winner_name} vs SPY & AGG β Out-of-Sample")
|
|
|
|
| 299 |
st.plotly_chart(fig, use_container_width=True)
|
| 300 |
|
| 301 |
st.divider()
|
|
|
|
|
|
|
| 302 |
st.subheader(f"π Audit Trail β {winner_name} (Last 20 Trading Days)")
|
| 303 |
show_audit_trail(winner_res["audit_trail"])
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py
CHANGED
|
@@ -13,7 +13,9 @@ from data.loader import (load_dataset, check_data_freshness,
|
|
| 13 |
get_features_and_targets, dataset_summary)
|
| 14 |
from utils.calendar import get_est_time, get_next_signal_date
|
| 15 |
from models.base import (build_sequences, train_val_test_split,
|
| 16 |
-
scale_features, returns_to_labels
|
|
|
|
|
|
|
| 17 |
from models.approach1_wavelet import train_approach1, predict_approach1
|
| 18 |
from models.approach2_regime import train_approach2, predict_approach2
|
| 19 |
from models.approach3_multiscale import train_approach3, predict_approach3
|
|
@@ -39,8 +41,7 @@ with st.sidebar:
|
|
| 39 |
|
| 40 |
start_yr = st.slider("π
Start Year", 2010, 2024, 2016)
|
| 41 |
fee_bps = st.slider("π° Fee (bps)", 0, 50, 10)
|
| 42 |
-
|
| 43 |
-
epochs = st.number_input("π Max Epochs", 20, 300, 100, step=10)
|
| 44 |
|
| 45 |
st.divider()
|
| 46 |
split_option = st.selectbox("π Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0)
|
|
@@ -109,7 +110,7 @@ st.info(
|
|
| 109 |
f"**T-bill:** {tbill_rate*100:.2f}%"
|
| 110 |
)
|
| 111 |
|
| 112 |
-
# ββ
|
| 113 |
X_raw = df[input_features].values.astype(np.float32)
|
| 114 |
y_raw = df[target_etfs].values.astype(np.float32)
|
| 115 |
|
|
@@ -117,39 +118,74 @@ for j in range(X_raw.shape[1]):
|
|
| 117 |
mask = np.isnan(X_raw[:, j])
|
| 118 |
if mask.any():
|
| 119 |
X_raw[mask, j] = np.nanmean(X_raw[:, j])
|
| 120 |
-
|
| 121 |
for j in range(y_raw.shape[1]):
|
| 122 |
mask = np.isnan(y_raw[:, j])
|
| 123 |
if mask.any():
|
| 124 |
y_raw[mask, j] = np.nanmean(y_raw[:, j])
|
| 125 |
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
(X_train, y_train_r, X_val, y_val_r,
|
| 130 |
-
X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
|
| 131 |
-
(_, y_train_l, _, y_val_l,
|
| 132 |
-
_, _) = train_val_test_split(X_seq, y_labels, train_pct, val_pct)
|
| 133 |
-
|
| 134 |
-
X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
|
| 135 |
-
|
| 136 |
-
train_size = len(X_train)
|
| 137 |
-
val_size = len(X_val)
|
| 138 |
-
test_start = lookback + train_size + val_size
|
| 139 |
-
test_dates = df.index[test_start: test_start + len(X_test)]
|
| 140 |
-
test_slice = slice(test_start, test_start + len(X_test))
|
| 141 |
-
|
| 142 |
-
st.success(f"β
Sequences β Train: {train_size:,} Β· Val: {val_size:,} Β· Test: {len(X_test):,}")
|
| 143 |
|
| 144 |
-
#
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
try:
|
| 152 |
-
model1,
|
| 153 |
X_train_s, y_train_l, X_val_s, y_val_l,
|
| 154 |
n_classes=n_classes, epochs=int(epochs),
|
| 155 |
)
|
|
@@ -159,17 +195,15 @@ with st.spinner("π Training Approach 1 β Wavelet CNN-LSTM..."):
|
|
| 159 |
target_etfs, fee_bps, tbill_rate, include_cash,
|
| 160 |
)
|
| 161 |
trained_info["Approach 1"] = {"proba": proba1}
|
| 162 |
-
st.success("β
Approach 1 complete")
|
| 163 |
except Exception as e:
|
| 164 |
st.warning(f"β οΈ Approach 1 failed: {e}")
|
| 165 |
results["Approach 1"] = None
|
| 166 |
|
| 167 |
-
progress.progress(33, text="Approach
|
| 168 |
|
| 169 |
-
# Approach 2
|
| 170 |
-
with st.spinner("π Training Approach 2 β Regime-Conditioned CNN-LSTM..."):
|
| 171 |
try:
|
| 172 |
-
model2,
|
| 173 |
X_train_s, y_train_l, X_val_s, y_val_l,
|
| 174 |
X_flat_all=X_raw, feature_names=input_features,
|
| 175 |
lookback=lookback, train_size=train_size, val_size=val_size,
|
|
@@ -184,17 +218,15 @@ with st.spinner("π Training Approach 2 β Regime-Conditioned CNN-LSTM..."):
|
|
| 184 |
target_etfs, fee_bps, tbill_rate, include_cash,
|
| 185 |
)
|
| 186 |
trained_info["Approach 2"] = {"proba": proba2}
|
| 187 |
-
st.success("β
Approach 2 complete")
|
| 188 |
except Exception as e:
|
| 189 |
st.warning(f"β οΈ Approach 2 failed: {e}")
|
| 190 |
results["Approach 2"] = None
|
| 191 |
|
| 192 |
-
progress.progress(66, text="Approach
|
| 193 |
|
| 194 |
-
# Approach 3
|
| 195 |
-
with st.spinner("π‘ Training Approach 3 β Multi-Scale CNN-LSTM..."):
|
| 196 |
try:
|
| 197 |
-
model3,
|
| 198 |
X_train_s, y_train_l, X_val_s, y_val_l,
|
| 199 |
n_classes=n_classes, epochs=int(epochs),
|
| 200 |
)
|
|
@@ -204,13 +236,20 @@ with st.spinner("π‘ Training Approach 3 β Multi-Scale CNN-LSTM..."):
|
|
| 204 |
target_etfs, fee_bps, tbill_rate, include_cash,
|
| 205 |
)
|
| 206 |
trained_info["Approach 3"] = {"proba": proba3}
|
| 207 |
-
st.success("β
Approach 3 complete")
|
| 208 |
except Exception as e:
|
| 209 |
st.warning(f"β οΈ Approach 3 failed: {e}")
|
| 210 |
results["Approach 3"] = None
|
| 211 |
|
| 212 |
-
progress.progress(100, text="
|
| 213 |
-
progress.empty()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
|
| 215 |
# ββ Select winner βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 216 |
winner_name = select_winner(results)
|
|
@@ -226,14 +265,14 @@ st.divider()
|
|
| 226 |
# ββ Winner signal banner ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 227 |
show_signal_banner(winner_res["next_signal"], next_date, winner_name)
|
| 228 |
|
| 229 |
-
# ββ Conviction panel
|
| 230 |
winner_proba = trained_info[winner_name]["proba"]
|
| 231 |
conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash)
|
| 232 |
show_conviction_panel(conviction)
|
| 233 |
|
| 234 |
st.divider()
|
| 235 |
|
| 236 |
-
# ββ All models
|
| 237 |
all_signals = {
|
| 238 |
name: {
|
| 239 |
"signal": res["next_signal"],
|
|
@@ -242,7 +281,7 @@ all_signals = {
|
|
| 242 |
}
|
| 243 |
for name, res in results.items() if res is not None
|
| 244 |
}
|
| 245 |
-
show_all_signals_panel(all_signals, target_etfs, include_cash, next_date)
|
| 246 |
|
| 247 |
st.divider()
|
| 248 |
|
|
@@ -259,13 +298,13 @@ show_comparison_table(comparison_df)
|
|
| 259 |
|
| 260 |
st.divider()
|
| 261 |
|
| 262 |
-
# ββ Equity
|
| 263 |
-
st.subheader("π
|
| 264 |
fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
|
| 265 |
st.plotly_chart(fig, use_container_width=True)
|
| 266 |
|
| 267 |
st.divider()
|
| 268 |
|
| 269 |
-
# ββ Audit trail
|
| 270 |
st.subheader(f"π Audit Trail β {winner_name} (Last 20 Trading Days)")
|
| 271 |
show_audit_trail(winner_res["audit_trail"])
|
|
|
|
| 13 |
get_features_and_targets, dataset_summary)
|
| 14 |
from utils.calendar import get_est_time, get_next_signal_date
|
| 15 |
from models.base import (build_sequences, train_val_test_split,
|
| 16 |
+
scale_features, returns_to_labels,
|
| 17 |
+
find_best_lookback, make_cache_key,
|
| 18 |
+
save_cache, load_cache)
|
| 19 |
from models.approach1_wavelet import train_approach1, predict_approach1
|
| 20 |
from models.approach2_regime import train_approach2, predict_approach2
|
| 21 |
from models.approach3_multiscale import train_approach3, predict_approach3
|
|
|
|
| 41 |
|
| 42 |
start_yr = st.slider("π
Start Year", 2010, 2024, 2016)
|
| 43 |
fee_bps = st.slider("π° Fee (bps)", 0, 50, 10)
|
| 44 |
+
epochs = st.number_input("π Max Epochs", 20, 150, 80, step=10)
|
|
|
|
| 45 |
|
| 46 |
st.divider()
|
| 47 |
split_option = st.selectbox("π Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0)
|
|
|
|
| 110 |
f"**T-bill:** {tbill_rate*100:.2f}%"
|
| 111 |
)
|
| 112 |
|
| 113 |
+
# ββ Prepare raw arrays ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 114 |
X_raw = df[input_features].values.astype(np.float32)
|
| 115 |
y_raw = df[target_etfs].values.astype(np.float32)
|
| 116 |
|
|
|
|
| 118 |
mask = np.isnan(X_raw[:, j])
|
| 119 |
if mask.any():
|
| 120 |
X_raw[mask, j] = np.nanmean(X_raw[:, j])
|
|
|
|
| 121 |
for j in range(y_raw.shape[1]):
|
| 122 |
mask = np.isnan(y_raw[:, j])
|
| 123 |
if mask.any():
|
| 124 |
y_raw[mask, j] = np.nanmean(y_raw[:, j])
|
| 125 |
|
| 126 |
+
# ββ Auto-select optimal lookback ββββββββββββββββββββββββββββββββββββββββββββββ
|
| 127 |
+
last_date_str = str(freshness.get("last_date_in_data", "unknown"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
+
# Check cache for lookback selection too
|
| 130 |
+
lb_cache_key = make_cache_key(
|
| 131 |
+
last_date_str, start_yr, fee_bps, int(epochs), split_option, include_cash, 0
|
| 132 |
+
)
|
| 133 |
+
lb_cached = load_cache(f"lb_{lb_cache_key}")
|
| 134 |
+
|
| 135 |
+
if lb_cached is not None:
|
| 136 |
+
optimal_lookback = lb_cached["optimal_lookback"]
|
| 137 |
+
st.success(f"β‘ Loaded from cache Β· Optimal lookback: **{optimal_lookback}d**")
|
| 138 |
+
else:
|
| 139 |
+
with st.spinner("π Finding optimal lookback (30 / 45 / 60d)..."):
|
| 140 |
+
def _y_labels_fn(y_seq):
|
| 141 |
+
return returns_to_labels(y_seq, include_cash=include_cash)
|
| 142 |
+
optimal_lookback = find_best_lookback(
|
| 143 |
+
X_raw, y_raw, _y_labels_fn,
|
| 144 |
+
train_pct, val_pct, n_classes, include_cash,
|
| 145 |
+
candidates=[30, 45, 60],
|
| 146 |
+
)
|
| 147 |
+
save_cache(f"lb_{lb_cache_key}", {"optimal_lookback": optimal_lookback})
|
| 148 |
+
st.success(f"π Optimal lookback: **{optimal_lookback}d** (auto-selected from 30/45/60)")
|
| 149 |
+
|
| 150 |
+
lookback = optimal_lookback
|
| 151 |
+
|
| 152 |
+
# ββ Check full model cache ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 153 |
+
cache_key = make_cache_key(last_date_str, start_yr, fee_bps, int(epochs),
|
| 154 |
+
split_option, include_cash, lookback)
|
| 155 |
+
cached_data = load_cache(cache_key)
|
| 156 |
+
from_cache = cached_data is not None
|
| 157 |
+
|
| 158 |
+
if from_cache:
|
| 159 |
+
results = cached_data["results"]
|
| 160 |
+
trained_info = cached_data["trained_info"]
|
| 161 |
+
test_dates = pd.DatetimeIndex(cached_data["test_dates"])
|
| 162 |
+
test_slice = cached_data["test_slice"]
|
| 163 |
+
st.success("β‘ Results loaded from cache β no retraining needed.")
|
| 164 |
+
else:
|
| 165 |
+
# ββ Build sequences βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 166 |
+
X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
|
| 167 |
+
y_labels = returns_to_labels(y_seq, include_cash=include_cash)
|
| 168 |
+
|
| 169 |
+
(X_train, y_train_r, X_val, y_val_r,
|
| 170 |
+
X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
|
| 171 |
+
(_, y_train_l, _, y_val_l,
|
| 172 |
+
_, _) = train_val_test_split(X_seq, y_labels, train_pct, val_pct)
|
| 173 |
+
|
| 174 |
+
X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
|
| 175 |
+
|
| 176 |
+
train_size = len(X_train)
|
| 177 |
+
val_size = len(X_val)
|
| 178 |
+
test_start = lookback + train_size + val_size
|
| 179 |
+
test_dates = df.index[test_start: test_start + len(X_test)]
|
| 180 |
+
test_slice = slice(test_start, test_start + len(X_test))
|
| 181 |
+
|
| 182 |
+
results = {}
|
| 183 |
+
trained_info = {}
|
| 184 |
+
progress = st.progress(0, text="Training Approach 1...")
|
| 185 |
+
|
| 186 |
+
# ββ Approach 1 ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 187 |
try:
|
| 188 |
+
model1, _, _ = train_approach1(
|
| 189 |
X_train_s, y_train_l, X_val_s, y_val_l,
|
| 190 |
n_classes=n_classes, epochs=int(epochs),
|
| 191 |
)
|
|
|
|
| 195 |
target_etfs, fee_bps, tbill_rate, include_cash,
|
| 196 |
)
|
| 197 |
trained_info["Approach 1"] = {"proba": proba1}
|
|
|
|
| 198 |
except Exception as e:
|
| 199 |
st.warning(f"β οΈ Approach 1 failed: {e}")
|
| 200 |
results["Approach 1"] = None
|
| 201 |
|
| 202 |
+
progress.progress(33, text="Training Approach 2...")
|
| 203 |
|
| 204 |
+
# ββ Approach 2 ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 205 |
try:
|
| 206 |
+
model2, _, hmm2, regime_cols2 = train_approach2(
|
| 207 |
X_train_s, y_train_l, X_val_s, y_val_l,
|
| 208 |
X_flat_all=X_raw, feature_names=input_features,
|
| 209 |
lookback=lookback, train_size=train_size, val_size=val_size,
|
|
|
|
| 218 |
target_etfs, fee_bps, tbill_rate, include_cash,
|
| 219 |
)
|
| 220 |
trained_info["Approach 2"] = {"proba": proba2}
|
|
|
|
| 221 |
except Exception as e:
|
| 222 |
st.warning(f"β οΈ Approach 2 failed: {e}")
|
| 223 |
results["Approach 2"] = None
|
| 224 |
|
| 225 |
+
progress.progress(66, text="Training Approach 3...")
|
| 226 |
|
| 227 |
+
# ββ Approach 3 ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 228 |
try:
|
| 229 |
+
model3, _ = train_approach3(
|
| 230 |
X_train_s, y_train_l, X_val_s, y_val_l,
|
| 231 |
n_classes=n_classes, epochs=int(epochs),
|
| 232 |
)
|
|
|
|
| 236 |
target_etfs, fee_bps, tbill_rate, include_cash,
|
| 237 |
)
|
| 238 |
trained_info["Approach 3"] = {"proba": proba3}
|
|
|
|
| 239 |
except Exception as e:
|
| 240 |
st.warning(f"β οΈ Approach 3 failed: {e}")
|
| 241 |
results["Approach 3"] = None
|
| 242 |
|
| 243 |
+
progress.progress(100, text="Done!")
|
| 244 |
+
progress.empty()
|
| 245 |
+
|
| 246 |
+
# ββ Save to cache βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 247 |
+
save_cache(cache_key, {
|
| 248 |
+
"results": results,
|
| 249 |
+
"trained_info": trained_info,
|
| 250 |
+
"test_dates": list(test_dates),
|
| 251 |
+
"test_slice": test_slice,
|
| 252 |
+
})
|
| 253 |
|
| 254 |
# ββ Select winner βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 255 |
winner_name = select_winner(results)
|
|
|
|
| 265 |
# ββ Winner signal banner ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 266 |
show_signal_banner(winner_res["next_signal"], next_date, winner_name)
|
| 267 |
|
| 268 |
+
# ββ Conviction panel ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 269 |
winner_proba = trained_info[winner_name]["proba"]
|
| 270 |
conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash)
|
| 271 |
show_conviction_panel(conviction)
|
| 272 |
|
| 273 |
st.divider()
|
| 274 |
|
| 275 |
+
# ββ All models next day signals βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 276 |
all_signals = {
|
| 277 |
name: {
|
| 278 |
"signal": res["next_signal"],
|
|
|
|
| 281 |
}
|
| 282 |
for name, res in results.items() if res is not None
|
| 283 |
}
|
| 284 |
+
show_all_signals_panel(all_signals, target_etfs, include_cash, next_date, optimal_lookback)
|
| 285 |
|
| 286 |
st.divider()
|
| 287 |
|
|
|
|
| 298 |
|
| 299 |
st.divider()
|
| 300 |
|
| 301 |
+
# ββ Equity curve ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 302 |
+
st.subheader(f"π {winner_name} vs SPY & AGG β Out-of-Sample")
|
| 303 |
fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
|
| 304 |
st.plotly_chart(fig, use_container_width=True)
|
| 305 |
|
| 306 |
st.divider()
|
| 307 |
|
| 308 |
+
# ββ Audit trail βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 309 |
st.subheader(f"π Audit Trail β {winner_name} (Last 20 Trading Days)")
|
| 310 |
show_audit_trail(winner_res["audit_trail"])
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py
CHANGED
|
@@ -11,7 +11,7 @@ import numpy as np
|
|
| 11 |
|
| 12 |
from data.loader import (load_dataset, check_data_freshness,
|
| 13 |
get_features_and_targets, dataset_summary)
|
| 14 |
-
from utils.calendar import get_est_time,
|
| 15 |
from models.base import (build_sequences, train_val_test_split,
|
| 16 |
scale_features, returns_to_labels)
|
| 17 |
from models.approach1_wavelet import train_approach1, predict_approach1
|
|
@@ -22,8 +22,9 @@ from signals.conviction import compute_conviction
|
|
| 22 |
from ui.components import (
|
| 23 |
show_freshness_status, show_signal_banner, show_conviction_panel,
|
| 24 |
show_metrics_row, show_comparison_table, show_audit_trail,
|
|
|
|
| 25 |
)
|
| 26 |
-
from ui.charts import equity_curve_chart
|
| 27 |
|
| 28 |
st.set_page_config(page_title="P2-ETF-CNN-LSTM", page_icon="π§ ", layout="wide")
|
| 29 |
|
|
@@ -34,12 +35,8 @@ with st.sidebar:
|
|
| 34 |
st.header("βοΈ Configuration")
|
| 35 |
now_est = get_est_time()
|
| 36 |
st.write(f"π **EST:** {now_est.strftime('%H:%M:%S')}")
|
| 37 |
-
if is_sync_window():
|
| 38 |
-
st.success("β
Sync Window Active")
|
| 39 |
-
else:
|
| 40 |
-
st.info("βΈοΈ Sync Window Inactive")
|
| 41 |
-
|
| 42 |
st.divider()
|
|
|
|
| 43 |
start_yr = st.slider("π
Start Year", 2010, 2024, 2016)
|
| 44 |
fee_bps = st.slider("π° Fee (bps)", 0, 50, 10)
|
| 45 |
lookback = st.slider("π Lookback (days)", 20, 60, 30, step=5)
|
|
@@ -87,9 +84,6 @@ with st.sidebar:
|
|
| 87 |
st.write(f"**Macro:** {', '.join(summary['macro_found'])}")
|
| 88 |
st.write(f"**T-bill col:** {'β
' if summary['tbill_found'] else 'β'}")
|
| 89 |
|
| 90 |
-
with st.expander("π All columns"):
|
| 91 |
-
st.write(summary["all_cols"])
|
| 92 |
-
|
| 93 |
if not run_button:
|
| 94 |
st.info("π Configure parameters and click **π Run All 3 Approaches**.")
|
| 95 |
st.stop()
|
|
@@ -101,7 +95,7 @@ st.write(f"π
**Data:** {df.index[0].strftime('%Y-%m-%d')} β {df.index[-1].s
|
|
| 101 |
|
| 102 |
# ββ Features & targets ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 103 |
try:
|
| 104 |
-
input_features, target_etfs, tbill_rate, df,
|
| 105 |
except ValueError as e:
|
| 106 |
st.error(str(e))
|
| 107 |
st.stop()
|
|
@@ -109,18 +103,6 @@ except ValueError as e:
|
|
| 109 |
n_etfs = len(target_etfs)
|
| 110 |
n_classes = n_etfs + (1 if include_cash else 0)
|
| 111 |
|
| 112 |
-
# ββ Show column detection diagnostics ββββββββββββββββββββββββββββββββββββββββ
|
| 113 |
-
with st.expander("π¬ Column detection diagnostics", expanded=False):
|
| 114 |
-
st.write("**How each ETF column was interpreted:**")
|
| 115 |
-
for col, info in col_info.items():
|
| 116 |
-
st.write(f"- `{col}`: {info}")
|
| 117 |
-
st.write(f"**Input features ({len(input_features)}):** {input_features}")
|
| 118 |
-
st.write(f"**T-bill rate used:** {tbill_rate*100:.3f}%")
|
| 119 |
-
|
| 120 |
-
# Show sample return values to verify correctness
|
| 121 |
-
st.write("**Sample target return values (last 3 rows):**")
|
| 122 |
-
st.dataframe(df[target_etfs].tail(3))
|
| 123 |
-
|
| 124 |
st.info(
|
| 125 |
f"π― **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} Β· "
|
| 126 |
f"**Features:** {len(input_features)} signals Β· "
|
|
@@ -131,19 +113,15 @@ st.info(
|
|
| 131 |
X_raw = df[input_features].values.astype(np.float32)
|
| 132 |
y_raw = df[target_etfs].values.astype(np.float32)
|
| 133 |
|
| 134 |
-
# Fill NaNs
|
| 135 |
-
col_means = np.nanmean(X_raw, axis=0)
|
| 136 |
for j in range(X_raw.shape[1]):
|
| 137 |
mask = np.isnan(X_raw[:, j])
|
| 138 |
if mask.any():
|
| 139 |
-
X_raw[mask, j] =
|
| 140 |
|
| 141 |
-
# Also fill NaNs in y_raw
|
| 142 |
-
y_means = np.nanmean(y_raw, axis=0)
|
| 143 |
for j in range(y_raw.shape[1]):
|
| 144 |
mask = np.isnan(y_raw[:, j])
|
| 145 |
if mask.any():
|
| 146 |
-
y_raw[mask, j] =
|
| 147 |
|
| 148 |
X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
|
| 149 |
y_labels = returns_to_labels(y_seq, include_cash=include_cash)
|
|
@@ -151,7 +129,7 @@ y_labels = returns_to_labels(y_seq, include_cash=include_cash)
|
|
| 151 |
(X_train, y_train_r, X_val, y_val_r,
|
| 152 |
X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
|
| 153 |
(_, y_train_l, _, y_val_l,
|
| 154 |
-
_,
|
| 155 |
|
| 156 |
X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
|
| 157 |
|
|
@@ -163,14 +141,6 @@ test_slice = slice(test_start, test_start + len(X_test))
|
|
| 163 |
|
| 164 |
st.success(f"β
Sequences β Train: {train_size:,} Β· Val: {val_size:,} Β· Test: {len(X_test):,}")
|
| 165 |
|
| 166 |
-
# Show class distribution to check for degenerate labels
|
| 167 |
-
with st.expander("π¬ Label distribution (train set)", expanded=False):
|
| 168 |
-
unique, counts = np.unique(y_train_l, return_counts=True)
|
| 169 |
-
label_names = [target_etfs[i].replace("_Ret","") if i < n_etfs else "CASH" for i in unique]
|
| 170 |
-
dist_df = pd.DataFrame({"Class": label_names, "Count": counts,
|
| 171 |
-
"Pct": (counts / counts.sum() * 100).round(1)})
|
| 172 |
-
st.dataframe(dist_df)
|
| 173 |
-
|
| 174 |
# ββ Train all three approaches ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 175 |
results = {}
|
| 176 |
trained_info = {}
|
|
@@ -253,27 +223,49 @@ if winner_res is None:
|
|
| 253 |
next_date = get_next_signal_date()
|
| 254 |
st.divider()
|
| 255 |
|
|
|
|
| 256 |
show_signal_banner(winner_res["next_signal"], next_date, winner_name)
|
| 257 |
|
|
|
|
| 258 |
winner_proba = trained_info[winner_name]["proba"]
|
| 259 |
conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash)
|
| 260 |
show_conviction_panel(conviction)
|
| 261 |
|
| 262 |
st.divider()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
st.subheader(f"π {winner_name} β Performance Metrics")
|
| 264 |
show_metrics_row(winner_res, tbill_rate)
|
| 265 |
|
| 266 |
st.divider()
|
|
|
|
|
|
|
| 267 |
st.subheader("π Approach Comparison (Winner = Highest Raw Annualised Return)")
|
| 268 |
comparison_df = build_comparison_table(results, winner_name)
|
| 269 |
show_comparison_table(comparison_df)
|
| 270 |
-
st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True)
|
| 271 |
|
| 272 |
st.divider()
|
|
|
|
|
|
|
| 273 |
st.subheader("π Out-of-Sample Equity Curves β All Approaches vs Benchmarks")
|
| 274 |
fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
|
| 275 |
st.plotly_chart(fig, use_container_width=True)
|
| 276 |
|
| 277 |
st.divider()
|
|
|
|
|
|
|
| 278 |
st.subheader(f"π Audit Trail β {winner_name} (Last 20 Trading Days)")
|
| 279 |
show_audit_trail(winner_res["audit_trail"])
|
|
|
|
| 11 |
|
| 12 |
from data.loader import (load_dataset, check_data_freshness,
|
| 13 |
get_features_and_targets, dataset_summary)
|
| 14 |
+
from utils.calendar import get_est_time, get_next_signal_date
|
| 15 |
from models.base import (build_sequences, train_val_test_split,
|
| 16 |
scale_features, returns_to_labels)
|
| 17 |
from models.approach1_wavelet import train_approach1, predict_approach1
|
|
|
|
| 22 |
from ui.components import (
|
| 23 |
show_freshness_status, show_signal_banner, show_conviction_panel,
|
| 24 |
show_metrics_row, show_comparison_table, show_audit_trail,
|
| 25 |
+
show_all_signals_panel,
|
| 26 |
)
|
| 27 |
+
from ui.charts import equity_curve_chart
|
| 28 |
|
| 29 |
st.set_page_config(page_title="P2-ETF-CNN-LSTM", page_icon="π§ ", layout="wide")
|
| 30 |
|
|
|
|
| 35 |
st.header("βοΈ Configuration")
|
| 36 |
now_est = get_est_time()
|
| 37 |
st.write(f"π **EST:** {now_est.strftime('%H:%M:%S')}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
st.divider()
|
| 39 |
+
|
| 40 |
start_yr = st.slider("π
Start Year", 2010, 2024, 2016)
|
| 41 |
fee_bps = st.slider("π° Fee (bps)", 0, 50, 10)
|
| 42 |
lookback = st.slider("π Lookback (days)", 20, 60, 30, step=5)
|
|
|
|
| 84 |
st.write(f"**Macro:** {', '.join(summary['macro_found'])}")
|
| 85 |
st.write(f"**T-bill col:** {'β
' if summary['tbill_found'] else 'β'}")
|
| 86 |
|
|
|
|
|
|
|
|
|
|
| 87 |
if not run_button:
|
| 88 |
st.info("π Configure parameters and click **π Run All 3 Approaches**.")
|
| 89 |
st.stop()
|
|
|
|
| 95 |
|
| 96 |
# ββ Features & targets ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 97 |
try:
|
| 98 |
+
input_features, target_etfs, tbill_rate, df, _ = get_features_and_targets(df)
|
| 99 |
except ValueError as e:
|
| 100 |
st.error(str(e))
|
| 101 |
st.stop()
|
|
|
|
| 103 |
n_etfs = len(target_etfs)
|
| 104 |
n_classes = n_etfs + (1 if include_cash else 0)
|
| 105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
st.info(
|
| 107 |
f"π― **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} Β· "
|
| 108 |
f"**Features:** {len(input_features)} signals Β· "
|
|
|
|
| 113 |
X_raw = df[input_features].values.astype(np.float32)
|
| 114 |
y_raw = df[target_etfs].values.astype(np.float32)
|
| 115 |
|
|
|
|
|
|
|
| 116 |
for j in range(X_raw.shape[1]):
|
| 117 |
mask = np.isnan(X_raw[:, j])
|
| 118 |
if mask.any():
|
| 119 |
+
X_raw[mask, j] = np.nanmean(X_raw[:, j])
|
| 120 |
|
|
|
|
|
|
|
| 121 |
for j in range(y_raw.shape[1]):
|
| 122 |
mask = np.isnan(y_raw[:, j])
|
| 123 |
if mask.any():
|
| 124 |
+
y_raw[mask, j] = np.nanmean(y_raw[:, j])
|
| 125 |
|
| 126 |
X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
|
| 127 |
y_labels = returns_to_labels(y_seq, include_cash=include_cash)
|
|
|
|
| 129 |
(X_train, y_train_r, X_val, y_val_r,
|
| 130 |
X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
|
| 131 |
(_, y_train_l, _, y_val_l,
|
| 132 |
+
_, _) = train_val_test_split(X_seq, y_labels, train_pct, val_pct)
|
| 133 |
|
| 134 |
X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
|
| 135 |
|
|
|
|
| 141 |
|
| 142 |
st.success(f"β
Sequences β Train: {train_size:,} Β· Val: {val_size:,} Β· Test: {len(X_test):,}")
|
| 143 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
# ββ Train all three approaches ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 145 |
results = {}
|
| 146 |
trained_info = {}
|
|
|
|
| 223 |
next_date = get_next_signal_date()
|
| 224 |
st.divider()
|
| 225 |
|
| 226 |
+
# ββ Winner signal banner ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 227 |
show_signal_banner(winner_res["next_signal"], next_date, winner_name)
|
| 228 |
|
| 229 |
+
# ββ Conviction panel (winner only) ββββββββββββββββββββββββββββββββββββββββββββ
|
| 230 |
winner_proba = trained_info[winner_name]["proba"]
|
| 231 |
conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash)
|
| 232 |
show_conviction_panel(conviction)
|
| 233 |
|
| 234 |
st.divider()
|
| 235 |
+
|
| 236 |
+
# ββ All models' next day signals ββββββββββββββββββββββββββββββββββββββββββββββ
|
| 237 |
+
all_signals = {
|
| 238 |
+
name: {
|
| 239 |
+
"signal": res["next_signal"],
|
| 240 |
+
"proba": trained_info[name]["proba"][-1],
|
| 241 |
+
"is_winner": name == winner_name,
|
| 242 |
+
}
|
| 243 |
+
for name, res in results.items() if res is not None
|
| 244 |
+
}
|
| 245 |
+
show_all_signals_panel(all_signals, target_etfs, include_cash, next_date)
|
| 246 |
+
|
| 247 |
+
st.divider()
|
| 248 |
+
|
| 249 |
+
# ββ Winner performance metrics ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 250 |
st.subheader(f"π {winner_name} β Performance Metrics")
|
| 251 |
show_metrics_row(winner_res, tbill_rate)
|
| 252 |
|
| 253 |
st.divider()
|
| 254 |
+
|
| 255 |
+
# ββ Comparison table ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 256 |
st.subheader("π Approach Comparison (Winner = Highest Raw Annualised Return)")
|
| 257 |
comparison_df = build_comparison_table(results, winner_name)
|
| 258 |
show_comparison_table(comparison_df)
|
|
|
|
| 259 |
|
| 260 |
st.divider()
|
| 261 |
+
|
| 262 |
+
# ββ Equity curves βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 263 |
st.subheader("π Out-of-Sample Equity Curves β All Approaches vs Benchmarks")
|
| 264 |
fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
|
| 265 |
st.plotly_chart(fig, use_container_width=True)
|
| 266 |
|
| 267 |
st.divider()
|
| 268 |
+
|
| 269 |
+
# ββ Audit trail (winner) ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 270 |
st.subheader(f"π Audit Trail β {winner_name} (Last 20 Trading Days)")
|
| 271 |
show_audit_trail(winner_res["audit_trail"])
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py
CHANGED
|
@@ -1,12 +1,8 @@
|
|
| 1 |
"""
|
| 2 |
data/loader.py
|
| 3 |
Loads master_data.parquet from HF Dataset.
|
| 4 |
-
|
| 5 |
-
No external pings β all data
|
| 6 |
-
|
| 7 |
-
Actual dataset columns (confirmed from parquet inspection):
|
| 8 |
-
ETFs : AGG, GLD, SLV, SPY, TBT, TLT, VNQ
|
| 9 |
-
Macro : VIX, DXY, T10Y2Y, TBILL_3M, IG_SPREAD, HY_SPREAD
|
| 10 |
"""
|
| 11 |
|
| 12 |
import pandas as pd
|
|
@@ -22,9 +18,8 @@ try:
|
|
| 22 |
except ImportError:
|
| 23 |
NYSE_CAL_AVAILABLE = False
|
| 24 |
|
| 25 |
-
DATASET_REPO
|
| 26 |
-
PARQUET_FILE
|
| 27 |
-
|
| 28 |
TARGET_ETF_COLS = ["TLT", "TBT", "VNQ", "SLV", "GLD"]
|
| 29 |
BENCHMARK_COLS = ["SPY", "AGG"]
|
| 30 |
TBILL_COL = "TBILL_3M"
|
|
@@ -64,16 +59,13 @@ def load_dataset(hf_token: str) -> pd.DataFrame:
|
|
| 64 |
token=hf_token,
|
| 65 |
)
|
| 66 |
df = pd.read_parquet(path)
|
| 67 |
-
|
| 68 |
if not isinstance(df.index, pd.DatetimeIndex):
|
| 69 |
for col in ["Date", "date", "DATE"]:
|
| 70 |
if col in df.columns:
|
| 71 |
df = df.set_index(col)
|
| 72 |
break
|
| 73 |
df.index = pd.to_datetime(df.index)
|
| 74 |
-
|
| 75 |
return df.sort_index()
|
| 76 |
-
|
| 77 |
except Exception as e:
|
| 78 |
st.error(f"β Failed to load dataset: {e}")
|
| 79 |
return pd.DataFrame()
|
|
@@ -84,11 +76,9 @@ def load_dataset(hf_token: str) -> pd.DataFrame:
|
|
| 84 |
def check_data_freshness(df: pd.DataFrame) -> dict:
|
| 85 |
if df.empty:
|
| 86 |
return {"fresh": False, "message": "Dataset is empty."}
|
| 87 |
-
|
| 88 |
last = df.index[-1].date()
|
| 89 |
expect = get_last_nyse_trading_day()
|
| 90 |
fresh = last >= expect
|
| 91 |
-
|
| 92 |
msg = (
|
| 93 |
f"β
Dataset up to date through **{last}**." if fresh else
|
| 94 |
f"β οΈ **{expect}** data not yet updated. Latest: **{last}**. "
|
|
@@ -98,106 +88,139 @@ def check_data_freshness(df: pd.DataFrame) -> dict:
|
|
| 98 |
"expected_date": expect, "message": msg}
|
| 99 |
|
| 100 |
|
| 101 |
-
# ββ
|
| 102 |
|
| 103 |
-
def
|
| 104 |
-
"""
|
| 105 |
-
Heuristic: a price series has abs(median) > 2 and std/mean < 0.5.
|
| 106 |
-
A return series has abs(median) < 0.1 and many values near zero.
|
| 107 |
-
"""
|
| 108 |
clean = series.dropna()
|
| 109 |
if len(clean) == 0:
|
| 110 |
-
return
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
|
|
|
|
|
|
| 120 |
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
def get_features_and_targets(df: pd.DataFrame):
|
| 124 |
"""
|
| 125 |
-
Build return columns for target ETFs and
|
| 126 |
-
Auto-detects whether source columns are prices or already returns.
|
| 127 |
|
| 128 |
Returns:
|
| 129 |
input_features : list[str]
|
| 130 |
target_etfs : list[str] e.g. ["TLT_Ret", ...]
|
| 131 |
tbill_rate : float
|
| 132 |
-
|
| 133 |
-
col_info : dict of diagnostics
|
| 134 |
"""
|
| 135 |
missing = [c for c in TARGET_ETF_COLS if c not in df.columns]
|
| 136 |
if missing:
|
| 137 |
raise ValueError(
|
| 138 |
f"Missing ETF columns: {missing}. "
|
| 139 |
-
f"Found
|
| 140 |
)
|
| 141 |
|
| 142 |
col_info = {}
|
| 143 |
|
| 144 |
-
# ββ Build
|
| 145 |
-
|
|
|
|
| 146 |
ret_col = f"{col}_Ret"
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
target_etfs = [make_ret(c) for c in TARGET_ETF_COLS]
|
| 159 |
-
benchmark_rets = [make_ret(c) for c in BENCHMARK_COLS if c in df.columns]
|
| 160 |
-
|
| 161 |
-
# Drop NaN rows (first row from pct_change)
|
| 162 |
df = df.dropna(subset=target_etfs).copy()
|
| 163 |
|
| 164 |
-
#
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
f"these may not be daily returns. Check dataset column '{ret_col.replace('_Ret','')}'. "
|
| 171 |
-
f"Sample values: {df[ret_col].tail(3).values}"
|
| 172 |
-
)
|
| 173 |
-
|
| 174 |
-
# ββ Input features ββββββββββββββββββββββββββββββββββββοΏ½οΏ½οΏ½βββββββββββββββββββ
|
| 175 |
-
exclude = set(
|
| 176 |
-
TARGET_ETF_COLS + BENCHMARK_COLS + target_etfs + benchmark_rets +
|
| 177 |
-
[f"{c}_Ret" for c in BENCHMARK_COLS] + [TBILL_COL]
|
| 178 |
-
)
|
| 179 |
|
| 180 |
-
#
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
# Then add any engineered signal columns
|
| 184 |
-
extra = [
|
| 185 |
-
c for c in df.columns
|
| 186 |
-
if c not in exclude
|
| 187 |
-
and c not in input_features
|
| 188 |
-
and any(k in c for k in ["_Z", "_Vol", "Regime", "YC_", "Credit_",
|
| 189 |
-
"Rates_", "VIX_", "Spread", "DXY", "T10Y",
|
| 190 |
-
"TBILL", "SOFR", "MOVE"])
|
| 191 |
-
and pd.api.types.is_numeric_dtype(df[c])
|
| 192 |
-
]
|
| 193 |
-
input_features += extra
|
| 194 |
-
|
| 195 |
-
# Fallback: all numeric non-excluded columns
|
| 196 |
-
if not input_features:
|
| 197 |
-
input_features = [
|
| 198 |
-
c for c in df.columns
|
| 199 |
-
if c not in exclude and pd.api.types.is_numeric_dtype(df[c])
|
| 200 |
-
]
|
| 201 |
|
| 202 |
# ββ T-bill rate βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 203 |
tbill_rate = 0.045
|
|
@@ -207,6 +230,14 @@ def get_features_and_targets(df: pd.DataFrame):
|
|
| 207 |
v = float(raw.iloc[-1])
|
| 208 |
tbill_rate = v / 100 if v > 1 else v
|
| 209 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
return input_features, target_etfs, tbill_rate, df, col_info
|
| 211 |
|
| 212 |
|
|
|
|
| 1 |
"""
|
| 2 |
data/loader.py
|
| 3 |
Loads master_data.parquet from HF Dataset.
|
| 4 |
+
Engineers rich feature set from raw price/macro columns.
|
| 5 |
+
No external pings β all data from HF Dataset only.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
import pandas as pd
|
|
|
|
| 18 |
except ImportError:
|
| 19 |
NYSE_CAL_AVAILABLE = False
|
| 20 |
|
| 21 |
+
DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
|
| 22 |
+
PARQUET_FILE = "master_data.parquet"
|
|
|
|
| 23 |
TARGET_ETF_COLS = ["TLT", "TBT", "VNQ", "SLV", "GLD"]
|
| 24 |
BENCHMARK_COLS = ["SPY", "AGG"]
|
| 25 |
TBILL_COL = "TBILL_3M"
|
|
|
|
| 59 |
token=hf_token,
|
| 60 |
)
|
| 61 |
df = pd.read_parquet(path)
|
|
|
|
| 62 |
if not isinstance(df.index, pd.DatetimeIndex):
|
| 63 |
for col in ["Date", "date", "DATE"]:
|
| 64 |
if col in df.columns:
|
| 65 |
df = df.set_index(col)
|
| 66 |
break
|
| 67 |
df.index = pd.to_datetime(df.index)
|
|
|
|
| 68 |
return df.sort_index()
|
|
|
|
| 69 |
except Exception as e:
|
| 70 |
st.error(f"β Failed to load dataset: {e}")
|
| 71 |
return pd.DataFrame()
|
|
|
|
| 76 |
def check_data_freshness(df: pd.DataFrame) -> dict:
|
| 77 |
if df.empty:
|
| 78 |
return {"fresh": False, "message": "Dataset is empty."}
|
|
|
|
| 79 |
last = df.index[-1].date()
|
| 80 |
expect = get_last_nyse_trading_day()
|
| 81 |
fresh = last >= expect
|
|
|
|
| 82 |
msg = (
|
| 83 |
f"β
Dataset up to date through **{last}**." if fresh else
|
| 84 |
f"β οΈ **{expect}** data not yet updated. Latest: **{last}**. "
|
|
|
|
| 88 |
"expected_date": expect, "message": msg}
|
| 89 |
|
| 90 |
|
| 91 |
+
# ββ Price β returns βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 92 |
|
| 93 |
+
def _to_returns(series: pd.Series) -> pd.Series:
|
| 94 |
+
"""Convert price series to daily pct returns. If already returns, pass through."""
|
|
|
|
|
|
|
|
|
|
| 95 |
clean = series.dropna()
|
| 96 |
if len(clean) == 0:
|
| 97 |
+
return series
|
| 98 |
+
if abs(clean.median()) > 2: # price series
|
| 99 |
+
return series.pct_change()
|
| 100 |
+
return series # already returns
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
# ββ Feature engineering βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 104 |
+
|
| 105 |
+
def _engineer_features(df: pd.DataFrame, ret_cols: list) -> pd.DataFrame:
|
| 106 |
+
"""
|
| 107 |
+
Build a rich feature set from raw macro + ETF return columns.
|
| 108 |
+
|
| 109 |
+
Features added per ETF return:
|
| 110 |
+
- 1d, 5d, 21d lagged returns
|
| 111 |
+
- 5d, 21d rolling volatility
|
| 112 |
+
- 5d, 21d momentum (cumulative return)
|
| 113 |
+
|
| 114 |
+
Features added per macro column:
|
| 115 |
+
- raw value (z-scored over rolling 252d window)
|
| 116 |
+
- 5d change
|
| 117 |
+
- 1d lag
|
| 118 |
+
|
| 119 |
+
Also adds:
|
| 120 |
+
- TBILL_3M as a feature (rate level)
|
| 121 |
+
- VIX regime flag (VIX > 25)
|
| 122 |
+
- Yield curve slope (already T10Y2Y)
|
| 123 |
+
- Cross-asset momentum: spread between TLT_ret and TBT_ret
|
| 124 |
+
"""
|
| 125 |
+
feat = pd.DataFrame(index=df.index)
|
| 126 |
+
|
| 127 |
+
# ββ ETF return features βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 128 |
+
for col in ret_cols:
|
| 129 |
+
r = df[col]
|
| 130 |
+
feat[f"{col}_lag1"] = r.shift(1)
|
| 131 |
+
feat[f"{col}_lag5"] = r.shift(5)
|
| 132 |
+
feat[f"{col}_lag21"] = r.shift(21)
|
| 133 |
+
feat[f"{col}_vol5"] = r.rolling(5).std()
|
| 134 |
+
feat[f"{col}_vol21"] = r.rolling(21).std()
|
| 135 |
+
feat[f"{col}_mom5"] = r.rolling(5).sum()
|
| 136 |
+
feat[f"{col}_mom21"] = r.rolling(21).sum()
|
| 137 |
+
|
| 138 |
+
# ββ Macro features ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 139 |
+
for col in MACRO_COLS:
|
| 140 |
+
if col not in df.columns:
|
| 141 |
+
continue
|
| 142 |
+
s = df[col]
|
| 143 |
+
# Z-score over rolling 252-day window
|
| 144 |
+
roll_mean = s.rolling(252, min_periods=63).mean()
|
| 145 |
+
roll_std = s.rolling(252, min_periods=63).std()
|
| 146 |
+
feat[f"{col}_z"] = (s - roll_mean) / (roll_std + 1e-9)
|
| 147 |
+
feat[f"{col}_chg5"] = s.diff(5)
|
| 148 |
+
feat[f"{col}_lag1"] = s.shift(1)
|
| 149 |
+
|
| 150 |
+
# ββ TBILL level βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 151 |
+
if TBILL_COL in df.columns:
|
| 152 |
+
tbill = df[TBILL_COL]
|
| 153 |
+
feat["TBILL_level"] = tbill
|
| 154 |
+
feat["TBILL_chg5"] = tbill.diff(5)
|
| 155 |
+
|
| 156 |
+
# ββ Derived cross-asset signals βββββββββββββββββββββββββββββββββββββββββββ
|
| 157 |
+
if "TLT_Ret" in df.columns and "TBT_Ret" in df.columns:
|
| 158 |
+
feat["TLT_TBT_spread_mom5"] = (
|
| 159 |
+
df["TLT_Ret"].rolling(5).sum() - df["TBT_Ret"].rolling(5).sum()
|
| 160 |
+
)
|
| 161 |
+
|
| 162 |
+
if "VIX" in df.columns:
|
| 163 |
+
feat["VIX_regime"] = (df["VIX"] > 25).astype(float)
|
| 164 |
+
feat["VIX_mom5"] = df["VIX"].diff(5)
|
| 165 |
+
|
| 166 |
+
if "T10Y2Y" in df.columns:
|
| 167 |
+
feat["YC_inverted"] = (df["T10Y2Y"] < 0).astype(float)
|
| 168 |
|
| 169 |
+
if "IG_SPREAD" in df.columns and "HY_SPREAD" in df.columns:
|
| 170 |
+
feat["credit_ratio"] = df["HY_SPREAD"] / (df["IG_SPREAD"] + 1e-9)
|
| 171 |
|
| 172 |
+
return feat
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
# ββ Main extraction function ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 176 |
|
| 177 |
def get_features_and_targets(df: pd.DataFrame):
|
| 178 |
"""
|
| 179 |
+
Build return columns for target ETFs and engineer a rich feature set.
|
|
|
|
| 180 |
|
| 181 |
Returns:
|
| 182 |
input_features : list[str]
|
| 183 |
target_etfs : list[str] e.g. ["TLT_Ret", ...]
|
| 184 |
tbill_rate : float
|
| 185 |
+
df_out : DataFrame with all columns
|
| 186 |
+
col_info : dict of diagnostics
|
| 187 |
"""
|
| 188 |
missing = [c for c in TARGET_ETF_COLS if c not in df.columns]
|
| 189 |
if missing:
|
| 190 |
raise ValueError(
|
| 191 |
f"Missing ETF columns: {missing}. "
|
| 192 |
+
f"Found: {list(df.columns)}"
|
| 193 |
)
|
| 194 |
|
| 195 |
col_info = {}
|
| 196 |
|
| 197 |
+
# ββ Build ETF return columns ββββββββββββββββββββββββββββββββββββββββββββββ
|
| 198 |
+
target_etfs = []
|
| 199 |
+
for col in TARGET_ETF_COLS:
|
| 200 |
ret_col = f"{col}_Ret"
|
| 201 |
+
df[ret_col] = _to_returns(df[col])
|
| 202 |
+
med = abs(df[col].dropna().median())
|
| 203 |
+
col_info[col] = f"priceβpct_change (median={med:.2f})" if med > 2 else f"used as-is (median={med:.4f})"
|
| 204 |
+
target_etfs.append(ret_col)
|
| 205 |
+
|
| 206 |
+
# ββ Build benchmark return columns ββββββββββββββββββββββββββββββββββββββββ
|
| 207 |
+
for col in BENCHMARK_COLS:
|
| 208 |
+
if col in df.columns:
|
| 209 |
+
df[f"{col}_Ret"] = _to_returns(df[col])
|
| 210 |
+
|
| 211 |
+
# ββ Drop NaN from first pct_change row ββββββββββββββββββββββββββββββββββββ
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
df = df.dropna(subset=target_etfs).copy()
|
| 213 |
|
| 214 |
+
# ββ Engineer features βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 215 |
+
feat_df = _engineer_features(df, target_etfs)
|
| 216 |
+
|
| 217 |
+
# Merge features into df
|
| 218 |
+
for col in feat_df.columns:
|
| 219 |
+
df[col] = feat_df[col].values
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
+
# Drop rows with NaN in features (from lags/rolling)
|
| 222 |
+
feat_cols = list(feat_df.columns)
|
| 223 |
+
df = df.dropna(subset=feat_cols).copy()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
|
| 225 |
# ββ T-bill rate βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 226 |
tbill_rate = 0.045
|
|
|
|
| 230 |
v = float(raw.iloc[-1])
|
| 231 |
tbill_rate = v / 100 if v > 1 else v
|
| 232 |
|
| 233 |
+
# Input features = all engineered feature columns
|
| 234 |
+
exclude = set(
|
| 235 |
+
TARGET_ETF_COLS + BENCHMARK_COLS + target_etfs +
|
| 236 |
+
[f"{c}_Ret" for c in BENCHMARK_COLS] + [TBILL_COL] +
|
| 237 |
+
list(MACRO_COLS)
|
| 238 |
+
)
|
| 239 |
+
input_features = [c for c in feat_cols if c not in exclude]
|
| 240 |
+
|
| 241 |
return input_features, target_etfs, tbill_rate, df, col_info
|
| 242 |
|
| 243 |
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py
CHANGED
|
@@ -9,7 +9,6 @@ import streamlit as st
|
|
| 9 |
import pandas as pd
|
| 10 |
import numpy as np
|
| 11 |
|
| 12 |
-
# ββ Module imports ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 13 |
from data.loader import (load_dataset, check_data_freshness,
|
| 14 |
get_features_and_targets, dataset_summary)
|
| 15 |
from utils.calendar import get_est_time, is_sync_window, get_next_signal_date
|
|
@@ -26,20 +25,13 @@ from ui.components import (
|
|
| 26 |
)
|
| 27 |
from ui.charts import equity_curve_chart, comparison_bar_chart
|
| 28 |
|
| 29 |
-
|
| 30 |
-
st.set_page_config(
|
| 31 |
-
page_title="P2-ETF-CNN-LSTM",
|
| 32 |
-
page_icon="π§ ",
|
| 33 |
-
layout="wide",
|
| 34 |
-
)
|
| 35 |
|
| 36 |
-
# ββ Secrets βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 37 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
| 38 |
|
| 39 |
# ββ Sidebar βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 40 |
with st.sidebar:
|
| 41 |
st.header("βοΈ Configuration")
|
| 42 |
-
|
| 43 |
now_est = get_est_time()
|
| 44 |
st.write(f"π **EST:** {now_est.strftime('%H:%M:%S')}")
|
| 45 |
if is_sync_window():
|
|
@@ -48,25 +40,19 @@ with st.sidebar:
|
|
| 48 |
st.info("βΈοΈ Sync Window Inactive")
|
| 49 |
|
| 50 |
st.divider()
|
| 51 |
-
|
| 52 |
start_yr = st.slider("π
Start Year", 2010, 2024, 2016)
|
| 53 |
fee_bps = st.slider("π° Fee (bps)", 0, 50, 10)
|
| 54 |
lookback = st.slider("π Lookback (days)", 20, 60, 30, step=5)
|
| 55 |
epochs = st.number_input("π Max Epochs", 20, 300, 100, step=10)
|
| 56 |
|
| 57 |
st.divider()
|
| 58 |
-
|
| 59 |
split_option = st.selectbox("π Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0)
|
| 60 |
-
|
| 61 |
-
train_pct, val_pct = split_map[split_option]
|
| 62 |
|
| 63 |
-
include_cash = st.checkbox(
|
| 64 |
-
"
|
| 65 |
-
help="Model can select CASH (earns T-bill rate) instead of any ETF",
|
| 66 |
-
)
|
| 67 |
|
| 68 |
st.divider()
|
| 69 |
-
|
| 70 |
run_button = st.button("π Run All 3 Approaches", type="primary", use_container_width=True)
|
| 71 |
|
| 72 |
# ββ Title βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -74,9 +60,8 @@ st.title("π§ P2-ETF-CNN-LSTM")
|
|
| 74 |
st.caption("Approach 1: Wavelet Β· Approach 2: Regime-Conditioned Β· Approach 3: Multi-Scale Parallel")
|
| 75 |
st.caption("Winner selected by highest raw annualised return on out-of-sample test set.")
|
| 76 |
|
| 77 |
-
# ββ Token check βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 78 |
if not HF_TOKEN:
|
| 79 |
-
st.error("β HF_TOKEN secret not found.
|
| 80 |
st.stop()
|
| 81 |
|
| 82 |
# ββ Load dataset ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -86,11 +71,10 @@ with st.spinner("π‘ Loading dataset from HuggingFace..."):
|
|
| 86 |
if df_raw.empty:
|
| 87 |
st.stop()
|
| 88 |
|
| 89 |
-
# ββ Freshness check βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 90 |
freshness = check_data_freshness(df_raw)
|
| 91 |
show_freshness_status(freshness)
|
| 92 |
|
| 93 |
-
# ββ Dataset
|
| 94 |
with st.sidebar:
|
| 95 |
st.divider()
|
| 96 |
st.subheader("π¦ Dataset Info")
|
|
@@ -103,21 +87,21 @@ with st.sidebar:
|
|
| 103 |
st.write(f"**Macro:** {', '.join(summary['macro_found'])}")
|
| 104 |
st.write(f"**T-bill col:** {'β
' if summary['tbill_found'] else 'β'}")
|
| 105 |
|
| 106 |
-
|
|
|
|
|
|
|
| 107 |
if not run_button:
|
| 108 |
-
st.info("π Configure parameters
|
| 109 |
st.stop()
|
| 110 |
|
| 111 |
# ββ Filter by start year ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 112 |
df = df_raw[df_raw.index.year >= start_yr].copy()
|
| 113 |
-
st.write(
|
| 114 |
-
|
| 115 |
-
f"({df.index[-1].year - df.index[0].year + 1} years)"
|
| 116 |
-
)
|
| 117 |
|
| 118 |
# ββ Features & targets ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 119 |
try:
|
| 120 |
-
input_features, target_etfs, tbill_rate, df = get_features_and_targets(df)
|
| 121 |
except ValueError as e:
|
| 122 |
st.error(str(e))
|
| 123 |
st.stop()
|
|
@@ -125,6 +109,18 @@ except ValueError as e:
|
|
| 125 |
n_etfs = len(target_etfs)
|
| 126 |
n_classes = n_etfs + (1 if include_cash else 0)
|
| 127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
st.info(
|
| 129 |
f"π― **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} Β· "
|
| 130 |
f"**Features:** {len(input_features)} signals Β· "
|
|
@@ -135,13 +131,20 @@ st.info(
|
|
| 135 |
X_raw = df[input_features].values.astype(np.float32)
|
| 136 |
y_raw = df[target_etfs].values.astype(np.float32)
|
| 137 |
|
| 138 |
-
# Fill
|
| 139 |
col_means = np.nanmean(X_raw, axis=0)
|
| 140 |
for j in range(X_raw.shape[1]):
|
| 141 |
mask = np.isnan(X_raw[:, j])
|
| 142 |
if mask.any():
|
| 143 |
X_raw[mask, j] = col_means[j]
|
| 144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
|
| 146 |
y_labels = returns_to_labels(y_seq, include_cash=include_cash)
|
| 147 |
|
|
@@ -154,27 +157,30 @@ X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
|
|
| 154 |
|
| 155 |
train_size = len(X_train)
|
| 156 |
val_size = len(X_val)
|
| 157 |
-
|
| 158 |
test_start = lookback + train_size + val_size
|
| 159 |
test_dates = df.index[test_start: test_start + len(X_test)]
|
| 160 |
test_slice = slice(test_start, test_start + len(X_test))
|
| 161 |
|
| 162 |
-
st.success(
|
| 163 |
-
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
# ββ Train all three approaches ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 167 |
results = {}
|
| 168 |
trained_info = {}
|
|
|
|
| 169 |
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
# ββ Approach 1 ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 173 |
with st.spinner("π Training Approach 1 β Wavelet CNN-LSTM..."):
|
| 174 |
try:
|
| 175 |
model1, hist1, _ = train_approach1(
|
| 176 |
-
X_train_s, y_train_l,
|
| 177 |
-
X_val_s, y_val_l,
|
| 178 |
n_classes=n_classes, epochs=int(epochs),
|
| 179 |
)
|
| 180 |
preds1, proba1 = predict_approach1(model1, X_test_s)
|
|
@@ -190,17 +196,13 @@ with st.spinner("π Training Approach 1 β Wavelet CNN-LSTM..."):
|
|
| 190 |
|
| 191 |
progress.progress(33, text="Approach 1 done...")
|
| 192 |
|
| 193 |
-
#
|
| 194 |
with st.spinner("π Training Approach 2 β Regime-Conditioned CNN-LSTM..."):
|
| 195 |
try:
|
| 196 |
model2, hist2, hmm2, regime_cols2 = train_approach2(
|
| 197 |
-
X_train_s, y_train_l,
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
feature_names=input_features,
|
| 201 |
-
lookback=lookback,
|
| 202 |
-
train_size=train_size,
|
| 203 |
-
val_size=val_size,
|
| 204 |
n_classes=n_classes, epochs=int(epochs),
|
| 205 |
)
|
| 206 |
preds2, proba2 = predict_approach2(
|
|
@@ -219,12 +221,11 @@ with st.spinner("π Training Approach 2 β Regime-Conditioned CNN-LSTM..."):
|
|
| 219 |
|
| 220 |
progress.progress(66, text="Approach 2 done...")
|
| 221 |
|
| 222 |
-
#
|
| 223 |
with st.spinner("π‘ Training Approach 3 β Multi-Scale CNN-LSTM..."):
|
| 224 |
try:
|
| 225 |
model3, hist3 = train_approach3(
|
| 226 |
-
X_train_s, y_train_l,
|
| 227 |
-
X_val_s, y_val_l,
|
| 228 |
n_classes=n_classes, epochs=int(epochs),
|
| 229 |
)
|
| 230 |
preds3, proba3 = predict_approach3(model3, X_test_s)
|
|
@@ -250,41 +251,29 @@ if winner_res is None:
|
|
| 250 |
st.stop()
|
| 251 |
|
| 252 |
next_date = get_next_signal_date()
|
| 253 |
-
|
| 254 |
st.divider()
|
| 255 |
|
| 256 |
-
# ββ Signal banner βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 257 |
show_signal_banner(winner_res["next_signal"], next_date, winner_name)
|
| 258 |
|
| 259 |
-
# ββ Conviction panel ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 260 |
winner_proba = trained_info[winner_name]["proba"]
|
| 261 |
conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash)
|
| 262 |
show_conviction_panel(conviction)
|
| 263 |
|
| 264 |
st.divider()
|
| 265 |
-
|
| 266 |
-
# ββ Winner metrics ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 267 |
st.subheader(f"π {winner_name} β Performance Metrics")
|
| 268 |
show_metrics_row(winner_res, tbill_rate)
|
| 269 |
|
| 270 |
st.divider()
|
| 271 |
-
|
| 272 |
-
# ββ Comparison table ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 273 |
st.subheader("π Approach Comparison (Winner = Highest Raw Annualised Return)")
|
| 274 |
comparison_df = build_comparison_table(results, winner_name)
|
| 275 |
show_comparison_table(comparison_df)
|
| 276 |
-
|
| 277 |
st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True)
|
| 278 |
|
| 279 |
st.divider()
|
| 280 |
-
|
| 281 |
-
# ββ Equity curves βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 282 |
st.subheader("π Out-of-Sample Equity Curves β All Approaches vs Benchmarks")
|
| 283 |
fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
|
| 284 |
st.plotly_chart(fig, use_container_width=True)
|
| 285 |
|
| 286 |
st.divider()
|
| 287 |
-
|
| 288 |
-
# ββ Audit trail βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 289 |
st.subheader(f"π Audit Trail β {winner_name} (Last 20 Trading Days)")
|
| 290 |
show_audit_trail(winner_res["audit_trail"])
|
|
|
|
| 9 |
import pandas as pd
|
| 10 |
import numpy as np
|
| 11 |
|
|
|
|
| 12 |
from data.loader import (load_dataset, check_data_freshness,
|
| 13 |
get_features_and_targets, dataset_summary)
|
| 14 |
from utils.calendar import get_est_time, is_sync_window, get_next_signal_date
|
|
|
|
| 25 |
)
|
| 26 |
from ui.charts import equity_curve_chart, comparison_bar_chart
|
| 27 |
|
| 28 |
+
st.set_page_config(page_title="P2-ETF-CNN-LSTM", page_icon="π§ ", layout="wide")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
|
|
|
| 30 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
| 31 |
|
| 32 |
# ββ Sidebar βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 33 |
with st.sidebar:
|
| 34 |
st.header("βοΈ Configuration")
|
|
|
|
| 35 |
now_est = get_est_time()
|
| 36 |
st.write(f"π **EST:** {now_est.strftime('%H:%M:%S')}")
|
| 37 |
if is_sync_window():
|
|
|
|
| 40 |
st.info("βΈοΈ Sync Window Inactive")
|
| 41 |
|
| 42 |
st.divider()
|
|
|
|
| 43 |
start_yr = st.slider("π
Start Year", 2010, 2024, 2016)
|
| 44 |
fee_bps = st.slider("π° Fee (bps)", 0, 50, 10)
|
| 45 |
lookback = st.slider("π Lookback (days)", 20, 60, 30, step=5)
|
| 46 |
epochs = st.number_input("π Max Epochs", 20, 300, 100, step=10)
|
| 47 |
|
| 48 |
st.divider()
|
|
|
|
| 49 |
split_option = st.selectbox("π Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0)
|
| 50 |
+
train_pct, val_pct = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)}[split_option]
|
|
|
|
| 51 |
|
| 52 |
+
include_cash = st.checkbox("π΅ Include CASH class", value=True,
|
| 53 |
+
help="Model can select CASH (earns T-bill rate) instead of any ETF")
|
|
|
|
|
|
|
| 54 |
|
| 55 |
st.divider()
|
|
|
|
| 56 |
run_button = st.button("π Run All 3 Approaches", type="primary", use_container_width=True)
|
| 57 |
|
| 58 |
# ββ Title βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 60 |
st.caption("Approach 1: Wavelet Β· Approach 2: Regime-Conditioned Β· Approach 3: Multi-Scale Parallel")
|
| 61 |
st.caption("Winner selected by highest raw annualised return on out-of-sample test set.")
|
| 62 |
|
|
|
|
| 63 |
if not HF_TOKEN:
|
| 64 |
+
st.error("β HF_TOKEN secret not found.")
|
| 65 |
st.stop()
|
| 66 |
|
| 67 |
# ββ Load dataset ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 71 |
if df_raw.empty:
|
| 72 |
st.stop()
|
| 73 |
|
|
|
|
| 74 |
freshness = check_data_freshness(df_raw)
|
| 75 |
show_freshness_status(freshness)
|
| 76 |
|
| 77 |
+
# ββ Dataset info sidebar ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 78 |
with st.sidebar:
|
| 79 |
st.divider()
|
| 80 |
st.subheader("π¦ Dataset Info")
|
|
|
|
| 87 |
st.write(f"**Macro:** {', '.join(summary['macro_found'])}")
|
| 88 |
st.write(f"**T-bill col:** {'β
' if summary['tbill_found'] else 'β'}")
|
| 89 |
|
| 90 |
+
with st.expander("π All columns"):
|
| 91 |
+
st.write(summary["all_cols"])
|
| 92 |
+
|
| 93 |
if not run_button:
|
| 94 |
+
st.info("π Configure parameters and click **π Run All 3 Approaches**.")
|
| 95 |
st.stop()
|
| 96 |
|
| 97 |
# ββ Filter by start year ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 98 |
df = df_raw[df_raw.index.year >= start_yr].copy()
|
| 99 |
+
st.write(f"π
**Data:** {df.index[0].strftime('%Y-%m-%d')} β {df.index[-1].strftime('%Y-%m-%d')} "
|
| 100 |
+
f"({df.index[-1].year - df.index[0].year + 1} years)")
|
|
|
|
|
|
|
| 101 |
|
| 102 |
# ββ Features & targets ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 103 |
try:
|
| 104 |
+
input_features, target_etfs, tbill_rate, df, col_info = get_features_and_targets(df)
|
| 105 |
except ValueError as e:
|
| 106 |
st.error(str(e))
|
| 107 |
st.stop()
|
|
|
|
| 109 |
n_etfs = len(target_etfs)
|
| 110 |
n_classes = n_etfs + (1 if include_cash else 0)
|
| 111 |
|
| 112 |
+
# ββ Show column detection diagnostics ββββββββββββββββββββββββββββββββββββββββ
|
| 113 |
+
with st.expander("π¬ Column detection diagnostics", expanded=False):
|
| 114 |
+
st.write("**How each ETF column was interpreted:**")
|
| 115 |
+
for col, info in col_info.items():
|
| 116 |
+
st.write(f"- `{col}`: {info}")
|
| 117 |
+
st.write(f"**Input features ({len(input_features)}):** {input_features}")
|
| 118 |
+
st.write(f"**T-bill rate used:** {tbill_rate*100:.3f}%")
|
| 119 |
+
|
| 120 |
+
# Show sample return values to verify correctness
|
| 121 |
+
st.write("**Sample target return values (last 3 rows):**")
|
| 122 |
+
st.dataframe(df[target_etfs].tail(3))
|
| 123 |
+
|
| 124 |
st.info(
|
| 125 |
f"π― **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} Β· "
|
| 126 |
f"**Features:** {len(input_features)} signals Β· "
|
|
|
|
| 131 |
X_raw = df[input_features].values.astype(np.float32)
|
| 132 |
y_raw = df[target_etfs].values.astype(np.float32)
|
| 133 |
|
| 134 |
+
# Fill NaNs
|
| 135 |
col_means = np.nanmean(X_raw, axis=0)
|
| 136 |
for j in range(X_raw.shape[1]):
|
| 137 |
mask = np.isnan(X_raw[:, j])
|
| 138 |
if mask.any():
|
| 139 |
X_raw[mask, j] = col_means[j]
|
| 140 |
|
| 141 |
+
# Also fill NaNs in y_raw
|
| 142 |
+
y_means = np.nanmean(y_raw, axis=0)
|
| 143 |
+
for j in range(y_raw.shape[1]):
|
| 144 |
+
mask = np.isnan(y_raw[:, j])
|
| 145 |
+
if mask.any():
|
| 146 |
+
y_raw[mask, j] = y_means[j]
|
| 147 |
+
|
| 148 |
X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
|
| 149 |
y_labels = returns_to_labels(y_seq, include_cash=include_cash)
|
| 150 |
|
|
|
|
| 157 |
|
| 158 |
train_size = len(X_train)
|
| 159 |
val_size = len(X_val)
|
|
|
|
| 160 |
test_start = lookback + train_size + val_size
|
| 161 |
test_dates = df.index[test_start: test_start + len(X_test)]
|
| 162 |
test_slice = slice(test_start, test_start + len(X_test))
|
| 163 |
|
| 164 |
+
st.success(f"β
Sequences β Train: {train_size:,} Β· Val: {val_size:,} Β· Test: {len(X_test):,}")
|
| 165 |
+
|
| 166 |
+
# Show class distribution to check for degenerate labels
|
| 167 |
+
with st.expander("π¬ Label distribution (train set)", expanded=False):
|
| 168 |
+
unique, counts = np.unique(y_train_l, return_counts=True)
|
| 169 |
+
label_names = [target_etfs[i].replace("_Ret","") if i < n_etfs else "CASH" for i in unique]
|
| 170 |
+
dist_df = pd.DataFrame({"Class": label_names, "Count": counts,
|
| 171 |
+
"Pct": (counts / counts.sum() * 100).round(1)})
|
| 172 |
+
st.dataframe(dist_df)
|
| 173 |
|
| 174 |
# ββ Train all three approaches ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 175 |
results = {}
|
| 176 |
trained_info = {}
|
| 177 |
+
progress = st.progress(0, text="Starting training...")
|
| 178 |
|
| 179 |
+
# Approach 1
|
|
|
|
|
|
|
| 180 |
with st.spinner("π Training Approach 1 β Wavelet CNN-LSTM..."):
|
| 181 |
try:
|
| 182 |
model1, hist1, _ = train_approach1(
|
| 183 |
+
X_train_s, y_train_l, X_val_s, y_val_l,
|
|
|
|
| 184 |
n_classes=n_classes, epochs=int(epochs),
|
| 185 |
)
|
| 186 |
preds1, proba1 = predict_approach1(model1, X_test_s)
|
|
|
|
| 196 |
|
| 197 |
progress.progress(33, text="Approach 1 done...")
|
| 198 |
|
| 199 |
+
# Approach 2
|
| 200 |
with st.spinner("π Training Approach 2 β Regime-Conditioned CNN-LSTM..."):
|
| 201 |
try:
|
| 202 |
model2, hist2, hmm2, regime_cols2 = train_approach2(
|
| 203 |
+
X_train_s, y_train_l, X_val_s, y_val_l,
|
| 204 |
+
X_flat_all=X_raw, feature_names=input_features,
|
| 205 |
+
lookback=lookback, train_size=train_size, val_size=val_size,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
n_classes=n_classes, epochs=int(epochs),
|
| 207 |
)
|
| 208 |
preds2, proba2 = predict_approach2(
|
|
|
|
| 221 |
|
| 222 |
progress.progress(66, text="Approach 2 done...")
|
| 223 |
|
| 224 |
+
# Approach 3
|
| 225 |
with st.spinner("π‘ Training Approach 3 β Multi-Scale CNN-LSTM..."):
|
| 226 |
try:
|
| 227 |
model3, hist3 = train_approach3(
|
| 228 |
+
X_train_s, y_train_l, X_val_s, y_val_l,
|
|
|
|
| 229 |
n_classes=n_classes, epochs=int(epochs),
|
| 230 |
)
|
| 231 |
preds3, proba3 = predict_approach3(model3, X_test_s)
|
|
|
|
| 251 |
st.stop()
|
| 252 |
|
| 253 |
next_date = get_next_signal_date()
|
|
|
|
| 254 |
st.divider()
|
| 255 |
|
|
|
|
| 256 |
show_signal_banner(winner_res["next_signal"], next_date, winner_name)
|
| 257 |
|
|
|
|
| 258 |
winner_proba = trained_info[winner_name]["proba"]
|
| 259 |
conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash)
|
| 260 |
show_conviction_panel(conviction)
|
| 261 |
|
| 262 |
st.divider()
|
|
|
|
|
|
|
| 263 |
st.subheader(f"π {winner_name} β Performance Metrics")
|
| 264 |
show_metrics_row(winner_res, tbill_rate)
|
| 265 |
|
| 266 |
st.divider()
|
|
|
|
|
|
|
| 267 |
st.subheader("π Approach Comparison (Winner = Highest Raw Annualised Return)")
|
| 268 |
comparison_df = build_comparison_table(results, winner_name)
|
| 269 |
show_comparison_table(comparison_df)
|
|
|
|
| 270 |
st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True)
|
| 271 |
|
| 272 |
st.divider()
|
|
|
|
|
|
|
| 273 |
st.subheader("π Out-of-Sample Equity Curves β All Approaches vs Benchmarks")
|
| 274 |
fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
|
| 275 |
st.plotly_chart(fig, use_container_width=True)
|
| 276 |
|
| 277 |
st.divider()
|
|
|
|
|
|
|
| 278 |
st.subheader(f"π Audit Trail β {winner_name} (Last 20 Trading Days)")
|
| 279 |
show_audit_trail(winner_res["audit_trail"])
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py
CHANGED
|
@@ -4,7 +4,7 @@ Loads master_data.parquet from HF Dataset.
|
|
| 4 |
Validates freshness against the last NYSE trading day.
|
| 5 |
No external pings β all data comes from HF Dataset only.
|
| 6 |
|
| 7 |
-
Actual dataset columns (from parquet inspection):
|
| 8 |
ETFs : AGG, GLD, SLV, SPY, TBT, TLT, VNQ
|
| 9 |
Macro : VIX, DXY, T10Y2Y, TBILL_3M, IG_SPREAD, HY_SPREAD
|
| 10 |
"""
|
|
@@ -15,7 +15,6 @@ import streamlit as st
|
|
| 15 |
from huggingface_hub import hf_hub_download
|
| 16 |
from datetime import datetime, timedelta
|
| 17 |
import pytz
|
| 18 |
-
import os
|
| 19 |
|
| 20 |
try:
|
| 21 |
import pandas_market_calendars as mcal
|
|
@@ -26,33 +25,27 @@ except ImportError:
|
|
| 26 |
DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
|
| 27 |
PARQUET_FILE = "master_data.parquet"
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
MACRO_COLS = ["VIX", "DXY", "T10Y2Y", "IG_SPREAD", "HY_SPREAD"]
|
| 34 |
|
| 35 |
|
| 36 |
-
# ββ NYSE calendar
|
| 37 |
|
| 38 |
def get_last_nyse_trading_day(as_of=None):
|
| 39 |
-
"""Return the most recent NYSE trading day on or before as_of (default: today EST)."""
|
| 40 |
est = pytz.timezone("US/Eastern")
|
| 41 |
if as_of is None:
|
| 42 |
as_of = datetime.now(est)
|
| 43 |
today = as_of.date()
|
| 44 |
-
|
| 45 |
if NYSE_CAL_AVAILABLE:
|
| 46 |
try:
|
| 47 |
nyse = mcal.get_calendar("NYSE")
|
| 48 |
-
|
| 49 |
-
sched = nyse.schedule(start_date=start, end_date=today)
|
| 50 |
if len(sched) > 0:
|
| 51 |
return sched.index[-1].date()
|
| 52 |
except Exception:
|
| 53 |
pass
|
| 54 |
-
|
| 55 |
-
# Fallback: skip weekends
|
| 56 |
candidate = today
|
| 57 |
while candidate.weekday() >= 5:
|
| 58 |
candidate -= timedelta(days=1)
|
|
@@ -63,10 +56,6 @@ def get_last_nyse_trading_day(as_of=None):
|
|
| 63 |
|
| 64 |
@st.cache_data(ttl=3600, show_spinner=False)
|
| 65 |
def load_dataset(hf_token: str) -> pd.DataFrame:
|
| 66 |
-
"""
|
| 67 |
-
Download master_data.parquet from HF Dataset and return as DataFrame.
|
| 68 |
-
Cached for 1 hour. Index is parsed as DatetimeIndex.
|
| 69 |
-
"""
|
| 70 |
try:
|
| 71 |
path = hf_hub_download(
|
| 72 |
repo_id=DATASET_REPO,
|
|
@@ -76,7 +65,6 @@ def load_dataset(hf_token: str) -> pd.DataFrame:
|
|
| 76 |
)
|
| 77 |
df = pd.read_parquet(path)
|
| 78 |
|
| 79 |
-
# Ensure DatetimeIndex
|
| 80 |
if not isinstance(df.index, pd.DatetimeIndex):
|
| 81 |
for col in ["Date", "date", "DATE"]:
|
| 82 |
if col in df.columns:
|
|
@@ -84,66 +72,66 @@ def load_dataset(hf_token: str) -> pd.DataFrame:
|
|
| 84 |
break
|
| 85 |
df.index = pd.to_datetime(df.index)
|
| 86 |
|
| 87 |
-
|
| 88 |
-
return df
|
| 89 |
|
| 90 |
except Exception as e:
|
| 91 |
-
st.error(f"β Failed to load dataset
|
| 92 |
return pd.DataFrame()
|
| 93 |
|
| 94 |
|
| 95 |
# ββ Freshness check βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 96 |
|
| 97 |
def check_data_freshness(df: pd.DataFrame) -> dict:
|
| 98 |
-
"""
|
| 99 |
-
Check whether the dataset contains data for the last NYSE trading day.
|
| 100 |
-
"""
|
| 101 |
if df.empty:
|
| 102 |
-
return {
|
| 103 |
-
"fresh": False,
|
| 104 |
-
"last_date_in_data": None,
|
| 105 |
-
"expected_date": None,
|
| 106 |
-
"message": "Dataset is empty.",
|
| 107 |
-
}
|
| 108 |
-
|
| 109 |
-
last_date_in_data = df.index[-1].date()
|
| 110 |
-
expected_date = get_last_nyse_trading_day()
|
| 111 |
-
fresh = last_date_in_data >= expected_date
|
| 112 |
-
|
| 113 |
-
if fresh:
|
| 114 |
-
message = f"β
Dataset is up to date through **{last_date_in_data}**."
|
| 115 |
-
else:
|
| 116 |
-
message = (
|
| 117 |
-
f"β οΈ **{expected_date}** data not yet updated in dataset. "
|
| 118 |
-
f"Latest available: **{last_date_in_data}**. "
|
| 119 |
-
f"Please check back later β the dataset updates daily after market close."
|
| 120 |
-
)
|
| 121 |
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
|
| 130 |
# ββ Feature / target extraction βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 131 |
|
| 132 |
def get_features_and_targets(df: pd.DataFrame):
|
| 133 |
"""
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
The dataset stores raw price or return values directly under ticker names.
|
| 137 |
-
We compute daily log returns for target ETFs if they are not already returns.
|
| 138 |
|
| 139 |
Returns:
|
| 140 |
-
input_features : list
|
| 141 |
-
target_etfs : list
|
| 142 |
-
tbill_rate :
|
| 143 |
-
df : DataFrame
|
|
|
|
| 144 |
"""
|
| 145 |
-
|
| 146 |
-
# ββ Confirm target ETFs exist βββββββββββββββββββββββββββββββββββββββββββββ
|
| 147 |
missing = [c for c in TARGET_ETF_COLS if c not in df.columns]
|
| 148 |
if missing:
|
| 149 |
raise ValueError(
|
|
@@ -151,71 +139,75 @@ def get_features_and_targets(df: pd.DataFrame):
|
|
| 151 |
f"Found in dataset: {list(df.columns)}"
|
| 152 |
)
|
| 153 |
|
| 154 |
-
|
| 155 |
-
# If values look like prices (>5), compute pct returns.
|
| 156 |
-
# If they already look like small returns (<1 in abs), use as-is.
|
| 157 |
-
target_etfs = []
|
| 158 |
-
for col in TARGET_ETF_COLS:
|
| 159 |
-
ret_col = f"{col}_Ret"
|
| 160 |
-
if ret_col not in df.columns:
|
| 161 |
-
sample = df[col].dropna()
|
| 162 |
-
if len(sample) > 0 and abs(sample.median()) > 1:
|
| 163 |
-
# Looks like price β compute pct change
|
| 164 |
-
df[ret_col] = df[col].pct_change()
|
| 165 |
-
else:
|
| 166 |
-
# Already returns
|
| 167 |
-
df[ret_col] = df[col]
|
| 168 |
-
target_etfs.append(ret_col)
|
| 169 |
-
|
| 170 |
-
# Same for benchmarks
|
| 171 |
-
for col in BENCHMARK_COLS:
|
| 172 |
-
ret_col = f"{col}_Ret"
|
| 173 |
-
if ret_col not in df.columns and col in df.columns:
|
| 174 |
-
sample = df[col].dropna()
|
| 175 |
-
if len(sample) > 0 and abs(sample.median()) > 1:
|
| 176 |
-
df[ret_col] = df[col].pct_change()
|
| 177 |
-
else:
|
| 178 |
-
df[ret_col] = df[col]
|
| 179 |
|
| 180 |
-
#
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
# ββ Input features ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 184 |
-
# Use macro columns directly; exclude ETF price/return cols and benchmarks
|
| 185 |
exclude = set(
|
| 186 |
-
TARGET_ETF_COLS + BENCHMARK_COLS +
|
| 187 |
-
|
| 188 |
-
[f"{c}_Ret" for c in BENCHMARK_COLS] +
|
| 189 |
-
[TBILL_COL]
|
| 190 |
)
|
| 191 |
|
| 192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
c for c in df.columns
|
| 194 |
if c not in exclude
|
| 195 |
-
and c in
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
])
|
| 200 |
]
|
|
|
|
| 201 |
|
| 202 |
-
# Fallback:
|
| 203 |
if not input_features:
|
| 204 |
input_features = [
|
| 205 |
c for c in df.columns
|
| 206 |
-
if c not in exclude
|
| 207 |
-
and pd.api.types.is_numeric_dtype(df[c])
|
| 208 |
]
|
| 209 |
|
| 210 |
# ββ T-bill rate βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 211 |
-
tbill_rate = 0.045
|
| 212 |
if TBILL_COL in df.columns:
|
| 213 |
raw = df[TBILL_COL].dropna()
|
| 214 |
if len(raw) > 0:
|
| 215 |
-
|
| 216 |
-
tbill_rate =
|
| 217 |
|
| 218 |
-
return input_features, target_etfs, tbill_rate, df
|
| 219 |
|
| 220 |
|
| 221 |
# ββ Dataset summary βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -228,8 +220,9 @@ def dataset_summary(df: pd.DataFrame) -> dict:
|
|
| 228 |
"columns": len(df.columns),
|
| 229 |
"start_date": df.index[0].strftime("%Y-%m-%d"),
|
| 230 |
"end_date": df.index[-1].strftime("%Y-%m-%d"),
|
| 231 |
-
"etfs_found": [c for c in TARGET_ETF_COLS
|
| 232 |
-
"benchmarks": [c for c in BENCHMARK_COLS
|
| 233 |
-
"macro_found": [c for c in MACRO_COLS
|
| 234 |
"tbill_found": TBILL_COL in df.columns,
|
|
|
|
| 235 |
}
|
|
|
|
| 4 |
Validates freshness against the last NYSE trading day.
|
| 5 |
No external pings β all data comes from HF Dataset only.
|
| 6 |
|
| 7 |
+
Actual dataset columns (confirmed from parquet inspection):
|
| 8 |
ETFs : AGG, GLD, SLV, SPY, TBT, TLT, VNQ
|
| 9 |
Macro : VIX, DXY, T10Y2Y, TBILL_3M, IG_SPREAD, HY_SPREAD
|
| 10 |
"""
|
|
|
|
| 15 |
from huggingface_hub import hf_hub_download
|
| 16 |
from datetime import datetime, timedelta
|
| 17 |
import pytz
|
|
|
|
| 18 |
|
| 19 |
try:
|
| 20 |
import pandas_market_calendars as mcal
|
|
|
|
| 25 |
DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
|
| 26 |
PARQUET_FILE = "master_data.parquet"
|
| 27 |
|
| 28 |
+
TARGET_ETF_COLS = ["TLT", "TBT", "VNQ", "SLV", "GLD"]
|
| 29 |
+
BENCHMARK_COLS = ["SPY", "AGG"]
|
| 30 |
+
TBILL_COL = "TBILL_3M"
|
| 31 |
+
MACRO_COLS = ["VIX", "DXY", "T10Y2Y", "IG_SPREAD", "HY_SPREAD"]
|
|
|
|
| 32 |
|
| 33 |
|
| 34 |
+
# ββ NYSE calendar βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 35 |
|
| 36 |
def get_last_nyse_trading_day(as_of=None):
|
|
|
|
| 37 |
est = pytz.timezone("US/Eastern")
|
| 38 |
if as_of is None:
|
| 39 |
as_of = datetime.now(est)
|
| 40 |
today = as_of.date()
|
|
|
|
| 41 |
if NYSE_CAL_AVAILABLE:
|
| 42 |
try:
|
| 43 |
nyse = mcal.get_calendar("NYSE")
|
| 44 |
+
sched = nyse.schedule(start_date=today - timedelta(days=10), end_date=today)
|
|
|
|
| 45 |
if len(sched) > 0:
|
| 46 |
return sched.index[-1].date()
|
| 47 |
except Exception:
|
| 48 |
pass
|
|
|
|
|
|
|
| 49 |
candidate = today
|
| 50 |
while candidate.weekday() >= 5:
|
| 51 |
candidate -= timedelta(days=1)
|
|
|
|
| 56 |
|
| 57 |
@st.cache_data(ttl=3600, show_spinner=False)
|
| 58 |
def load_dataset(hf_token: str) -> pd.DataFrame:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
try:
|
| 60 |
path = hf_hub_download(
|
| 61 |
repo_id=DATASET_REPO,
|
|
|
|
| 65 |
)
|
| 66 |
df = pd.read_parquet(path)
|
| 67 |
|
|
|
|
| 68 |
if not isinstance(df.index, pd.DatetimeIndex):
|
| 69 |
for col in ["Date", "date", "DATE"]:
|
| 70 |
if col in df.columns:
|
|
|
|
| 72 |
break
|
| 73 |
df.index = pd.to_datetime(df.index)
|
| 74 |
|
| 75 |
+
return df.sort_index()
|
|
|
|
| 76 |
|
| 77 |
except Exception as e:
|
| 78 |
+
st.error(f"β Failed to load dataset: {e}")
|
| 79 |
return pd.DataFrame()
|
| 80 |
|
| 81 |
|
| 82 |
# ββ Freshness check βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 83 |
|
| 84 |
def check_data_freshness(df: pd.DataFrame) -> dict:
|
|
|
|
|
|
|
|
|
|
| 85 |
if df.empty:
|
| 86 |
+
return {"fresh": False, "message": "Dataset is empty."}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
+
last = df.index[-1].date()
|
| 89 |
+
expect = get_last_nyse_trading_day()
|
| 90 |
+
fresh = last >= expect
|
| 91 |
+
|
| 92 |
+
msg = (
|
| 93 |
+
f"β
Dataset up to date through **{last}**." if fresh else
|
| 94 |
+
f"β οΈ **{expect}** data not yet updated. Latest: **{last}**. "
|
| 95 |
+
f"Dataset updates daily after market close."
|
| 96 |
+
)
|
| 97 |
+
return {"fresh": fresh, "last_date_in_data": last,
|
| 98 |
+
"expected_date": expect, "message": msg}
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
# ββ Detect whether a column holds prices or returns βββββββββββββββββββββββββββ
|
| 102 |
+
|
| 103 |
+
def _is_price_series(series: pd.Series) -> bool:
|
| 104 |
+
"""
|
| 105 |
+
Heuristic: a price series has abs(median) > 2 and std/mean < 0.5.
|
| 106 |
+
A return series has abs(median) < 0.1 and many values near zero.
|
| 107 |
+
"""
|
| 108 |
+
clean = series.dropna()
|
| 109 |
+
if len(clean) == 0:
|
| 110 |
+
return False
|
| 111 |
+
med = abs(clean.median())
|
| 112 |
+
# Strong price signal: median > 2 (e.g. TLT ~ 90, TBT ~ 20)
|
| 113 |
+
if med > 2:
|
| 114 |
+
return True
|
| 115 |
+
# Strong return signal: most values between -0.2 and 0.2
|
| 116 |
+
if (clean.abs() < 0.2).mean() > 0.9:
|
| 117 |
+
return False
|
| 118 |
+
return med > 0.5
|
| 119 |
|
| 120 |
|
| 121 |
# ββ Feature / target extraction βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 122 |
|
| 123 |
def get_features_and_targets(df: pd.DataFrame):
|
| 124 |
"""
|
| 125 |
+
Build return columns for target ETFs and benchmarks.
|
| 126 |
+
Auto-detects whether source columns are prices or already returns.
|
|
|
|
|
|
|
| 127 |
|
| 128 |
Returns:
|
| 129 |
+
input_features : list[str]
|
| 130 |
+
target_etfs : list[str] e.g. ["TLT_Ret", ...]
|
| 131 |
+
tbill_rate : float
|
| 132 |
+
df : DataFrame with _Ret columns added
|
| 133 |
+
col_info : dict of diagnostics for sidebar display
|
| 134 |
"""
|
|
|
|
|
|
|
| 135 |
missing = [c for c in TARGET_ETF_COLS if c not in df.columns]
|
| 136 |
if missing:
|
| 137 |
raise ValueError(
|
|
|
|
| 139 |
f"Found in dataset: {list(df.columns)}"
|
| 140 |
)
|
| 141 |
|
| 142 |
+
col_info = {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
|
| 144 |
+
# ββ Build _Ret columns ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 145 |
+
def make_ret(col):
|
| 146 |
+
ret_col = f"{col}_Ret"
|
| 147 |
+
if ret_col in df.columns:
|
| 148 |
+
col_info[col] = "pre-computed _Ret"
|
| 149 |
+
return ret_col
|
| 150 |
+
if _is_price_series(df[col]):
|
| 151 |
+
df[ret_col] = df[col].pct_change()
|
| 152 |
+
col_info[col] = f"priceβpct_change (median={df[col].median():.2f})"
|
| 153 |
+
else:
|
| 154 |
+
df[ret_col] = df[col]
|
| 155 |
+
col_info[col] = f"used as-is (median={df[col].median():.4f})"
|
| 156 |
+
return ret_col
|
| 157 |
+
|
| 158 |
+
target_etfs = [make_ret(c) for c in TARGET_ETF_COLS]
|
| 159 |
+
benchmark_rets = [make_ret(c) for c in BENCHMARK_COLS if c in df.columns]
|
| 160 |
+
|
| 161 |
+
# Drop NaN rows (first row from pct_change)
|
| 162 |
+
df = df.dropna(subset=target_etfs).copy()
|
| 163 |
+
|
| 164 |
+
# Sanity check: target returns should be small daily values
|
| 165 |
+
for ret_col in target_etfs:
|
| 166 |
+
med = df[ret_col].abs().median()
|
| 167 |
+
if med > 0.1:
|
| 168 |
+
st.warning(
|
| 169 |
+
f"β οΈ {ret_col} has median absolute value {med:.4f} β "
|
| 170 |
+
f"these may not be daily returns. Check dataset column '{ret_col.replace('_Ret','')}'. "
|
| 171 |
+
f"Sample values: {df[ret_col].tail(3).values}"
|
| 172 |
+
)
|
| 173 |
|
| 174 |
# ββ Input features ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 175 |
exclude = set(
|
| 176 |
+
TARGET_ETF_COLS + BENCHMARK_COLS + target_etfs + benchmark_rets +
|
| 177 |
+
[f"{c}_Ret" for c in BENCHMARK_COLS] + [TBILL_COL]
|
|
|
|
|
|
|
| 178 |
)
|
| 179 |
|
| 180 |
+
# First try known macro columns
|
| 181 |
+
input_features = [c for c in MACRO_COLS if c in df.columns and c not in exclude]
|
| 182 |
+
|
| 183 |
+
# Then add any engineered signal columns
|
| 184 |
+
extra = [
|
| 185 |
c for c in df.columns
|
| 186 |
if c not in exclude
|
| 187 |
+
and c not in input_features
|
| 188 |
+
and any(k in c for k in ["_Z", "_Vol", "Regime", "YC_", "Credit_",
|
| 189 |
+
"Rates_", "VIX_", "Spread", "DXY", "T10Y",
|
| 190 |
+
"TBILL", "SOFR", "MOVE"])
|
| 191 |
+
and pd.api.types.is_numeric_dtype(df[c])
|
| 192 |
]
|
| 193 |
+
input_features += extra
|
| 194 |
|
| 195 |
+
# Fallback: all numeric non-excluded columns
|
| 196 |
if not input_features:
|
| 197 |
input_features = [
|
| 198 |
c for c in df.columns
|
| 199 |
+
if c not in exclude and pd.api.types.is_numeric_dtype(df[c])
|
|
|
|
| 200 |
]
|
| 201 |
|
| 202 |
# ββ T-bill rate βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 203 |
+
tbill_rate = 0.045
|
| 204 |
if TBILL_COL in df.columns:
|
| 205 |
raw = df[TBILL_COL].dropna()
|
| 206 |
if len(raw) > 0:
|
| 207 |
+
v = float(raw.iloc[-1])
|
| 208 |
+
tbill_rate = v / 100 if v > 1 else v
|
| 209 |
|
| 210 |
+
return input_features, target_etfs, tbill_rate, df, col_info
|
| 211 |
|
| 212 |
|
| 213 |
# ββ Dataset summary βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 220 |
"columns": len(df.columns),
|
| 221 |
"start_date": df.index[0].strftime("%Y-%m-%d"),
|
| 222 |
"end_date": df.index[-1].strftime("%Y-%m-%d"),
|
| 223 |
+
"etfs_found": [c for c in TARGET_ETF_COLS if c in df.columns],
|
| 224 |
+
"benchmarks": [c for c in BENCHMARK_COLS if c in df.columns],
|
| 225 |
+
"macro_found": [c for c in MACRO_COLS if c in df.columns],
|
| 226 |
"tbill_found": TBILL_COL in df.columns,
|
| 227 |
+
"all_cols": list(df.columns),
|
| 228 |
}
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py
CHANGED
|
@@ -10,9 +10,11 @@ import pandas as pd
|
|
| 10 |
import numpy as np
|
| 11 |
|
| 12 |
# ββ Module imports ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 13 |
-
from data.loader import load_dataset, check_data_freshness,
|
|
|
|
| 14 |
from utils.calendar import get_est_time, is_sync_window, get_next_signal_date
|
| 15 |
-
from models.base import build_sequences, train_val_test_split,
|
|
|
|
| 16 |
from models.approach1_wavelet import train_approach1, predict_approach1
|
| 17 |
from models.approach2_regime import train_approach2, predict_approach2
|
| 18 |
from models.approach3_multiscale import train_approach3, predict_approach3
|
|
@@ -47,10 +49,10 @@ with st.sidebar:
|
|
| 47 |
|
| 48 |
st.divider()
|
| 49 |
|
| 50 |
-
start_yr
|
| 51 |
-
fee_bps
|
| 52 |
-
lookback
|
| 53 |
-
epochs
|
| 54 |
|
| 55 |
st.divider()
|
| 56 |
|
|
@@ -58,8 +60,10 @@ with st.sidebar:
|
|
| 58 |
split_map = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)}
|
| 59 |
train_pct, val_pct = split_map[split_option]
|
| 60 |
|
| 61 |
-
include_cash = st.checkbox(
|
| 62 |
-
|
|
|
|
|
|
|
| 63 |
|
| 64 |
st.divider()
|
| 65 |
|
|
@@ -70,90 +74,102 @@ st.title("π§ P2-ETF-CNN-LSTM")
|
|
| 70 |
st.caption("Approach 1: Wavelet Β· Approach 2: Regime-Conditioned Β· Approach 3: Multi-Scale Parallel")
|
| 71 |
st.caption("Winner selected by highest raw annualised return on out-of-sample test set.")
|
| 72 |
|
| 73 |
-
# ββ
|
| 74 |
if not HF_TOKEN:
|
| 75 |
-
st.error("β HF_TOKEN secret not found.
|
| 76 |
st.stop()
|
| 77 |
|
|
|
|
| 78 |
with st.spinner("π‘ Loading dataset from HuggingFace..."):
|
| 79 |
-
|
| 80 |
|
| 81 |
-
if
|
| 82 |
st.stop()
|
| 83 |
|
| 84 |
# ββ Freshness check βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 85 |
-
freshness = check_data_freshness(
|
| 86 |
show_freshness_status(freshness)
|
| 87 |
|
| 88 |
# ββ Dataset summary in sidebar ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 89 |
with st.sidebar:
|
| 90 |
st.divider()
|
| 91 |
st.subheader("π¦ Dataset Info")
|
| 92 |
-
summary = dataset_summary(
|
| 93 |
if summary:
|
| 94 |
st.write(f"**Rows:** {summary['rows']:,}")
|
| 95 |
st.write(f"**Range:** {summary['start_date']} β {summary['end_date']}")
|
| 96 |
-
st.write(f"**ETFs:** {', '.join(
|
| 97 |
-
st.write(f"**Benchmarks:** {', '.join(
|
|
|
|
| 98 |
st.write(f"**T-bill col:** {'β
' if summary['tbill_found'] else 'β'}")
|
| 99 |
|
| 100 |
-
# ββ
|
| 101 |
if not run_button:
|
| 102 |
-
st.info("π Configure parameters in the sidebar and click **π Run All 3 Approaches**
|
| 103 |
st.stop()
|
| 104 |
|
| 105 |
# ββ Filter by start year ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 106 |
-
df =
|
| 107 |
-
st.write(
|
| 108 |
-
|
|
|
|
|
|
|
| 109 |
|
| 110 |
-
# ββ
|
| 111 |
try:
|
| 112 |
-
input_features, target_etfs, tbill_rate = get_features_and_targets(df)
|
| 113 |
except ValueError as e:
|
| 114 |
st.error(str(e))
|
| 115 |
st.stop()
|
| 116 |
|
| 117 |
-
|
| 118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
-
# ββ
|
| 121 |
-
X_raw
|
| 122 |
-
y_raw
|
| 123 |
-
n_etfs = len(target_etfs)
|
| 124 |
-
n_classes = n_etfs + (1 if include_cash else 0) # +1 for CASH
|
| 125 |
|
| 126 |
-
# Fill NaNs with column means
|
| 127 |
col_means = np.nanmean(X_raw, axis=0)
|
| 128 |
for j in range(X_raw.shape[1]):
|
| 129 |
mask = np.isnan(X_raw[:, j])
|
| 130 |
-
|
|
|
|
| 131 |
|
| 132 |
X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
|
| 133 |
y_labels = returns_to_labels(y_seq, include_cash=include_cash)
|
| 134 |
|
| 135 |
-
X_train, y_train_r, X_val, y_val_r,
|
| 136 |
-
|
|
|
|
|
|
|
| 137 |
|
| 138 |
X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
|
| 139 |
|
| 140 |
train_size = len(X_train)
|
| 141 |
val_size = len(X_val)
|
| 142 |
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
test_slice = slice(test_start, test_start + len(X_test))
|
| 147 |
|
| 148 |
-
st.success(
|
|
|
|
|
|
|
| 149 |
|
| 150 |
# ββ Train all three approaches ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 151 |
results = {}
|
| 152 |
-
trained_info = {}
|
| 153 |
|
| 154 |
progress = st.progress(0, text="Starting training...")
|
| 155 |
|
| 156 |
-
# ββ Approach 1
|
| 157 |
with st.spinner("π Training Approach 1 β Wavelet CNN-LSTM..."):
|
| 158 |
try:
|
| 159 |
model1, hist1, _ = train_approach1(
|
|
@@ -163,7 +179,8 @@ with st.spinner("π Training Approach 1 β Wavelet CNN-LSTM..."):
|
|
| 163 |
)
|
| 164 |
preds1, proba1 = predict_approach1(model1, X_test_s)
|
| 165 |
results["Approach 1"] = execute_strategy(
|
| 166 |
-
preds1, proba1, y_test_r, test_dates,
|
|
|
|
| 167 |
)
|
| 168 |
trained_info["Approach 1"] = {"proba": proba1}
|
| 169 |
st.success("β
Approach 1 complete")
|
|
@@ -173,7 +190,7 @@ with st.spinner("π Training Approach 1 β Wavelet CNN-LSTM..."):
|
|
| 173 |
|
| 174 |
progress.progress(33, text="Approach 1 done...")
|
| 175 |
|
| 176 |
-
# ββ Approach 2
|
| 177 |
with st.spinner("π Training Approach 2 β Regime-Conditioned CNN-LSTM..."):
|
| 178 |
try:
|
| 179 |
model2, hist2, hmm2, regime_cols2 = train_approach2(
|
|
@@ -191,7 +208,8 @@ with st.spinner("π Training Approach 2 β Regime-Conditioned CNN-LSTM..."):
|
|
| 191 |
lookback, train_size, val_size,
|
| 192 |
)
|
| 193 |
results["Approach 2"] = execute_strategy(
|
| 194 |
-
preds2, proba2, y_test_r, test_dates,
|
|
|
|
| 195 |
)
|
| 196 |
trained_info["Approach 2"] = {"proba": proba2}
|
| 197 |
st.success("β
Approach 2 complete")
|
|
@@ -201,7 +219,7 @@ with st.spinner("π Training Approach 2 β Regime-Conditioned CNN-LSTM..."):
|
|
| 201 |
|
| 202 |
progress.progress(66, text="Approach 2 done...")
|
| 203 |
|
| 204 |
-
# ββ Approach 3
|
| 205 |
with st.spinner("π‘ Training Approach 3 β Multi-Scale CNN-LSTM..."):
|
| 206 |
try:
|
| 207 |
model3, hist3 = train_approach3(
|
|
@@ -211,7 +229,8 @@ with st.spinner("π‘ Training Approach 3 β Multi-Scale CNN-LSTM..."):
|
|
| 211 |
)
|
| 212 |
preds3, proba3 = predict_approach3(model3, X_test_s)
|
| 213 |
results["Approach 3"] = execute_strategy(
|
| 214 |
-
preds3, proba3, y_test_r, test_dates,
|
|
|
|
| 215 |
)
|
| 216 |
trained_info["Approach 3"] = {"proba": proba3}
|
| 217 |
st.success("β
Approach 3 complete")
|
|
@@ -227,15 +246,14 @@ winner_name = select_winner(results)
|
|
| 227 |
winner_res = results.get(winner_name)
|
| 228 |
|
| 229 |
if winner_res is None:
|
| 230 |
-
st.error("β All approaches failed. Please check
|
| 231 |
st.stop()
|
| 232 |
|
| 233 |
-
# ββ Next trading date βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 234 |
next_date = get_next_signal_date()
|
| 235 |
|
| 236 |
st.divider()
|
| 237 |
|
| 238 |
-
# ββ Signal banner
|
| 239 |
show_signal_banner(winner_res["next_signal"], next_date, winner_name)
|
| 240 |
|
| 241 |
# ββ Conviction panel ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -256,7 +274,6 @@ st.subheader("π Approach Comparison (Winner = Highest Raw Annualised Return)"
|
|
| 256 |
comparison_df = build_comparison_table(results, winner_name)
|
| 257 |
show_comparison_table(comparison_df)
|
| 258 |
|
| 259 |
-
# ββ Comparison bar chart ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 260 |
st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True)
|
| 261 |
|
| 262 |
st.divider()
|
|
@@ -268,6 +285,6 @@ st.plotly_chart(fig, use_container_width=True)
|
|
| 268 |
|
| 269 |
st.divider()
|
| 270 |
|
| 271 |
-
# ββ Audit trail
|
| 272 |
st.subheader(f"π Audit Trail β {winner_name} (Last 20 Trading Days)")
|
| 273 |
show_audit_trail(winner_res["audit_trail"])
|
|
|
|
| 10 |
import numpy as np
|
| 11 |
|
| 12 |
# ββ Module imports ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 13 |
+
from data.loader import (load_dataset, check_data_freshness,
|
| 14 |
+
get_features_and_targets, dataset_summary)
|
| 15 |
from utils.calendar import get_est_time, is_sync_window, get_next_signal_date
|
| 16 |
+
from models.base import (build_sequences, train_val_test_split,
|
| 17 |
+
scale_features, returns_to_labels)
|
| 18 |
from models.approach1_wavelet import train_approach1, predict_approach1
|
| 19 |
from models.approach2_regime import train_approach2, predict_approach2
|
| 20 |
from models.approach3_multiscale import train_approach3, predict_approach3
|
|
|
|
| 49 |
|
| 50 |
st.divider()
|
| 51 |
|
| 52 |
+
start_yr = st.slider("π
Start Year", 2010, 2024, 2016)
|
| 53 |
+
fee_bps = st.slider("π° Fee (bps)", 0, 50, 10)
|
| 54 |
+
lookback = st.slider("π Lookback (days)", 20, 60, 30, step=5)
|
| 55 |
+
epochs = st.number_input("π Max Epochs", 20, 300, 100, step=10)
|
| 56 |
|
| 57 |
st.divider()
|
| 58 |
|
|
|
|
| 60 |
split_map = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)}
|
| 61 |
train_pct, val_pct = split_map[split_option]
|
| 62 |
|
| 63 |
+
include_cash = st.checkbox(
|
| 64 |
+
"π΅ Include CASH class", value=True,
|
| 65 |
+
help="Model can select CASH (earns T-bill rate) instead of any ETF",
|
| 66 |
+
)
|
| 67 |
|
| 68 |
st.divider()
|
| 69 |
|
|
|
|
| 74 |
st.caption("Approach 1: Wavelet Β· Approach 2: Regime-Conditioned Β· Approach 3: Multi-Scale Parallel")
|
| 75 |
st.caption("Winner selected by highest raw annualised return on out-of-sample test set.")
|
| 76 |
|
| 77 |
+
# ββ Token check βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 78 |
if not HF_TOKEN:
|
| 79 |
+
st.error("β HF_TOKEN secret not found. Add it to HF Space / GitHub secrets.")
|
| 80 |
st.stop()
|
| 81 |
|
| 82 |
+
# ββ Load dataset ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 83 |
with st.spinner("π‘ Loading dataset from HuggingFace..."):
|
| 84 |
+
df_raw = load_dataset(HF_TOKEN)
|
| 85 |
|
| 86 |
+
if df_raw.empty:
|
| 87 |
st.stop()
|
| 88 |
|
| 89 |
# ββ Freshness check βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 90 |
+
freshness = check_data_freshness(df_raw)
|
| 91 |
show_freshness_status(freshness)
|
| 92 |
|
| 93 |
# ββ Dataset summary in sidebar ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 94 |
with st.sidebar:
|
| 95 |
st.divider()
|
| 96 |
st.subheader("π¦ Dataset Info")
|
| 97 |
+
summary = dataset_summary(df_raw)
|
| 98 |
if summary:
|
| 99 |
st.write(f"**Rows:** {summary['rows']:,}")
|
| 100 |
st.write(f"**Range:** {summary['start_date']} β {summary['end_date']}")
|
| 101 |
+
st.write(f"**ETFs:** {', '.join(summary['etfs_found'])}")
|
| 102 |
+
st.write(f"**Benchmarks:** {', '.join(summary['benchmarks'])}")
|
| 103 |
+
st.write(f"**Macro:** {', '.join(summary['macro_found'])}")
|
| 104 |
st.write(f"**T-bill col:** {'β
' if summary['tbill_found'] else 'β'}")
|
| 105 |
|
| 106 |
+
# ββ Wait for run button βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 107 |
if not run_button:
|
| 108 |
+
st.info("π Configure parameters in the sidebar and click **π Run All 3 Approaches**.")
|
| 109 |
st.stop()
|
| 110 |
|
| 111 |
# ββ Filter by start year ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 112 |
+
df = df_raw[df_raw.index.year >= start_yr].copy()
|
| 113 |
+
st.write(
|
| 114 |
+
f"π
**Data:** {df.index[0].strftime('%Y-%m-%d')} β {df.index[-1].strftime('%Y-%m-%d')} "
|
| 115 |
+
f"({df.index[-1].year - df.index[0].year + 1} years)"
|
| 116 |
+
)
|
| 117 |
|
| 118 |
+
# ββ Features & targets ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 119 |
try:
|
| 120 |
+
input_features, target_etfs, tbill_rate, df = get_features_and_targets(df)
|
| 121 |
except ValueError as e:
|
| 122 |
st.error(str(e))
|
| 123 |
st.stop()
|
| 124 |
|
| 125 |
+
n_etfs = len(target_etfs)
|
| 126 |
+
n_classes = n_etfs + (1 if include_cash else 0)
|
| 127 |
+
|
| 128 |
+
st.info(
|
| 129 |
+
f"π― **Targets:** {', '.join([t.replace('_Ret','') for t in target_etfs])} Β· "
|
| 130 |
+
f"**Features:** {len(input_features)} signals Β· "
|
| 131 |
+
f"**T-bill:** {tbill_rate*100:.2f}%"
|
| 132 |
+
)
|
| 133 |
|
| 134 |
+
# ββ Build sequences βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 135 |
+
X_raw = df[input_features].values.astype(np.float32)
|
| 136 |
+
y_raw = df[target_etfs].values.astype(np.float32)
|
|
|
|
|
|
|
| 137 |
|
| 138 |
+
# Fill any remaining NaNs with column means
|
| 139 |
col_means = np.nanmean(X_raw, axis=0)
|
| 140 |
for j in range(X_raw.shape[1]):
|
| 141 |
mask = np.isnan(X_raw[:, j])
|
| 142 |
+
if mask.any():
|
| 143 |
+
X_raw[mask, j] = col_means[j]
|
| 144 |
|
| 145 |
X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
|
| 146 |
y_labels = returns_to_labels(y_seq, include_cash=include_cash)
|
| 147 |
|
| 148 |
+
(X_train, y_train_r, X_val, y_val_r,
|
| 149 |
+
X_test, y_test_r) = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
|
| 150 |
+
(_, y_train_l, _, y_val_l,
|
| 151 |
+
_, y_test_l) = train_val_test_split(X_seq, y_labels, train_pct, val_pct)
|
| 152 |
|
| 153 |
X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
|
| 154 |
|
| 155 |
train_size = len(X_train)
|
| 156 |
val_size = len(X_val)
|
| 157 |
|
| 158 |
+
test_start = lookback + train_size + val_size
|
| 159 |
+
test_dates = df.index[test_start: test_start + len(X_test)]
|
| 160 |
+
test_slice = slice(test_start, test_start + len(X_test))
|
|
|
|
| 161 |
|
| 162 |
+
st.success(
|
| 163 |
+
f"β
Sequences β Train: {train_size:,} Β· Val: {val_size:,} Β· Test: {len(X_test):,}"
|
| 164 |
+
)
|
| 165 |
|
| 166 |
# ββ Train all three approaches ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 167 |
results = {}
|
| 168 |
+
trained_info = {}
|
| 169 |
|
| 170 |
progress = st.progress(0, text="Starting training...")
|
| 171 |
|
| 172 |
+
# ββ Approach 1 ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 173 |
with st.spinner("π Training Approach 1 β Wavelet CNN-LSTM..."):
|
| 174 |
try:
|
| 175 |
model1, hist1, _ = train_approach1(
|
|
|
|
| 179 |
)
|
| 180 |
preds1, proba1 = predict_approach1(model1, X_test_s)
|
| 181 |
results["Approach 1"] = execute_strategy(
|
| 182 |
+
preds1, proba1, y_test_r, test_dates,
|
| 183 |
+
target_etfs, fee_bps, tbill_rate, include_cash,
|
| 184 |
)
|
| 185 |
trained_info["Approach 1"] = {"proba": proba1}
|
| 186 |
st.success("β
Approach 1 complete")
|
|
|
|
| 190 |
|
| 191 |
progress.progress(33, text="Approach 1 done...")
|
| 192 |
|
| 193 |
+
# ββ Approach 2 ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 194 |
with st.spinner("π Training Approach 2 β Regime-Conditioned CNN-LSTM..."):
|
| 195 |
try:
|
| 196 |
model2, hist2, hmm2, regime_cols2 = train_approach2(
|
|
|
|
| 208 |
lookback, train_size, val_size,
|
| 209 |
)
|
| 210 |
results["Approach 2"] = execute_strategy(
|
| 211 |
+
preds2, proba2, y_test_r, test_dates,
|
| 212 |
+
target_etfs, fee_bps, tbill_rate, include_cash,
|
| 213 |
)
|
| 214 |
trained_info["Approach 2"] = {"proba": proba2}
|
| 215 |
st.success("β
Approach 2 complete")
|
|
|
|
| 219 |
|
| 220 |
progress.progress(66, text="Approach 2 done...")
|
| 221 |
|
| 222 |
+
# ββ Approach 3 ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 223 |
with st.spinner("π‘ Training Approach 3 β Multi-Scale CNN-LSTM..."):
|
| 224 |
try:
|
| 225 |
model3, hist3 = train_approach3(
|
|
|
|
| 229 |
)
|
| 230 |
preds3, proba3 = predict_approach3(model3, X_test_s)
|
| 231 |
results["Approach 3"] = execute_strategy(
|
| 232 |
+
preds3, proba3, y_test_r, test_dates,
|
| 233 |
+
target_etfs, fee_bps, tbill_rate, include_cash,
|
| 234 |
)
|
| 235 |
trained_info["Approach 3"] = {"proba": proba3}
|
| 236 |
st.success("β
Approach 3 complete")
|
|
|
|
| 246 |
winner_res = results.get(winner_name)
|
| 247 |
|
| 248 |
if winner_res is None:
|
| 249 |
+
st.error("β All approaches failed. Please check data and configuration.")
|
| 250 |
st.stop()
|
| 251 |
|
|
|
|
| 252 |
next_date = get_next_signal_date()
|
| 253 |
|
| 254 |
st.divider()
|
| 255 |
|
| 256 |
+
# ββ Signal banner βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 257 |
show_signal_banner(winner_res["next_signal"], next_date, winner_name)
|
| 258 |
|
| 259 |
# ββ Conviction panel ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 274 |
comparison_df = build_comparison_table(results, winner_name)
|
| 275 |
show_comparison_table(comparison_df)
|
| 276 |
|
|
|
|
| 277 |
st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True)
|
| 278 |
|
| 279 |
st.divider()
|
|
|
|
| 285 |
|
| 286 |
st.divider()
|
| 287 |
|
| 288 |
+
# ββ Audit trail βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 289 |
st.subheader(f"π Audit Trail β {winner_name} (Last 20 Trading Days)")
|
| 290 |
show_audit_trail(winner_res["audit_trail"])
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py
CHANGED
|
@@ -3,6 +3,10 @@ data/loader.py
|
|
| 3 |
Loads master_data.parquet from HF Dataset.
|
| 4 |
Validates freshness against the last NYSE trading day.
|
| 5 |
No external pings β all data comes from HF Dataset only.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
import pandas as pd
|
|
@@ -22,31 +26,29 @@ except ImportError:
|
|
| 22 |
DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
|
| 23 |
PARQUET_FILE = "master_data.parquet"
|
| 24 |
|
| 25 |
-
#
|
| 26 |
-
|
| 27 |
-
BENCHMARK_COLS
|
| 28 |
-
TBILL_COL
|
| 29 |
-
|
| 30 |
|
| 31 |
|
| 32 |
# ββ NYSE calendar helpers βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 33 |
|
| 34 |
-
def get_last_nyse_trading_day(as_of
|
| 35 |
-
"""Return the most recent NYSE trading day
|
| 36 |
est = pytz.timezone("US/Eastern")
|
| 37 |
if as_of is None:
|
| 38 |
as_of = datetime.now(est)
|
| 39 |
-
|
| 40 |
today = as_of.date()
|
| 41 |
|
| 42 |
if NYSE_CAL_AVAILABLE:
|
| 43 |
try:
|
| 44 |
-
nyse
|
| 45 |
-
# Look back up to 10 days to find last trading day
|
| 46 |
start = today - timedelta(days=10)
|
| 47 |
-
|
| 48 |
-
if len(
|
| 49 |
-
return
|
| 50 |
except Exception:
|
| 51 |
pass
|
| 52 |
|
|
@@ -57,18 +59,6 @@ def get_last_nyse_trading_day(as_of: datetime = None) -> datetime.date:
|
|
| 57 |
return candidate
|
| 58 |
|
| 59 |
|
| 60 |
-
def is_nyse_trading_day(date) -> bool:
|
| 61 |
-
"""Return True if date is a NYSE trading day."""
|
| 62 |
-
if NYSE_CAL_AVAILABLE:
|
| 63 |
-
try:
|
| 64 |
-
nyse = mcal.get_calendar("NYSE")
|
| 65 |
-
schedule = nyse.schedule(start_date=date, end_date=date)
|
| 66 |
-
return len(schedule) > 0
|
| 67 |
-
except Exception:
|
| 68 |
-
pass
|
| 69 |
-
return date.weekday() < 5
|
| 70 |
-
|
| 71 |
-
|
| 72 |
# ββ Data loading ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 73 |
|
| 74 |
@st.cache_data(ttl=3600, show_spinner=False)
|
|
@@ -88,10 +78,10 @@ def load_dataset(hf_token: str) -> pd.DataFrame:
|
|
| 88 |
|
| 89 |
# Ensure DatetimeIndex
|
| 90 |
if not isinstance(df.index, pd.DatetimeIndex):
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
df.index = pd.to_datetime(df.index)
|
| 96 |
|
| 97 |
df = df.sort_index()
|
|
@@ -107,14 +97,6 @@ def load_dataset(hf_token: str) -> pd.DataFrame:
|
|
| 107 |
def check_data_freshness(df: pd.DataFrame) -> dict:
|
| 108 |
"""
|
| 109 |
Check whether the dataset contains data for the last NYSE trading day.
|
| 110 |
-
|
| 111 |
-
Returns a dict:
|
| 112 |
-
{
|
| 113 |
-
"fresh": bool,
|
| 114 |
-
"last_date_in_data": date,
|
| 115 |
-
"expected_date": date,
|
| 116 |
-
"message": str
|
| 117 |
-
}
|
| 118 |
"""
|
| 119 |
if df.empty:
|
| 120 |
return {
|
|
@@ -126,8 +108,7 @@ def check_data_freshness(df: pd.DataFrame) -> dict:
|
|
| 126 |
|
| 127 |
last_date_in_data = df.index[-1].date()
|
| 128 |
expected_date = get_last_nyse_trading_day()
|
| 129 |
-
|
| 130 |
-
fresh = last_date_in_data >= expected_date
|
| 131 |
|
| 132 |
if fresh:
|
| 133 |
message = f"β
Dataset is up to date through **{last_date_in_data}**."
|
|
@@ -150,66 +131,105 @@ def check_data_freshness(df: pd.DataFrame) -> dict:
|
|
| 150 |
|
| 151 |
def get_features_and_targets(df: pd.DataFrame):
|
| 152 |
"""
|
| 153 |
-
Extract input feature columns and target ETF return columns
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
Returns:
|
| 156 |
-
input_features : list of column names
|
| 157 |
-
target_etfs : list of ETF
|
| 158 |
-
tbill_rate : latest 3m T-bill rate as
|
|
|
|
| 159 |
"""
|
| 160 |
-
# Target ETF return columns
|
| 161 |
-
target_etfs = [c for c in REQUIRED_ETF_COLS if c in df.columns]
|
| 162 |
|
| 163 |
-
|
|
|
|
|
|
|
| 164 |
raise ValueError(
|
| 165 |
-
f"
|
| 166 |
f"Found in dataset: {list(df.columns)}"
|
| 167 |
)
|
| 168 |
|
| 169 |
-
#
|
| 170 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
input_features = [
|
| 172 |
c for c in df.columns
|
| 173 |
if c not in exclude
|
| 174 |
-
and (
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
or "Credit_" in c
|
| 180 |
-
or "Rates_" in c
|
| 181 |
-
or "VIX_" in c
|
| 182 |
-
or "Spread" in c
|
| 183 |
-
or "DXY" in c
|
| 184 |
-
or "VIX" in c
|
| 185 |
-
or "T10Y" in c
|
| 186 |
-
)
|
| 187 |
]
|
| 188 |
|
| 189 |
-
#
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
if TBILL_COL in df.columns:
|
| 192 |
raw = df[TBILL_COL].dropna()
|
| 193 |
if len(raw) > 0:
|
| 194 |
-
last_val
|
| 195 |
-
|
| 196 |
-
tbill_rate = float(last_val) / 100 if last_val > 1 else float(last_val)
|
| 197 |
|
| 198 |
-
return input_features, target_etfs, tbill_rate
|
| 199 |
|
| 200 |
|
| 201 |
-
# ββ
|
| 202 |
|
| 203 |
def dataset_summary(df: pd.DataFrame) -> dict:
|
| 204 |
-
"""Return a brief summary dict for sidebar display."""
|
| 205 |
if df.empty:
|
| 206 |
return {}
|
| 207 |
return {
|
| 208 |
-
"rows":
|
| 209 |
-
"columns":
|
| 210 |
-
"start_date":
|
| 211 |
-
"end_date":
|
| 212 |
-
"etfs_found":
|
| 213 |
-
"benchmarks":
|
|
|
|
| 214 |
"tbill_found": TBILL_COL in df.columns,
|
| 215 |
}
|
|
|
|
| 3 |
Loads master_data.parquet from HF Dataset.
|
| 4 |
Validates freshness against the last NYSE trading day.
|
| 5 |
No external pings β all data comes from HF Dataset only.
|
| 6 |
+
|
| 7 |
+
Actual dataset columns (from parquet inspection):
|
| 8 |
+
ETFs : AGG, GLD, SLV, SPY, TBT, TLT, VNQ
|
| 9 |
+
Macro : VIX, DXY, T10Y2Y, TBILL_3M, IG_SPREAD, HY_SPREAD
|
| 10 |
"""
|
| 11 |
|
| 12 |
import pandas as pd
|
|
|
|
| 26 |
DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
|
| 27 |
PARQUET_FILE = "master_data.parquet"
|
| 28 |
|
| 29 |
+
# ββ Actual column names in the dataset βββββββββββββββββββββββββββββββββββββββ
|
| 30 |
+
TARGET_ETF_COLS = ["TLT", "TBT", "VNQ", "SLV", "GLD"] # traded ETFs
|
| 31 |
+
BENCHMARK_COLS = ["SPY", "AGG"] # chart only
|
| 32 |
+
TBILL_COL = "TBILL_3M" # 3m T-bill rate
|
| 33 |
+
MACRO_COLS = ["VIX", "DXY", "T10Y2Y", "IG_SPREAD", "HY_SPREAD"]
|
| 34 |
|
| 35 |
|
| 36 |
# ββ NYSE calendar helpers βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 37 |
|
| 38 |
+
def get_last_nyse_trading_day(as_of=None):
|
| 39 |
+
"""Return the most recent NYSE trading day on or before as_of (default: today EST)."""
|
| 40 |
est = pytz.timezone("US/Eastern")
|
| 41 |
if as_of is None:
|
| 42 |
as_of = datetime.now(est)
|
|
|
|
| 43 |
today = as_of.date()
|
| 44 |
|
| 45 |
if NYSE_CAL_AVAILABLE:
|
| 46 |
try:
|
| 47 |
+
nyse = mcal.get_calendar("NYSE")
|
|
|
|
| 48 |
start = today - timedelta(days=10)
|
| 49 |
+
sched = nyse.schedule(start_date=start, end_date=today)
|
| 50 |
+
if len(sched) > 0:
|
| 51 |
+
return sched.index[-1].date()
|
| 52 |
except Exception:
|
| 53 |
pass
|
| 54 |
|
|
|
|
| 59 |
return candidate
|
| 60 |
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
# ββ Data loading ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 63 |
|
| 64 |
@st.cache_data(ttl=3600, show_spinner=False)
|
|
|
|
| 78 |
|
| 79 |
# Ensure DatetimeIndex
|
| 80 |
if not isinstance(df.index, pd.DatetimeIndex):
|
| 81 |
+
for col in ["Date", "date", "DATE"]:
|
| 82 |
+
if col in df.columns:
|
| 83 |
+
df = df.set_index(col)
|
| 84 |
+
break
|
| 85 |
df.index = pd.to_datetime(df.index)
|
| 86 |
|
| 87 |
df = df.sort_index()
|
|
|
|
| 97 |
def check_data_freshness(df: pd.DataFrame) -> dict:
|
| 98 |
"""
|
| 99 |
Check whether the dataset contains data for the last NYSE trading day.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
"""
|
| 101 |
if df.empty:
|
| 102 |
return {
|
|
|
|
| 108 |
|
| 109 |
last_date_in_data = df.index[-1].date()
|
| 110 |
expected_date = get_last_nyse_trading_day()
|
| 111 |
+
fresh = last_date_in_data >= expected_date
|
|
|
|
| 112 |
|
| 113 |
if fresh:
|
| 114 |
message = f"β
Dataset is up to date through **{last_date_in_data}**."
|
|
|
|
| 131 |
|
| 132 |
def get_features_and_targets(df: pd.DataFrame):
|
| 133 |
"""
|
| 134 |
+
Extract input feature columns and target ETF return columns.
|
| 135 |
+
|
| 136 |
+
The dataset stores raw price or return values directly under ticker names.
|
| 137 |
+
We compute daily log returns for target ETFs if they are not already returns.
|
| 138 |
|
| 139 |
Returns:
|
| 140 |
+
input_features : list of column names to use as model inputs
|
| 141 |
+
target_etfs : list of ETF column names (after return computation)
|
| 142 |
+
tbill_rate : latest 3m T-bill rate as float (annualised, e.g. 0.045)
|
| 143 |
+
df : DataFrame (possibly with new _Ret columns added)
|
| 144 |
"""
|
|
|
|
|
|
|
| 145 |
|
| 146 |
+
# ββ Confirm target ETFs exist βββββββββββββββββββββββββββββββββββββββββββββ
|
| 147 |
+
missing = [c for c in TARGET_ETF_COLS if c not in df.columns]
|
| 148 |
+
if missing:
|
| 149 |
raise ValueError(
|
| 150 |
+
f"Missing ETF columns: {missing}. "
|
| 151 |
f"Found in dataset: {list(df.columns)}"
|
| 152 |
)
|
| 153 |
|
| 154 |
+
# ββ Build return columns ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 155 |
+
# If values look like prices (>5), compute pct returns.
|
| 156 |
+
# If they already look like small returns (<1 in abs), use as-is.
|
| 157 |
+
target_etfs = []
|
| 158 |
+
for col in TARGET_ETF_COLS:
|
| 159 |
+
ret_col = f"{col}_Ret"
|
| 160 |
+
if ret_col not in df.columns:
|
| 161 |
+
sample = df[col].dropna()
|
| 162 |
+
if len(sample) > 0 and abs(sample.median()) > 1:
|
| 163 |
+
# Looks like price β compute pct change
|
| 164 |
+
df[ret_col] = df[col].pct_change()
|
| 165 |
+
else:
|
| 166 |
+
# Already returns
|
| 167 |
+
df[ret_col] = df[col]
|
| 168 |
+
target_etfs.append(ret_col)
|
| 169 |
+
|
| 170 |
+
# Same for benchmarks
|
| 171 |
+
for col in BENCHMARK_COLS:
|
| 172 |
+
ret_col = f"{col}_Ret"
|
| 173 |
+
if ret_col not in df.columns and col in df.columns:
|
| 174 |
+
sample = df[col].dropna()
|
| 175 |
+
if len(sample) > 0 and abs(sample.median()) > 1:
|
| 176 |
+
df[ret_col] = df[col].pct_change()
|
| 177 |
+
else:
|
| 178 |
+
df[ret_col] = df[col]
|
| 179 |
+
|
| 180 |
+
# Drop rows with NaN in target columns (first row after pct_change)
|
| 181 |
+
df = df.dropna(subset=target_etfs)
|
| 182 |
+
|
| 183 |
+
# ββ Input features ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 184 |
+
# Use macro columns directly; exclude ETF price/return cols and benchmarks
|
| 185 |
+
exclude = set(
|
| 186 |
+
TARGET_ETF_COLS + BENCHMARK_COLS +
|
| 187 |
+
target_etfs +
|
| 188 |
+
[f"{c}_Ret" for c in BENCHMARK_COLS] +
|
| 189 |
+
[TBILL_COL]
|
| 190 |
+
)
|
| 191 |
+
|
| 192 |
input_features = [
|
| 193 |
c for c in df.columns
|
| 194 |
if c not in exclude
|
| 195 |
+
and c in (MACRO_COLS + [
|
| 196 |
+
col for col in df.columns
|
| 197 |
+
if any(k in col for k in ["_Z", "_Vol", "Regime", "YC_", "Credit_",
|
| 198 |
+
"Rates_", "VIX_", "Spread", "DXY", "T10Y"])
|
| 199 |
+
])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
]
|
| 201 |
|
| 202 |
+
# Fallback: if none matched, use all non-excluded numeric columns
|
| 203 |
+
if not input_features:
|
| 204 |
+
input_features = [
|
| 205 |
+
c for c in df.columns
|
| 206 |
+
if c not in exclude
|
| 207 |
+
and pd.api.types.is_numeric_dtype(df[c])
|
| 208 |
+
]
|
| 209 |
+
|
| 210 |
+
# ββ T-bill rate βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 211 |
+
tbill_rate = 0.045 # default
|
| 212 |
if TBILL_COL in df.columns:
|
| 213 |
raw = df[TBILL_COL].dropna()
|
| 214 |
if len(raw) > 0:
|
| 215 |
+
last_val = float(raw.iloc[-1])
|
| 216 |
+
tbill_rate = last_val / 100 if last_val > 1 else last_val
|
|
|
|
| 217 |
|
| 218 |
+
return input_features, target_etfs, tbill_rate, df
|
| 219 |
|
| 220 |
|
| 221 |
+
# ββ Dataset summary βββββββββββββββββββββββββββββββββββββββββββοΏ½οΏ½οΏ½βββββββββββββββ
|
| 222 |
|
| 223 |
def dataset_summary(df: pd.DataFrame) -> dict:
|
|
|
|
| 224 |
if df.empty:
|
| 225 |
return {}
|
| 226 |
return {
|
| 227 |
+
"rows": len(df),
|
| 228 |
+
"columns": len(df.columns),
|
| 229 |
+
"start_date": df.index[0].strftime("%Y-%m-%d"),
|
| 230 |
+
"end_date": df.index[-1].strftime("%Y-%m-%d"),
|
| 231 |
+
"etfs_found": [c for c in TARGET_ETF_COLS if c in df.columns],
|
| 232 |
+
"benchmarks": [c for c in BENCHMARK_COLS if c in df.columns],
|
| 233 |
+
"macro_found": [c for c in MACRO_COLS if c in df.columns],
|
| 234 |
"tbill_found": TBILL_COL in df.columns,
|
| 235 |
}
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/data/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md
CHANGED
|
@@ -1,3 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES
|
| 2 |
|
| 3 |
Macro-driven ETF rotation using three augmented CNN-LSTM variants.
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES
|
| 3 |
+
emoji: π§
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: streamlit
|
| 7 |
+
sdk_version: "1.32.0"
|
| 8 |
+
python_version: "3.10"
|
| 9 |
+
app_file: app.py
|
| 10 |
+
pinned: false
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
# P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES
|
| 14 |
|
| 15 |
Macro-driven ETF rotation using three augmented CNN-LSTM variants.
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/data/loader.py
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
data/loader.py
|
| 3 |
+
Loads master_data.parquet from HF Dataset.
|
| 4 |
+
Validates freshness against the last NYSE trading day.
|
| 5 |
+
No external pings β all data comes from HF Dataset only.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import numpy as np
|
| 10 |
+
import streamlit as st
|
| 11 |
+
from huggingface_hub import hf_hub_download
|
| 12 |
+
from datetime import datetime, timedelta
|
| 13 |
+
import pytz
|
| 14 |
+
import os
|
| 15 |
+
|
| 16 |
+
try:
|
| 17 |
+
import pandas_market_calendars as mcal
|
| 18 |
+
NYSE_CAL_AVAILABLE = True
|
| 19 |
+
except ImportError:
|
| 20 |
+
NYSE_CAL_AVAILABLE = False
|
| 21 |
+
|
| 22 |
+
DATASET_REPO = "P2SAMAPA/fi-etf-macro-signal-master-data"
|
| 23 |
+
PARQUET_FILE = "master_data.parquet"
|
| 24 |
+
|
| 25 |
+
# Columns expected in the dataset
|
| 26 |
+
REQUIRED_ETF_COLS = ["TLT_Ret", "TBT_Ret", "VNQ_Ret", "SLV_Ret", "GLD_Ret"]
|
| 27 |
+
BENCHMARK_COLS = ["SPY_Ret", "AGG_Ret"]
|
| 28 |
+
TBILL_COL = "DTB3" # 3m T-bill column in HF dataset
|
| 29 |
+
TARGET_ETFS = REQUIRED_ETF_COLS # 5 targets (no CASH in returns, CASH handled in strategy)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# ββ NYSE calendar helpers βββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 33 |
+
|
| 34 |
+
def get_last_nyse_trading_day(as_of: datetime = None) -> datetime.date:
|
| 35 |
+
"""Return the most recent NYSE trading day before or on as_of (default: today EST)."""
|
| 36 |
+
est = pytz.timezone("US/Eastern")
|
| 37 |
+
if as_of is None:
|
| 38 |
+
as_of = datetime.now(est)
|
| 39 |
+
|
| 40 |
+
today = as_of.date()
|
| 41 |
+
|
| 42 |
+
if NYSE_CAL_AVAILABLE:
|
| 43 |
+
try:
|
| 44 |
+
nyse = mcal.get_calendar("NYSE")
|
| 45 |
+
# Look back up to 10 days to find last trading day
|
| 46 |
+
start = today - timedelta(days=10)
|
| 47 |
+
schedule = nyse.schedule(start_date=start, end_date=today)
|
| 48 |
+
if len(schedule) > 0:
|
| 49 |
+
return schedule.index[-1].date()
|
| 50 |
+
except Exception:
|
| 51 |
+
pass
|
| 52 |
+
|
| 53 |
+
# Fallback: skip weekends
|
| 54 |
+
candidate = today
|
| 55 |
+
while candidate.weekday() >= 5:
|
| 56 |
+
candidate -= timedelta(days=1)
|
| 57 |
+
return candidate
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def is_nyse_trading_day(date) -> bool:
|
| 61 |
+
"""Return True if date is a NYSE trading day."""
|
| 62 |
+
if NYSE_CAL_AVAILABLE:
|
| 63 |
+
try:
|
| 64 |
+
nyse = mcal.get_calendar("NYSE")
|
| 65 |
+
schedule = nyse.schedule(start_date=date, end_date=date)
|
| 66 |
+
return len(schedule) > 0
|
| 67 |
+
except Exception:
|
| 68 |
+
pass
|
| 69 |
+
return date.weekday() < 5
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
# ββ Data loading ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 73 |
+
|
| 74 |
+
@st.cache_data(ttl=3600, show_spinner=False)
|
| 75 |
+
def load_dataset(hf_token: str) -> pd.DataFrame:
|
| 76 |
+
"""
|
| 77 |
+
Download master_data.parquet from HF Dataset and return as DataFrame.
|
| 78 |
+
Cached for 1 hour. Index is parsed as DatetimeIndex.
|
| 79 |
+
"""
|
| 80 |
+
try:
|
| 81 |
+
path = hf_hub_download(
|
| 82 |
+
repo_id=DATASET_REPO,
|
| 83 |
+
filename=PARQUET_FILE,
|
| 84 |
+
repo_type="dataset",
|
| 85 |
+
token=hf_token,
|
| 86 |
+
)
|
| 87 |
+
df = pd.read_parquet(path)
|
| 88 |
+
|
| 89 |
+
# Ensure DatetimeIndex
|
| 90 |
+
if not isinstance(df.index, pd.DatetimeIndex):
|
| 91 |
+
if "Date" in df.columns:
|
| 92 |
+
df = df.set_index("Date")
|
| 93 |
+
elif "date" in df.columns:
|
| 94 |
+
df = df.set_index("date")
|
| 95 |
+
df.index = pd.to_datetime(df.index)
|
| 96 |
+
|
| 97 |
+
df = df.sort_index()
|
| 98 |
+
return df
|
| 99 |
+
|
| 100 |
+
except Exception as e:
|
| 101 |
+
st.error(f"β Failed to load dataset from HuggingFace: {e}")
|
| 102 |
+
return pd.DataFrame()
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
# ββ Freshness check βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 106 |
+
|
| 107 |
+
def check_data_freshness(df: pd.DataFrame) -> dict:
|
| 108 |
+
"""
|
| 109 |
+
Check whether the dataset contains data for the last NYSE trading day.
|
| 110 |
+
|
| 111 |
+
Returns a dict:
|
| 112 |
+
{
|
| 113 |
+
"fresh": bool,
|
| 114 |
+
"last_date_in_data": date,
|
| 115 |
+
"expected_date": date,
|
| 116 |
+
"message": str
|
| 117 |
+
}
|
| 118 |
+
"""
|
| 119 |
+
if df.empty:
|
| 120 |
+
return {
|
| 121 |
+
"fresh": False,
|
| 122 |
+
"last_date_in_data": None,
|
| 123 |
+
"expected_date": None,
|
| 124 |
+
"message": "Dataset is empty.",
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
last_date_in_data = df.index[-1].date()
|
| 128 |
+
expected_date = get_last_nyse_trading_day()
|
| 129 |
+
|
| 130 |
+
fresh = last_date_in_data >= expected_date
|
| 131 |
+
|
| 132 |
+
if fresh:
|
| 133 |
+
message = f"β
Dataset is up to date through **{last_date_in_data}**."
|
| 134 |
+
else:
|
| 135 |
+
message = (
|
| 136 |
+
f"β οΈ **{expected_date}** data not yet updated in dataset. "
|
| 137 |
+
f"Latest available: **{last_date_in_data}**. "
|
| 138 |
+
f"Please check back later β the dataset updates daily after market close."
|
| 139 |
+
)
|
| 140 |
+
|
| 141 |
+
return {
|
| 142 |
+
"fresh": fresh,
|
| 143 |
+
"last_date_in_data": last_date_in_data,
|
| 144 |
+
"expected_date": expected_date,
|
| 145 |
+
"message": message,
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
# ββ Feature / target extraction βββββββββββββββββββββββββββββββββββββββοΏ½οΏ½βββββββ
|
| 150 |
+
|
| 151 |
+
def get_features_and_targets(df: pd.DataFrame):
|
| 152 |
+
"""
|
| 153 |
+
Extract input feature columns and target ETF return columns from the dataset.
|
| 154 |
+
|
| 155 |
+
Returns:
|
| 156 |
+
input_features : list of column names
|
| 157 |
+
target_etfs : list of ETF return column names (e.g. TLT_Ret)
|
| 158 |
+
tbill_rate : latest 3m T-bill rate as a float (annualised, e.g. 0.045)
|
| 159 |
+
"""
|
| 160 |
+
# Target ETF return columns
|
| 161 |
+
target_etfs = [c for c in REQUIRED_ETF_COLS if c in df.columns]
|
| 162 |
+
|
| 163 |
+
if not target_etfs:
|
| 164 |
+
raise ValueError(
|
| 165 |
+
f"No target ETF columns found. Expected: {REQUIRED_ETF_COLS}. "
|
| 166 |
+
f"Found in dataset: {list(df.columns)}"
|
| 167 |
+
)
|
| 168 |
+
|
| 169 |
+
# Input features: Z-scores, vol, regime, yield curve, credit, rates, VIX terms
|
| 170 |
+
exclude = set(target_etfs + BENCHMARK_COLS + [TBILL_COL])
|
| 171 |
+
input_features = [
|
| 172 |
+
c for c in df.columns
|
| 173 |
+
if c not in exclude
|
| 174 |
+
and (
|
| 175 |
+
c.endswith("_Z")
|
| 176 |
+
or c.endswith("_Vol")
|
| 177 |
+
or "Regime" in c
|
| 178 |
+
or "YC_" in c
|
| 179 |
+
or "Credit_" in c
|
| 180 |
+
or "Rates_" in c
|
| 181 |
+
or "VIX_" in c
|
| 182 |
+
or "Spread" in c
|
| 183 |
+
or "DXY" in c
|
| 184 |
+
or "VIX" in c
|
| 185 |
+
or "T10Y" in c
|
| 186 |
+
)
|
| 187 |
+
]
|
| 188 |
+
|
| 189 |
+
# 3m T-bill rate (for CASH return & Sharpe)
|
| 190 |
+
tbill_rate = 0.045 # default fallback
|
| 191 |
+
if TBILL_COL in df.columns:
|
| 192 |
+
raw = df[TBILL_COL].dropna()
|
| 193 |
+
if len(raw) > 0:
|
| 194 |
+
last_val = raw.iloc[-1]
|
| 195 |
+
# DTB3 is typically in percent (e.g. 5.25 means 5.25%)
|
| 196 |
+
tbill_rate = float(last_val) / 100 if last_val > 1 else float(last_val)
|
| 197 |
+
|
| 198 |
+
return input_features, target_etfs, tbill_rate
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
# ββ Column info helper (for sidebar display) ββββββββββββββββββββββββββββββββββ
|
| 202 |
+
|
| 203 |
+
def dataset_summary(df: pd.DataFrame) -> dict:
|
| 204 |
+
"""Return a brief summary dict for sidebar display."""
|
| 205 |
+
if df.empty:
|
| 206 |
+
return {}
|
| 207 |
+
return {
|
| 208 |
+
"rows": len(df),
|
| 209 |
+
"columns": len(df.columns),
|
| 210 |
+
"start_date": df.index[0].strftime("%Y-%m-%d"),
|
| 211 |
+
"end_date": df.index[-1].strftime("%Y-%m-%d"),
|
| 212 |
+
"etfs_found": [c for c in REQUIRED_ETF_COLS if c in df.columns],
|
| 213 |
+
"benchmarks": [c for c in BENCHMARK_COLS if c in df.columns],
|
| 214 |
+
"tbill_found": TBILL_COL in df.columns,
|
| 215 |
+
}
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md
CHANGED
|
@@ -1,19 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
---
|
| 13 |
|
| 14 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES
|
| 2 |
+
|
| 3 |
+
Macro-driven ETF rotation using three augmented CNN-LSTM variants.
|
| 4 |
+
Winner selected by **highest raw annualised return** on the out-of-sample test set.
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## Architecture Overview
|
| 9 |
+
|
| 10 |
+
| Approach | Core Idea | Key Addition |
|
| 11 |
+
|---|---|---|
|
| 12 |
+
| **1 β Wavelet** | DWT decomposes each macro signal into frequency subbands before the CNN | Separates trend / cycle / noise |
|
| 13 |
+
| **2 β Regime-Conditioned** | HMM detects macro regimes; one-hot regime label concatenated into the network | Removes non-stationarity |
|
| 14 |
+
| **3 β Multi-Scale Parallel** | Three CNN towers (kernels 3, 7, 21 days) run in parallel before the LSTM | Captures momentum + cycle + trend simultaneously |
|
| 15 |
+
|
| 16 |
---
|
| 17 |
+
|
| 18 |
+
## ETF Universe
|
| 19 |
+
|
| 20 |
+
| Ticker | Description |
|
| 21 |
+
|---|---|
|
| 22 |
+
| TLT | 20+ Year Treasury Bond |
|
| 23 |
+
| TBT | 20+ Year Treasury Short (2Γ) |
|
| 24 |
+
| VNQ | Real Estate (REIT) |
|
| 25 |
+
| SLV | Silver |
|
| 26 |
+
| GLD | Gold |
|
| 27 |
+
| CASH | 3m T-bill rate (from HF dataset) |
|
| 28 |
+
|
| 29 |
+
Benchmarks (chart only, not traded): **SPY**, **AGG**
|
| 30 |
+
|
| 31 |
+
---
|
| 32 |
+
|
| 33 |
+
## Data
|
| 34 |
+
|
| 35 |
+
All data sourced exclusively from:
|
| 36 |
+
**`P2SAMAPA/fi-etf-macro-signal-master-data`** (HuggingFace Dataset)
|
| 37 |
+
File: `master_data.parquet`
|
| 38 |
+
|
| 39 |
+
No external API calls (no yfinance, no FRED).
|
| 40 |
+
The app checks daily whether the prior NYSE trading day's data is present in the dataset.
|
| 41 |
+
|
| 42 |
---
|
| 43 |
|
| 44 |
+
## Project Structure
|
| 45 |
+
|
| 46 |
+
```
|
| 47 |
+
βββ .github/
|
| 48 |
+
β βββ workflows/
|
| 49 |
+
β βββ sync.yml # Auto-sync GitHub β HF Space on push to main
|
| 50 |
+
β
|
| 51 |
+
βββ app.py # Streamlit orchestrator (UI wiring only)
|
| 52 |
+
β
|
| 53 |
+
βββ data/
|
| 54 |
+
β βββ loader.py # HF dataset load, freshness check, column validation
|
| 55 |
+
β
|
| 56 |
+
βββ models/
|
| 57 |
+
β βββ base.py # Shared: sequences, splits, scaling, callbacks
|
| 58 |
+
β βββ approach1_wavelet.py # Wavelet CNN-LSTM
|
| 59 |
+
β βββ approach2_regime.py # Regime-Conditioned CNN-LSTM
|
| 60 |
+
β βββ approach3_multiscale.py # Multi-Scale Parallel CNN-LSTM
|
| 61 |
+
β
|
| 62 |
+
βββ strategy/
|
| 63 |
+
β βββ backtest.py # execute_strategy, metrics, winner selection
|
| 64 |
+
β
|
| 65 |
+
βββ signals/
|
| 66 |
+
β βββ conviction.py # Z-score conviction scoring
|
| 67 |
+
β
|
| 68 |
+
βββ ui/
|
| 69 |
+
β βββ components.py # Banner, conviction panel, metrics, audit trail
|
| 70 |
+
β βββ charts.py # Plotly equity curve + comparison bar chart
|
| 71 |
+
β
|
| 72 |
+
βββ utils/
|
| 73 |
+
β βββ calendar.py # NYSE calendar, next trading day, EST time
|
| 74 |
+
β
|
| 75 |
+
βββ requirements.txt
|
| 76 |
+
βββ README.md
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
---
|
| 80 |
+
|
| 81 |
+
## Secrets Required
|
| 82 |
+
|
| 83 |
+
| Secret | Where | Purpose |
|
| 84 |
+
|---|---|---|
|
| 85 |
+
| `HF_TOKEN` | GitHub + HF Space | Read HF dataset Β· Sync HF Space |
|
| 86 |
+
|
| 87 |
+
Set in:
|
| 88 |
+
- GitHub: `Settings β Secrets β Actions β New repository secret`
|
| 89 |
+
- HF Space: `Settings β Repository secrets`
|
| 90 |
+
|
| 91 |
+
---
|
| 92 |
+
|
| 93 |
+
## Deployment
|
| 94 |
+
|
| 95 |
+
Push to `main` β GitHub Actions (`sync.yml`) automatically syncs to HF Space.
|
| 96 |
+
|
| 97 |
+
### Local development
|
| 98 |
+
|
| 99 |
+
```bash
|
| 100 |
+
pip install -r requirements.txt
|
| 101 |
+
export HF_TOKEN=your_token
|
| 102 |
+
streamlit run app.py
|
| 103 |
+
```
|
| 104 |
+
|
| 105 |
+
---
|
| 106 |
|
| 107 |
+
## Output UI
|
| 108 |
|
| 109 |
+
1. **Data freshness warning** β alerts if prior NYSE trading day data is missing
|
| 110 |
+
2. **Next Trading Day Signal** β date + ETF from the winning approach
|
| 111 |
+
3. **Signal Conviction** β Z-score gauge + per-ETF probability bars
|
| 112 |
+
4. **Performance Metrics** β Annualised Return, Sharpe, Hit Ratio, Max DD
|
| 113 |
+
5. **Approach Comparison Table** β all three approaches side by side
|
| 114 |
+
6. **Equity Curves** β all three approaches + SPY + AGG benchmarks
|
| 115 |
+
7. **Audit Trail** β last 20 trading days for the winning approach
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/app.py
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
app.py
|
| 3 |
+
P2-ETF-CNN-LSTM-ALTERNATIVE-APPROACHES
|
| 4 |
+
Streamlit orchestrator β UI wiring only, no business logic here.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import streamlit as st
|
| 9 |
+
import pandas as pd
|
| 10 |
+
import numpy as np
|
| 11 |
+
|
| 12 |
+
# ββ Module imports ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 13 |
+
from data.loader import load_dataset, check_data_freshness, get_features_and_targets, dataset_summary
|
| 14 |
+
from utils.calendar import get_est_time, is_sync_window, get_next_signal_date
|
| 15 |
+
from models.base import build_sequences, train_val_test_split, scale_features, returns_to_labels
|
| 16 |
+
from models.approach1_wavelet import train_approach1, predict_approach1
|
| 17 |
+
from models.approach2_regime import train_approach2, predict_approach2
|
| 18 |
+
from models.approach3_multiscale import train_approach3, predict_approach3
|
| 19 |
+
from strategy.backtest import execute_strategy, select_winner, build_comparison_table
|
| 20 |
+
from signals.conviction import compute_conviction
|
| 21 |
+
from ui.components import (
|
| 22 |
+
show_freshness_status, show_signal_banner, show_conviction_panel,
|
| 23 |
+
show_metrics_row, show_comparison_table, show_audit_trail,
|
| 24 |
+
)
|
| 25 |
+
from ui.charts import equity_curve_chart, comparison_bar_chart
|
| 26 |
+
|
| 27 |
+
# ββ Page config βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 28 |
+
st.set_page_config(
|
| 29 |
+
page_title="P2-ETF-CNN-LSTM",
|
| 30 |
+
page_icon="π§ ",
|
| 31 |
+
layout="wide",
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
# ββ Secrets βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 35 |
+
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
| 36 |
+
|
| 37 |
+
# ββ Sidebar βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 38 |
+
with st.sidebar:
|
| 39 |
+
st.header("βοΈ Configuration")
|
| 40 |
+
|
| 41 |
+
now_est = get_est_time()
|
| 42 |
+
st.write(f"π **EST:** {now_est.strftime('%H:%M:%S')}")
|
| 43 |
+
if is_sync_window():
|
| 44 |
+
st.success("β
Sync Window Active")
|
| 45 |
+
else:
|
| 46 |
+
st.info("βΈοΈ Sync Window Inactive")
|
| 47 |
+
|
| 48 |
+
st.divider()
|
| 49 |
+
|
| 50 |
+
start_yr = st.slider("π
Start Year", 2010, 2024, 2016)
|
| 51 |
+
fee_bps = st.slider("π° Fee (bps)", 0, 50, 10)
|
| 52 |
+
lookback = st.slider("π Lookback (days)", 20, 60, 30, step=5)
|
| 53 |
+
epochs = st.number_input("π Max Epochs", 20, 300, 100, step=10)
|
| 54 |
+
|
| 55 |
+
st.divider()
|
| 56 |
+
|
| 57 |
+
split_option = st.selectbox("π Train/Val/Test Split", ["70/15/15", "80/10/10"], index=0)
|
| 58 |
+
split_map = {"70/15/15": (0.70, 0.15), "80/10/10": (0.80, 0.10)}
|
| 59 |
+
train_pct, val_pct = split_map[split_option]
|
| 60 |
+
|
| 61 |
+
include_cash = st.checkbox("π΅ Include CASH class", value=True,
|
| 62 |
+
help="Model can select CASH (earns T-bill rate) as an alternative to any ETF")
|
| 63 |
+
|
| 64 |
+
st.divider()
|
| 65 |
+
|
| 66 |
+
run_button = st.button("π Run All 3 Approaches", type="primary", use_container_width=True)
|
| 67 |
+
|
| 68 |
+
# ββ Title βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 69 |
+
st.title("π§ P2-ETF-CNN-LSTM")
|
| 70 |
+
st.caption("Approach 1: Wavelet Β· Approach 2: Regime-Conditioned Β· Approach 3: Multi-Scale Parallel")
|
| 71 |
+
st.caption("Winner selected by highest raw annualised return on out-of-sample test set.")
|
| 72 |
+
|
| 73 |
+
# ββ Load data (always, to check freshness) ββββββββββββββββββββββββββββββββββββ
|
| 74 |
+
if not HF_TOKEN:
|
| 75 |
+
st.error("β HF_TOKEN secret not found. Please add it to your HF Space / GitHub secrets.")
|
| 76 |
+
st.stop()
|
| 77 |
+
|
| 78 |
+
with st.spinner("π‘ Loading dataset from HuggingFace..."):
|
| 79 |
+
df = load_dataset(HF_TOKEN)
|
| 80 |
+
|
| 81 |
+
if df.empty:
|
| 82 |
+
st.stop()
|
| 83 |
+
|
| 84 |
+
# ββ Freshness check βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 85 |
+
freshness = check_data_freshness(df)
|
| 86 |
+
show_freshness_status(freshness)
|
| 87 |
+
|
| 88 |
+
# ββ Dataset summary in sidebar ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 89 |
+
with st.sidebar:
|
| 90 |
+
st.divider()
|
| 91 |
+
st.subheader("π¦ Dataset Info")
|
| 92 |
+
summary = dataset_summary(df)
|
| 93 |
+
if summary:
|
| 94 |
+
st.write(f"**Rows:** {summary['rows']:,}")
|
| 95 |
+
st.write(f"**Range:** {summary['start_date']} β {summary['end_date']}")
|
| 96 |
+
st.write(f"**ETFs:** {', '.join([e.replace('_Ret','') for e in summary['etfs_found']])}")
|
| 97 |
+
st.write(f"**Benchmarks:** {', '.join([b.replace('_Ret','') for b in summary['benchmarks']])}")
|
| 98 |
+
st.write(f"**T-bill col:** {'β
' if summary['tbill_found'] else 'β'}")
|
| 99 |
+
|
| 100 |
+
# ββ Main execution ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 101 |
+
if not run_button:
|
| 102 |
+
st.info("π Configure parameters in the sidebar and click **π Run All 3 Approaches** to begin.")
|
| 103 |
+
st.stop()
|
| 104 |
+
|
| 105 |
+
# ββ Filter by start year ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 106 |
+
df = df[df.index.year >= start_yr].copy()
|
| 107 |
+
st.write(f"π
**Data:** {df.index[0].strftime('%Y-%m-%d')} β {df.index[-1].strftime('%Y-%m-%d')} "
|
| 108 |
+
f"({df.index[-1].year - df.index[0].year + 1} years)")
|
| 109 |
+
|
| 110 |
+
# ββ Feature / target extraction βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 111 |
+
try:
|
| 112 |
+
input_features, target_etfs, tbill_rate = get_features_and_targets(df)
|
| 113 |
+
except ValueError as e:
|
| 114 |
+
st.error(str(e))
|
| 115 |
+
st.stop()
|
| 116 |
+
|
| 117 |
+
st.info(f"π― **Targets:** {len(target_etfs)} ETFs Β· **Features:** {len(input_features)} signals Β· "
|
| 118 |
+
f"**T-bill rate:** {tbill_rate*100:.2f}%")
|
| 119 |
+
|
| 120 |
+
# ββ Prepare sequences βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 121 |
+
X_raw = df[input_features].values.astype(np.float32)
|
| 122 |
+
y_raw = df[target_etfs].values.astype(np.float32)
|
| 123 |
+
n_etfs = len(target_etfs)
|
| 124 |
+
n_classes = n_etfs + (1 if include_cash else 0) # +1 for CASH
|
| 125 |
+
|
| 126 |
+
# Fill NaNs with column means
|
| 127 |
+
col_means = np.nanmean(X_raw, axis=0)
|
| 128 |
+
for j in range(X_raw.shape[1]):
|
| 129 |
+
mask = np.isnan(X_raw[:, j])
|
| 130 |
+
X_raw[mask, j] = col_means[j]
|
| 131 |
+
|
| 132 |
+
X_seq, y_seq = build_sequences(X_raw, y_raw, lookback)
|
| 133 |
+
y_labels = returns_to_labels(y_seq, include_cash=include_cash)
|
| 134 |
+
|
| 135 |
+
X_train, y_train_r, X_val, y_val_r, X_test, y_test_r = train_val_test_split(X_seq, y_seq, train_pct, val_pct)
|
| 136 |
+
_, y_train_l, _, y_val_l, _, y_test_l = train_val_test_split(X_seq, y_labels, train_pct, val_pct)
|
| 137 |
+
|
| 138 |
+
X_train_s, X_val_s, X_test_s, _ = scale_features(X_train, X_val, X_test)
|
| 139 |
+
|
| 140 |
+
train_size = len(X_train)
|
| 141 |
+
val_size = len(X_val)
|
| 142 |
+
|
| 143 |
+
# Test dates (aligned with y_test)
|
| 144 |
+
test_start = lookback + train_size + val_size
|
| 145 |
+
test_dates = df.index[test_start: test_start + len(X_test)]
|
| 146 |
+
test_slice = slice(test_start, test_start + len(X_test))
|
| 147 |
+
|
| 148 |
+
st.success(f"β
Sequences β Train: {train_size} Β· Val: {val_size} Β· Test: {len(X_test)}")
|
| 149 |
+
|
| 150 |
+
# ββ Train all three approaches ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 151 |
+
results = {}
|
| 152 |
+
trained_info = {} # store extra info needed for conviction
|
| 153 |
+
|
| 154 |
+
progress = st.progress(0, text="Starting training...")
|
| 155 |
+
|
| 156 |
+
# ββ Approach 1: Wavelet βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 157 |
+
with st.spinner("π Training Approach 1 β Wavelet CNN-LSTM..."):
|
| 158 |
+
try:
|
| 159 |
+
model1, hist1, _ = train_approach1(
|
| 160 |
+
X_train_s, y_train_l,
|
| 161 |
+
X_val_s, y_val_l,
|
| 162 |
+
n_classes=n_classes, epochs=int(epochs),
|
| 163 |
+
)
|
| 164 |
+
preds1, proba1 = predict_approach1(model1, X_test_s)
|
| 165 |
+
results["Approach 1"] = execute_strategy(
|
| 166 |
+
preds1, proba1, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash,
|
| 167 |
+
)
|
| 168 |
+
trained_info["Approach 1"] = {"proba": proba1}
|
| 169 |
+
st.success("β
Approach 1 complete")
|
| 170 |
+
except Exception as e:
|
| 171 |
+
st.warning(f"β οΈ Approach 1 failed: {e}")
|
| 172 |
+
results["Approach 1"] = None
|
| 173 |
+
|
| 174 |
+
progress.progress(33, text="Approach 1 done...")
|
| 175 |
+
|
| 176 |
+
# ββ Approach 2: Regime-Conditioned βββββββββββββββββββββββββββββββββββββββββββ
|
| 177 |
+
with st.spinner("π Training Approach 2 β Regime-Conditioned CNN-LSTM..."):
|
| 178 |
+
try:
|
| 179 |
+
model2, hist2, hmm2, regime_cols2 = train_approach2(
|
| 180 |
+
X_train_s, y_train_l,
|
| 181 |
+
X_val_s, y_val_l,
|
| 182 |
+
X_flat_all=X_raw,
|
| 183 |
+
feature_names=input_features,
|
| 184 |
+
lookback=lookback,
|
| 185 |
+
train_size=train_size,
|
| 186 |
+
val_size=val_size,
|
| 187 |
+
n_classes=n_classes, epochs=int(epochs),
|
| 188 |
+
)
|
| 189 |
+
preds2, proba2 = predict_approach2(
|
| 190 |
+
model2, X_test_s, X_raw, regime_cols2, hmm2,
|
| 191 |
+
lookback, train_size, val_size,
|
| 192 |
+
)
|
| 193 |
+
results["Approach 2"] = execute_strategy(
|
| 194 |
+
preds2, proba2, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash,
|
| 195 |
+
)
|
| 196 |
+
trained_info["Approach 2"] = {"proba": proba2}
|
| 197 |
+
st.success("β
Approach 2 complete")
|
| 198 |
+
except Exception as e:
|
| 199 |
+
st.warning(f"β οΈ Approach 2 failed: {e}")
|
| 200 |
+
results["Approach 2"] = None
|
| 201 |
+
|
| 202 |
+
progress.progress(66, text="Approach 2 done...")
|
| 203 |
+
|
| 204 |
+
# ββ Approach 3: Multi-Scale βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 205 |
+
with st.spinner("π‘ Training Approach 3 β Multi-Scale CNN-LSTM..."):
|
| 206 |
+
try:
|
| 207 |
+
model3, hist3 = train_approach3(
|
| 208 |
+
X_train_s, y_train_l,
|
| 209 |
+
X_val_s, y_val_l,
|
| 210 |
+
n_classes=n_classes, epochs=int(epochs),
|
| 211 |
+
)
|
| 212 |
+
preds3, proba3 = predict_approach3(model3, X_test_s)
|
| 213 |
+
results["Approach 3"] = execute_strategy(
|
| 214 |
+
preds3, proba3, y_test_r, test_dates, target_etfs, fee_bps, tbill_rate, include_cash,
|
| 215 |
+
)
|
| 216 |
+
trained_info["Approach 3"] = {"proba": proba3}
|
| 217 |
+
st.success("β
Approach 3 complete")
|
| 218 |
+
except Exception as e:
|
| 219 |
+
st.warning(f"β οΈ Approach 3 failed: {e}")
|
| 220 |
+
results["Approach 3"] = None
|
| 221 |
+
|
| 222 |
+
progress.progress(100, text="All approaches complete!")
|
| 223 |
+
progress.empty()
|
| 224 |
+
|
| 225 |
+
# ββ Select winner βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 226 |
+
winner_name = select_winner(results)
|
| 227 |
+
winner_res = results.get(winner_name)
|
| 228 |
+
|
| 229 |
+
if winner_res is None:
|
| 230 |
+
st.error("β All approaches failed. Please check your data and configuration.")
|
| 231 |
+
st.stop()
|
| 232 |
+
|
| 233 |
+
# ββ Next trading date βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 234 |
+
next_date = get_next_signal_date()
|
| 235 |
+
|
| 236 |
+
st.divider()
|
| 237 |
+
|
| 238 |
+
# ββ Signal banner (winner) ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 239 |
+
show_signal_banner(winner_res["next_signal"], next_date, winner_name)
|
| 240 |
+
|
| 241 |
+
# ββ Conviction panel ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 242 |
+
winner_proba = trained_info[winner_name]["proba"]
|
| 243 |
+
conviction = compute_conviction(winner_proba[-1], target_etfs, include_cash)
|
| 244 |
+
show_conviction_panel(conviction)
|
| 245 |
+
|
| 246 |
+
st.divider()
|
| 247 |
+
|
| 248 |
+
# ββ Winner metrics ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 249 |
+
st.subheader(f"π {winner_name} β Performance Metrics")
|
| 250 |
+
show_metrics_row(winner_res, tbill_rate)
|
| 251 |
+
|
| 252 |
+
st.divider()
|
| 253 |
+
|
| 254 |
+
# ββ Comparison table ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 255 |
+
st.subheader("π Approach Comparison (Winner = Highest Raw Annualised Return)")
|
| 256 |
+
comparison_df = build_comparison_table(results, winner_name)
|
| 257 |
+
show_comparison_table(comparison_df)
|
| 258 |
+
|
| 259 |
+
# ββ Comparison bar chart ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 260 |
+
st.plotly_chart(comparison_bar_chart(results, winner_name), use_container_width=True)
|
| 261 |
+
|
| 262 |
+
st.divider()
|
| 263 |
+
|
| 264 |
+
# ββ Equity curves βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 265 |
+
st.subheader("π Out-of-Sample Equity Curves β All Approaches vs Benchmarks")
|
| 266 |
+
fig = equity_curve_chart(results, winner_name, test_dates, df, test_slice, tbill_rate)
|
| 267 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 268 |
+
|
| 269 |
+
st.divider()
|
| 270 |
+
|
| 271 |
+
# ββ Audit trail (winner) ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 272 |
+
st.subheader(f"π Audit Trail β {winner_name} (Last 20 Trading Days)")
|
| 273 |
+
show_audit_trail(winner_res["audit_trail"])
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/Dockerfile
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.13.5-slim

WORKDIR /app

# Toolchain for building wheels (build-essential), curl (also used by the
# healthcheck below) and git for any VCS-based pip requirements.
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# FIX: install dependencies BEFORE copying the source tree, so code-only
# changes no longer invalidate the slow `pip install` layer cache.
COPY requirements.txt ./
RUN pip3 install -r requirements.txt

COPY src/ ./src/

EXPOSE 8501

HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health

ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/README.md
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: P2 ETF CNN LSTM ALTERNATIVE APPROACHES
|
| 3 |
+
emoji: π
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: red
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 8501
|
| 8 |
+
tags:
|
| 9 |
+
- streamlit
|
| 10 |
+
pinned: false
|
| 11 |
+
short_description: Streamlit template space
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# Welcome to Streamlit!
|
| 15 |
+
|
| 16 |
+
Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
|
| 17 |
+
|
| 18 |
+
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
|
| 19 |
+
forums](https://discuss.streamlit.io).
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
altair
|
| 2 |
+
pandas
|
| 3 |
+
streamlit
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/src/streamlit_app.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import altair as alt
import numpy as np
import pandas as pd
import streamlit as st

"""
# Welcome to Streamlit!

Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
forums](https://discuss.streamlit.io).

In the meantime, below is an example of what you can do with just a few lines of code:
"""

num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
num_turns = st.slider("Number of turns in spiral", 1, 300, 31)

# Parameterize the curve by t in [0, 1]: the angle sweeps num_turns full
# revolutions while the radius grows linearly, producing an outward spiral.
t = np.linspace(0, 1, num_points)
angle = 2 * np.pi * num_turns * t

spiral = pd.DataFrame({
    "x": t * np.cos(angle),
    "y": t * np.sin(angle),
    "idx": t,
    "rand": np.random.randn(num_points),
})

chart = (
    alt.Chart(spiral, height=700, width=700)
    .mark_point(filled=True)
    .encode(
        x=alt.X("x", axis=None),
        y=alt.Y("y", axis=None),
        color=alt.Color("idx", legend=None, scale=alt.Scale()),
        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
    )
)
st.altair_chart(chart)
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/requirements.txt
CHANGED
|
@@ -1,3 +1,29 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core
|
| 2 |
+
streamlit>=1.32.0
|
| 3 |
+
pandas>=2.0.0
|
| 4 |
+
numpy>=1.24.0
|
| 5 |
+
|
| 6 |
+
# Hugging Face
|
| 7 |
+
huggingface_hub>=0.21.0
|
| 8 |
+
datasets>=2.18.0
|
| 9 |
+
|
| 10 |
+
# Machine Learning
|
| 11 |
+
tensorflow>=2.14.0
|
| 12 |
+
scikit-learn>=1.3.0
|
| 13 |
+
xgboost>=2.0.0
|
| 14 |
+
|
| 15 |
+
# Wavelet (Approach 1)
|
| 16 |
+
PyWavelets>=1.5.0
|
| 17 |
+
|
| 18 |
+
# Regime detection (Approach 2)
|
| 19 |
+
hmmlearn>=0.3.0
|
| 20 |
+
|
| 21 |
+
# Visualisation
|
| 22 |
+
plotly>=5.18.0
|
| 23 |
+
|
| 24 |
+
# NYSE Calendar
|
| 25 |
+
pandas_market_calendars>=4.3.0
|
| 26 |
+
pytz>=2024.1
|
| 27 |
+
|
| 28 |
+
# Parquet
|
| 29 |
+
pyarrow>=14.0.0
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
models/base.py
|
| 3 |
+
Shared utilities for all three CNN-LSTM variants:
|
| 4 |
+
- Data preparation (sequences, train/val/test split)
|
| 5 |
+
- Common Keras layers / callbacks
|
| 6 |
+
- Predict + evaluate helpers
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import numpy as np
|
| 10 |
+
import pandas as pd
|
| 11 |
+
from sklearn.preprocessing import RobustScaler
|
| 12 |
+
import tensorflow as tf
|
| 13 |
+
from tensorflow import keras
|
| 14 |
+
|
| 15 |
+
# ββ Reproducibility βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 16 |
+
SEED = 42
|
| 17 |
+
tf.random.set_seed(SEED)
|
| 18 |
+
np.random.seed(SEED)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# ββ Sequence builder ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 22 |
+
|
| 23 |
+
def build_sequences(features: np.ndarray, targets: np.ndarray, lookback: int):
    """
    Build supervised sliding-window sequences for CNN-LSTM input.

    Each sample pairs a window of the previous `lookback` feature rows with
    the target row immediately following that window.

    Args:
        features : 2-D array [n_days, n_features]
        targets  : 2-D array [n_days, n_etfs] (raw returns)
        lookback : number of past days per sample

    Returns:
        X : [n_samples, lookback, n_features]
        y : [n_samples, n_etfs] (raw returns for the next day)
    """
    n_samples = len(features) - lookback
    windows = [features[start:start + lookback] for start in range(n_samples)]
    next_day = [targets[start + lookback] for start in range(n_samples)]
    return np.array(windows, dtype=np.float32), np.array(next_day, dtype=np.float32)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# ββ Train / val / test split ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 44 |
+
|
| 45 |
+
def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15):
    """
    Chronological train / val / test split (no shuffling).

    Boundaries are truncated to integers, so the test segment absorbs any
    rounding remainder.

    Returns a 6-tuple: (X_train, y_train, X_val, y_val, X_test, y_test).
    """
    total = len(X)
    train_end = int(total * train_pct)
    val_end = int(total * (train_pct + val_pct))

    parts = []
    for lo, hi in ((0, train_end), (train_end, val_end), (val_end, total)):
        parts.append(X[lo:hi])
        parts.append(y[lo:hi])
    return tuple(parts)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# ββ Feature scaling βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 59 |
+
|
| 60 |
+
def scale_features(X_train, X_val, X_test):
    """
    Scale 3-D sequence tensors with a RobustScaler fitted on training data only.

    The scaler sees the training windows flattened to [n_train * lookback,
    n_features]; val and test are transformed with the same fit to avoid
    look-ahead leakage.

    Returns:
        (X_train_scaled, X_val_scaled, X_test_scaled, fitted_scaler),
        each array keeping its original shape.
    """
    n_features = X_train.shape[2]
    scaler = RobustScaler().fit(X_train.reshape(-1, n_features))

    def _apply(arr):
        flat = scaler.transform(arr.reshape(-1, n_features))
        return flat.reshape(arr.shape)

    return _apply(X_train), _apply(X_val), _apply(X_test), scaler
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
# ββ Label builder (classification: argmax of returns) ββββββββββββββββββββββββ
|
| 81 |
+
|
| 82 |
+
def returns_to_labels(y_raw, include_cash=True, cash_threshold=0.0):
    """
    Convert a raw return matrix into integer class labels.

    The label is the index of the best-returning ETF. With
    include_cash=True, a CASH class (index = n_etfs) is assigned instead
    whenever even the best ETF return falls below cash_threshold.

    Args:
        y_raw          : [n_samples, n_etfs]
        include_cash   : whether to allow CASH class
        cash_threshold : minimum ETF return to prefer over CASH

    Returns:
        labels : [n_samples] int32 class indices
    """
    winners = np.argmax(y_raw, axis=1)
    if not include_cash:
        return winners.astype(np.int32)

    winning_rets = y_raw[np.arange(y_raw.shape[0]), winners]
    cash_class = y_raw.shape[1]
    labels = np.where(winning_rets < cash_threshold, cash_class, winners)
    return labels.astype(np.int32)
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
# ββ Common Keras callbacks ββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 108 |
+
|
| 109 |
+
def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6):
    """
    Standard early-stopping + LR-reduction callbacks shared by all models.

    Args:
        patience_es : epochs without val_loss improvement before stopping
        patience_lr : epochs without val_loss improvement before halving LR
        min_lr      : floor for the learning rate

    Returns:
        list of Keras callbacks (EarlyStopping restores the best weights).
    """
    early_stop = keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=patience_es,
        restore_best_weights=True,
        verbose=0,
    )
    lr_schedule = keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.5,
        patience=patience_lr,
        min_lr=min_lr,
        verbose=0,
    )
    return [early_stop, lr_schedule]
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
# ββ Common output head ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 129 |
+
|
| 130 |
+
def classification_head(x, n_classes: int, dropout: float = 0.3):
    """
    Shared dense output head for all three CNN-LSTM variants.

    Dense(64, relu) → Dropout → Dense(n_classes, softmax).

    Args:
        x         : input tensor
        n_classes : number of ETF classes (+ 1 for CASH if applicable)
        dropout   : dropout rate

    Returns:
        output tensor with softmax activation
    """
    hidden = keras.layers.Dense(64, activation="relu")(x)
    regularised = keras.layers.Dropout(dropout)(hidden)
    return keras.layers.Dense(n_classes, activation="softmax")(regularised)
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
# ββ Prediction helper βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 149 |
+
|
| 150 |
+
def predict_classes(model, X_test: np.ndarray) -> tuple:
    """
    Run the model on X_test and return (classes, probabilities).

    FIX: the previous annotation/docstring claimed a single ndarray return,
    but the function has always returned a 2-tuple.

    Args:
        model  : fitted Keras-style model exposing .predict(X, verbose=...)
        X_test : input sequences

    Returns:
        (classes, proba) where classes is [n_samples] int argmax indices and
        proba is the raw [n_samples, n_classes] probability matrix.
    """
    proba = model.predict(X_test, verbose=0)
    return np.argmax(proba, axis=1), proba
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
# ββ Metrics helper ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 157 |
+
|
| 158 |
+
def evaluate_returns(
    preds: np.ndarray,
    proba: np.ndarray,
    y_raw_test: np.ndarray,
    target_etfs: list,
    tbill_rate: float,
    fee_bps: int,
    include_cash: bool = True,
):
    """
    Turn integer class predictions plus the raw return matrix into strategy
    returns and summary metrics.

    Each day earns the picked ETF's return — or the daily T-bill rate when
    the CASH class (index == n_etfs) is chosen — minus the per-day fee.
    NOTE(review): the fee is applied every day regardless of whether the
    position changed; preserved as-is.

    Returns:
        strat_rets  : np.ndarray of daily net returns
        ann_return  : annualised return (float)
        cum_returns : cumulative return series
        last_proba  : probability vector for the last prediction
        next_etf    : name of ETF predicted for next session
    """
    n_etfs = len(target_etfs)
    fee = fee_bps / 10000
    daily_tbill = tbill_rate / 252

    daily_net = []
    for day, choice in enumerate(preds):
        if include_cash and choice == n_etfs:
            gross = daily_tbill          # parked in CASH at the T-bill rate
        else:
            gross = y_raw_test[day][choice]
        daily_net.append(gross - fee)

    strat_rets = np.array(daily_net)
    cum_returns = np.cumprod(1 + strat_rets)
    ann_return = (cum_returns[-1] ** (252 / len(strat_rets))) - 1

    last_proba = proba[-1]
    next_cls = int(np.argmax(last_proba))
    if include_cash and next_cls == n_etfs:
        next_etf = "CASH"
    else:
        next_etf = target_etfs[next_cls].replace("_Ret", "")

    return strat_rets, ann_return, cum_returns, last_proba, next_etf
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
models/approach1_wavelet.py
|
| 3 |
+
Approach 1: Wavelet Decomposition CNN-LSTM
|
| 4 |
+
|
| 5 |
+
Pipeline:
|
| 6 |
+
Raw macro signals
|
| 7 |
+
β DWT (db4, level=3) per signal β multi-band channel stack
|
| 8 |
+
β 1D CNN (64 filters, k=3) β MaxPool β (32 filters, k=3)
|
| 9 |
+
β LSTM (128 units)
|
| 10 |
+
β Dense 64 β Softmax (n_etfs + 1 CASH)
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import numpy as np
|
| 14 |
+
import pywt
|
| 15 |
+
import tensorflow as tf
|
| 16 |
+
from tensorflow import keras
|
| 17 |
+
from models.base import classification_head, get_callbacks
|
| 18 |
+
|
| 19 |
+
# DWT configuration: Daubechies-4 mother wavelet decomposed to depth 3,
# which yields level + 1 = 4 subbands (one approximation + three details)
# per input signal channel.
WAVELET = "db4"
LEVEL = 3
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# ββ Wavelet feature engineering βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 24 |
+
|
| 25 |
+
def _wavelet_decompose_signal(signal: np.ndarray, wavelet: str, level: int) -> np.ndarray:
    """
    Decompose one 1-D series into its DWT subbands, stacked as channels.

    ``pywt.wavedec`` returns ``[cA_n, cD_n, cD_{n-1}, ..., cD_1]``; every
    subband is shorter than the input, so each one is linearly resampled
    back to the original length before stacking.

    Args:
        signal  : 1-D array of length T
        wavelet : pywt wavelet name (e.g. "db4")
        level   : decomposition depth

    Returns:
        np.ndarray of shape [T, level + 1]
    """
    n_points = len(signal)
    subbands = [
        # Linear interpolation stretches each coefficient vector to T samples.
        np.interp(np.linspace(0, len(coeff) - 1, n_points), np.arange(len(coeff)), coeff)
        for coeff in pywt.wavedec(signal, wavelet, level=level)
    ]
    return np.stack(subbands, axis=-1)  # [T, level + 1]
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def apply_wavelet_transform(X: np.ndarray, wavelet: str = WAVELET, level: int = LEVEL) -> np.ndarray:
    """
    Expand every feature channel of every sample into its DWT subbands.

    Args:
        X       : [n_samples, lookback, n_features] array
        wavelet : pywt wavelet name
        level   : decomposition depth

    Returns:
        float32 array of shape [n_samples, lookback, n_features * (level + 1)],
        where feature f occupies the contiguous channel slice
        [f * (level + 1), (f + 1) * (level + 1)).
    """
    n_samples, lookback, n_features = X.shape
    bands_per_feature = level + 1
    out = np.zeros(
        (n_samples, lookback, n_features * bands_per_feature), dtype=np.float32
    )

    for sample_idx in range(n_samples):
        for feat_idx in range(n_features):
            offset = feat_idx * bands_per_feature
            out[sample_idx, :, offset: offset + bands_per_feature] = (
                _wavelet_decompose_signal(X[sample_idx, :, feat_idx], wavelet, level)
            )

    return out
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
# ββ Model builder βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 73 |
+
|
| 74 |
+
def build_wavelet_cnn_lstm(
    input_shape: tuple,
    n_classes: int,
    dropout: float = 0.3,
    lstm_units: int = 128,
) -> keras.Model:
    """
    Assemble and compile the Wavelet CNN-LSTM classifier.

    Architecture: Conv1D(64) -> BN -> MaxPool -> Conv1D(32) -> BN -> Dropout
    -> LSTM -> classification head (softmax over ETFs + CASH).

    Args:
        input_shape : (lookback, n_features * n_bands) post-DWT input shape
        n_classes   : number of output classes (ETFs + CASH)
        dropout     : dropout rate used in CNN, LSTM and head
        lstm_units  : LSTM hidden size

    Returns:
        Compiled Keras model ready for ``fit``.
    """
    wavelet_in = keras.Input(shape=input_shape, name="wavelet_input")

    # First convolutional stage: 64 causal filters, then temporal downsample.
    h = keras.layers.Conv1D(64, kernel_size=3, padding="causal", activation="relu")(wavelet_in)
    h = keras.layers.BatchNormalization()(h)
    h = keras.layers.MaxPooling1D(pool_size=2)(h)

    # Second convolutional stage: 32 causal filters with dropout regularisation.
    h = keras.layers.Conv1D(32, kernel_size=3, padding="causal", activation="relu")(h)
    h = keras.layers.BatchNormalization()(h)
    h = keras.layers.Dropout(dropout)(h)

    # Recurrent stage collapses the temporal axis to a single vector.
    h = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1)(h)

    # Shared softmax head (defined in models.base).
    logits = classification_head(h, n_classes, dropout)

    net = keras.Model(wavelet_in, logits, name="Approach1_Wavelet_CNN_LSTM")
    net.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return net
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
# ββ Full train pipeline βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 120 |
+
|
| 121 |
+
def train_approach1(
    X_train, y_train,
    X_val, y_val,
    n_classes: int,
    epochs: int = 100,
    batch_size: int = 32,
    dropout: float = 0.3,
    lstm_units: int = 128,
):
    """
    Wavelet-transform the inputs, then build and train the CNN-LSTM.

    Args:
        X_train, X_val : [n, lookback, n_features] scaled, pre-wavelet arrays
        y_train, y_val : [n] integer class labels
        n_classes      : total output classes (ETFs + CASH)
        epochs         : maximum training epochs (callbacks may stop early)
        batch_size     : minibatch size
        dropout        : dropout rate
        lstm_units     : LSTM hidden size

    Returns:
        (model, history, wt_shape) where wt_shape is the post-DWT input
        shape needed to transform data at inference time.
    """
    # DWT expansion happens once, up front, for both splits.
    wt_train = apply_wavelet_transform(X_train)
    wt_val = apply_wavelet_transform(X_val)

    post_dwt_shape = wt_train.shape[1:]  # (lookback, n_features * n_bands)
    net = build_wavelet_cnn_lstm(post_dwt_shape, n_classes, dropout, lstm_units)

    fit_history = net.fit(
        wt_train, y_train,
        validation_data=(wt_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=get_callbacks(),
        verbose=0,
    )

    return net, fit_history, post_dwt_shape
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def predict_approach1(model, X_test: np.ndarray) -> tuple:
    """Wavelet-transform X_test, run the model, return (class_preds, proba)."""
    proba = model.predict(apply_wavelet_transform(X_test), verbose=0)
    return np.argmax(proba, axis=1), proba
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
models/approach3_multiscale.py
|
| 3 |
+
Approach 3: Multi-Scale Parallel CNN-LSTM
|
| 4 |
+
|
| 5 |
+
Pipeline:
|
| 6 |
+
Raw macro signals
|
| 7 |
+
β 3 parallel CNN towers: kernel 3 (short), 7 (medium), 21 (long)
|
| 8 |
+
β Concatenate [96 features]
|
| 9 |
+
β LSTM (128 units)
|
| 10 |
+
β Dense 64 β Softmax (n_etfs + 1 CASH)
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import numpy as np
|
| 14 |
+
import tensorflow as tf
|
| 15 |
+
from tensorflow import keras
|
| 16 |
+
from models.base import classification_head, get_callbacks
|
| 17 |
+
|
| 18 |
+
# Kernel sizes map to market horizons: momentum (3d), weekly cycle (7d),
# monthly trend (21d). Each size gets its own parallel CNN tower.
KERNEL_SIZES = [3, 7, 21]
FILTERS_EACH = 32  # 32 filters x 3 towers = 96 concatenated feature channels
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# ββ Model builder βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 24 |
+
|
| 25 |
+
def build_multiscale_cnn_lstm(
    input_shape: tuple,
    n_classes: int,
    kernel_sizes: list = None,
    filters: int = FILTERS_EACH,
    dropout: float = 0.3,
    lstm_units: int = 128,
) -> keras.Model:
    """
    Multi-scale parallel CNN-LSTM.

    Several CNN towers with different kernel sizes run in parallel on the
    same input, capturing momentum, weekly-cycle, and monthly-trend
    structure at once. Tower outputs are concatenated along the feature
    axis (the temporal axis is preserved) and fed to an LSTM.

    Args:
        input_shape  : (lookback, n_features)
        n_classes    : number of output classes (ETFs + CASH)
        kernel_sizes : kernel size per tower (defaults to KERNEL_SIZES)
        filters      : Conv1D filters per tower
        dropout      : dropout rate
        lstm_units   : LSTM hidden size

    Returns:
        Compiled Keras model.
    """
    kernel_sizes = KERNEL_SIZES if kernel_sizes is None else kernel_sizes

    ms_input = keras.Input(shape=input_shape, name="multiscale_input")

    def _tower(kernel: int):
        # One tower: Conv -> BN -> Conv -> BN -> Dropout (temporal axis kept).
        branch = keras.layers.Conv1D(
            filters, kernel_size=kernel, padding="causal", activation="relu",
            name=f"conv1_k{kernel}"
        )(ms_input)
        branch = keras.layers.BatchNormalization(name=f"bn1_k{kernel}")(branch)
        branch = keras.layers.Conv1D(
            filters, kernel_size=kernel, padding="causal", activation="relu",
            name=f"conv2_k{kernel}"
        )(branch)
        branch = keras.layers.BatchNormalization(name=f"bn2_k{kernel}")(branch)
        return keras.layers.Dropout(dropout, name=f"drop_k{kernel}")(branch)

    towers = [_tower(k) for k in kernel_sizes]

    # Concatenate along features so the LSTM still sees the full sequence.
    if len(towers) > 1:
        merged = keras.layers.Concatenate(axis=-1, name="tower_concat")(towers)
    else:
        merged = towers[0]

    # LSTM integrates the multi-scale temporal features.
    seq = keras.layers.LSTM(
        lstm_units, dropout=dropout, recurrent_dropout=0.1, name="lstm"
    )(merged)

    outputs = classification_head(seq, n_classes, dropout)

    net = keras.Model(ms_input, outputs, name="Approach3_MultiScale_CNN_LSTM")
    net.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return net
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
# ββ Full train pipeline βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 94 |
+
|
| 95 |
+
def train_approach3(
    X_train, y_train,
    X_val, y_val,
    n_classes: int,
    epochs: int = 100,
    batch_size: int = 32,
    dropout: float = 0.3,
    lstm_units: int = 128,
    kernel_sizes: list = None,
):
    """
    Build and train the multi-scale CNN-LSTM.

    Args:
        X_train, X_val : [n, lookback, n_features]
        y_train, y_val : [n] integer class labels
        n_classes      : total output classes
        epochs         : maximum training epochs
        batch_size     : minibatch size
        dropout        : dropout rate
        lstm_units     : LSTM hidden size
        kernel_sizes   : tower kernels (defaults to KERNEL_SIZES)

    Returns:
        (model, history)
    """
    chosen = KERNEL_SIZES if kernel_sizes is None else kernel_sizes

    # Drop any tower whose kernel would not fit inside the lookback window;
    # fall back to a single small kernel if none fit.
    lookback = X_train.shape[1]
    usable = [k for k in chosen if k <= lookback]
    if not usable:
        usable = [min(3, lookback)]

    net = build_multiscale_cnn_lstm(
        input_shape=X_train.shape[1:],
        n_classes=n_classes,
        kernel_sizes=usable,
        dropout=dropout,
        lstm_units=lstm_units,
    )

    fit_history = net.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=get_callbacks(),
        verbose=0,
    )

    return net, fit_history
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def predict_approach3(model, X_test: np.ndarray) -> tuple:
    """Run the trained model on X_test. Returns (class_preds, proba)."""
    proba = model.predict(X_test, verbose=0)
    class_preds = np.argmax(proba, axis=1)
    return class_preds, proba
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/backtest.py
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
strategy/backtest.py
|
| 3 |
+
Strategy execution, performance metrics, and benchmark calculations.
|
| 4 |
+
Supports CASH as a class (earns T-bill rate when selected).
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
import pandas as pd
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# ββ Strategy execution ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 13 |
+
|
| 14 |
+
def execute_strategy(
    preds: np.ndarray,
    proba: np.ndarray,
    y_raw_test: np.ndarray,
    test_dates: pd.DatetimeIndex,
    target_etfs: list,
    fee_bps: int,
    tbill_rate: float,
    include_cash: bool = True,
) -> dict:
    """
    Convert model class predictions into realised daily strategy returns,
    an audit trail of completed sessions, and the next-session signal.

    Args:
        preds       : [n] integer class predictions
        proba       : [n, n_classes] softmax probabilities
        y_raw_test  : [n, n_etfs] actual next-day ETF returns
        test_dates  : DatetimeIndex aligned with y_raw_test
        target_etfs : ETF return column names, e.g. ["TLT_Ret", ...]
        fee_bps     : transaction fee in basis points
        tbill_rate  : annualised 3m T-bill rate (e.g. 0.045)
        include_cash: whether CASH is a valid class (index == n_etfs)

    Returns:
        dict merging _compute_metrics output with strat_rets, audit_trail,
        next_signal, and next_proba.
    """
    n_etfs = len(target_etfs)
    cash_daily = tbill_rate / 252
    # NOTE(review): the fee is subtracted every session, not only on
    # position changes — confirm this matches the intended cost model.
    fee = fee_bps / 10000
    today = datetime.now().date()

    strat_rets = []
    audit_trail = []

    for idx, raw_cls in enumerate(preds):
        if include_cash and raw_cls == n_etfs:
            # CASH class earns the daily T-bill rate.
            label, realized = "CASH", cash_daily
        else:
            safe_cls = min(raw_cls, n_etfs - 1)  # clamp out-of-range classes
            label = target_etfs[safe_cls].replace("_Ret", "")
            realized = float(y_raw_test[idx][safe_cls])

        net = realized - fee
        strat_rets.append(net)

        # Only completed sessions (strictly before today) enter the audit trail.
        when = test_dates[idx]
        if when.date() < today:
            audit_trail.append({
                "Date": when.strftime("%Y-%m-%d"),
                "Signal": label,
                "Realized": realized,
                "Net_Return": net,
            })

    strat_rets = np.array(strat_rets, dtype=np.float64)

    # The most recent prediction is the next-session signal.
    final_cls = int(preds[-1])
    if include_cash and final_cls == n_etfs:
        next_signal = "CASH"
    else:
        next_signal = target_etfs[min(final_cls, n_etfs - 1)].replace("_Ret", "")

    return {
        **_compute_metrics(strat_rets, tbill_rate),
        "strat_rets": strat_rets,
        "audit_trail": audit_trail,
        "next_signal": next_signal,
        "next_proba": proba[-1],
    }
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
# ββ Performance metrics βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 95 |
+
|
| 96 |
+
def _compute_metrics(strat_rets: np.ndarray, tbill_rate: float) -> dict:
|
| 97 |
+
if len(strat_rets) == 0:
|
| 98 |
+
return {}
|
| 99 |
+
|
| 100 |
+
cum_returns = np.cumprod(1 + strat_rets)
|
| 101 |
+
n = len(strat_rets)
|
| 102 |
+
ann_return = float(cum_returns[-1] ** (252 / n) - 1)
|
| 103 |
+
|
| 104 |
+
excess = strat_rets - tbill_rate / 252
|
| 105 |
+
sharpe = float(np.mean(excess) / (np.std(strat_rets) + 1e-9) * np.sqrt(252))
|
| 106 |
+
|
| 107 |
+
recent = strat_rets[-15:]
|
| 108 |
+
hit_ratio = float(np.mean(recent > 0))
|
| 109 |
+
|
| 110 |
+
cum_max = np.maximum.accumulate(cum_returns)
|
| 111 |
+
drawdown = (cum_returns - cum_max) / cum_max
|
| 112 |
+
max_dd = float(np.min(drawdown))
|
| 113 |
+
max_daily = float(np.min(strat_rets))
|
| 114 |
+
|
| 115 |
+
return {
|
| 116 |
+
"cum_returns": cum_returns,
|
| 117 |
+
"ann_return": ann_return,
|
| 118 |
+
"sharpe": sharpe,
|
| 119 |
+
"hit_ratio": hit_ratio,
|
| 120 |
+
"max_dd": max_dd,
|
| 121 |
+
"max_daily_dd":max_daily,
|
| 122 |
+
"cum_max": cum_max,
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def compute_benchmark_metrics(returns: np.ndarray, tbill_rate: float) -> dict:
    """Apply the same summary statistics used for the strategy to a benchmark series."""
    return _compute_metrics(returns, tbill_rate)
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
# ββ Winner selection ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 132 |
+
|
| 133 |
+
def select_winner(results: dict) -> str:
    """
    Pick the approach with the highest annualised return (raw, not
    risk-adjusted).

    Entries whose value is None (failed runs) are skipped. Returns None
    when no entry qualifies.

    Args:
        results : {"Approach 1": {...}, "Approach 2": {...}, ...}

    Returns:
        Name of the winning approach, or None.
    """
    winner = None
    top = -np.inf

    for approach, outcome in results.items():
        if outcome is None:
            continue
        candidate = outcome.get("ann_return", -np.inf)
        if candidate > top:
            top, winner = candidate, approach

    return winner
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
# ββ Comparison table ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 159 |
+
|
| 160 |
+
def build_comparison_table(results: dict, winner_name: str) -> pd.DataFrame:
    """
    Build a one-row-per-approach summary DataFrame.

    Failed runs (value None) render every metric as "N/A" and never get
    the winner flag.

    Args:
        results     : {approach name: result dict or None}
        winner_name : approach to flag in the "Winner" column

    Returns:
        pd.DataFrame with columns Approach / Ann. Return / Sharpe /
        Hit Ratio (15d) / Max Drawdown / Winner.
    """
    rows = []
    for approach, outcome in results.items():
        if outcome is None:
            rows.append({
                "Approach": approach,
                "Ann. Return": "N/A",
                "Sharpe": "N/A",
                "Hit Ratio (15d)": "N/A",
                "Max Drawdown": "N/A",
                "Winner": "",
            })
        else:
            rows.append({
                "Approach": approach,
                "Ann. Return": f"{outcome['ann_return'] * 100:.2f}%",
                "Sharpe": f"{outcome['sharpe']:.2f}",
                "Hit Ratio (15d)": f"{outcome['hit_ratio'] * 100:.0f}%",
                "Max Drawdown": f"{outcome['max_dd'] * 100:.2f}%",
                # NOTE(review): "β WINNER" looks like a mojibake'd emoji marker
                # from the source — kept byte-identical; confirm intended glyph.
                "Winner": "β WINNER" if approach == winner_name else "",
            })
    return pd.DataFrame(rows)
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/conviction.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
signals/conviction.py
|
| 3 |
+
Signal conviction scoring via Z-score of model probabilities.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# Ordered high -> low: the first threshold the Z-score clears wins.
# Anything below 0.0 maps to "Low".
CONVICTION_THRESHOLDS = {
    "Very High": 2.0,
    "High": 1.0,
    "Moderate": 0.0,
}


def compute_conviction(proba: np.ndarray, target_etfs: list, include_cash: bool = True) -> dict:
    """
    Score how strongly the model favours its top pick.

    Conviction is the Z-score of the winning probability against the full
    probability vector, mapped onto a label via CONVICTION_THRESHOLDS.

    Args:
        proba       : 1-D softmax probability vector [n_classes]
        target_etfs : ETF return column names (e.g. ["TLT_Ret", ...])
        include_cash: whether CASH is the last class

    Returns:
        dict with keys best_idx, best_name, z_score, label, scores,
        etf_names, sorted_pairs (name/score pairs sorted high -> low).
    """
    scores = np.array(proba, dtype=float)
    best_idx = int(np.argmax(scores))

    # Display names: strip the "_Ret" suffix; CASH is appended last.
    etf_names = [col.replace("_Ret", "") for col in target_etfs]
    if include_cash:
        etf_names = etf_names + ["CASH"]

    best_name = etf_names[best_idx] if best_idx < len(etf_names) else "CASH"

    # Z-score of the winner against the whole probability vector;
    # degenerate (near-zero spread) vectors score 0.
    spread = np.std(scores)
    z = float((scores[best_idx] - np.mean(scores)) / spread) if spread > 1e-9 else 0.0

    label = next(
        (name for name, floor in CONVICTION_THRESHOLDS.items() if z >= floor),
        "Low",
    )

    ranked = sorted(zip(etf_names, scores), key=lambda pair: pair[1], reverse=True)

    return {
        "best_idx": best_idx,
        "best_name": best_name,
        "z_score": z,
        "label": label,
        "scores": scores,
        "etf_names": etf_names,
        "sorted_pairs": ranked,
    }
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def conviction_color(label: str) -> str:
    """Map a conviction label to its hex accent colour (grey fallback)."""
    palette = {
        "Very High": "#00b894",
        "High": "#00cec9",
        "Moderate": "#fdcb6e",
        "Low": "#d63031",
    }
    return palette.get(label, "#888888")
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def conviction_icon(label: str) -> str:
    """Map a conviction label to its status icon (circle fallback)."""
    # NOTE(review): these literals appear mojibake'd in the source (likely
    # coloured-circle emoji) — kept byte-identical; confirm source encoding.
    icons = {
        "Very High": "π’",
        "High": "π’",
        "Moderate": "π‘",
        "Low": "π΄",
    }
    return icons.get(label, "βͺ")
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ui/components.py
|
| 3 |
+
Reusable Streamlit UI blocks:
|
| 4 |
+
- Freshness warning banner
|
| 5 |
+
- Next trading day signal banner
|
| 6 |
+
- Signal conviction panel
|
| 7 |
+
- Metrics row
|
| 8 |
+
- Audit trail table
|
| 9 |
+
- Comparison summary table
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import streamlit as st
|
| 13 |
+
import pandas as pd
|
| 14 |
+
import numpy as np
|
| 15 |
+
|
| 16 |
+
from signals.conviction import conviction_color, conviction_icon
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# ββ Freshness warning βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 20 |
+
|
| 21 |
+
def show_freshness_status(freshness: dict):
    """Render the data-freshness check as a success or warning banner.

    Args:
        freshness : dict with at least a "message" key; a truthy "fresh"
                    key selects the success (green) style.

    NOTE(review): despite the module docstring's claim that stale data
    stops the app, this function only warns — it never calls st.stop().
    Confirm which behaviour is intended.
    """
    if freshness.get("fresh"):
        st.success(freshness["message"])
    else:
        st.warning(freshness["message"])
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# ββ Next trading day banner βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 30 |
+
|
| 31 |
+
def show_signal_banner(next_signal: str, next_date, approach_name: str):
    """Large coloured banner showing the winning approach's next signal.

    Args:
        next_signal   : ticker string or "CASH"
        next_date     : date-like with a strftime method (next trading day)
        approach_name : display name of the winning approach

    CASH renders on a dark gradient; any ETF signal on a teal gradient.
    Raw HTML is injected via unsafe_allow_html, so inputs are assumed
    trusted (they come from this app's own pipeline, not users).
    """
    is_cash = next_signal == "CASH"
    # Dark gradient for CASH, teal gradient for an ETF signal.
    bg = "linear-gradient(135deg, #2d3436 0%, #1a1a2e 100%)" if is_cash else \
        "linear-gradient(135deg, #00d1b2 0%, #00a896 100%)"

    st.markdown(f"""
    <div style="background:{bg}; padding:25px; border-radius:15px;
                text-align:center; box-shadow:0 8px 16px rgba(0,0,0,0.3);
                margin:16px 0;">
        <div style="color:rgba(255,255,255,0.7); font-size:12px;
                    letter-spacing:3px; margin-bottom:6px;">
            {approach_name.upper()} Β· NEXT TRADING DAY SIGNAL
        </div>
        <h1 style="color:white; font-size:44px; margin:0 0 8px 0;
                   font-weight:800; text-shadow:2px 2px 4px rgba(0,0,0,0.3);">
            π― {next_date.strftime('%Y-%m-%d')} β {next_signal}
        </h1>
    </div>
    """, unsafe_allow_html=True)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
# ββ Signal conviction panel βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 54 |
+
|
| 55 |
+
def show_conviction_panel(conviction: dict):
    """
    White-background conviction panel with Z-score gauge and per-ETF bars.
    Uses separate st.markdown calls per ETF row to avoid Streamlit HTML escaping.

    Expected keys in ``conviction``: "label", "z_score", "best_name",
    "sorted_pairs" (list of (etf_name, score) pairs, ranked high to low).
    """
    label = conviction["label"]
    z_score = conviction["z_score"]
    best_name = conviction["best_name"]
    sorted_pairs = conviction["sorted_pairs"]

    color = conviction_color(label)
    icon = conviction_icon(label)

    # Clip Z into [-3, 3] and map it linearly to a 0-100% gauge width.
    z_clipped = max(-3.0, min(3.0, z_score))
    bar_pct = int((z_clipped + 3) / 6 * 100)

    # Normalise per-ETF bar widths by the top score; guard against <= 0
    # so the division below can never blow up or flip signs.
    max_score = max(s for _, s in sorted_pairs) if sorted_pairs else 1.0
    if max_score <= 0:
        max_score = 1.0

    # ββ Header + gauge ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
    st.markdown(f"""
    <div style="background:#ffffff; border:1px solid #ddd;
                border-left:5px solid {color}; border-radius:12px 12px 0 0;
                padding:18px 24px 12px 24px; margin:12px 0 0 0;
                box-shadow:0 2px 8px rgba(0,0,0,0.07);">

      <div style="display:flex; align-items:center; gap:12px;
                  margin-bottom:14px; flex-wrap:wrap;">
        <span style="font-size:20px;">{icon}</span>
        <span style="font-size:18px; font-weight:700; color:#1a1a1a;">Signal Conviction</span>
        <span style="background:#f0f0f0; border:1px solid {color};
                     color:{color}; font-weight:700; font-size:14px;
                     padding:3px 12px; border-radius:8px;">
            Z = {z_score:.2f} σ
        </span>
        <span style="margin-left:auto; background:{color}; color:#fff;
                     font-weight:700; padding:4px 16px;
                     border-radius:20px; font-size:13px;">
            {label}
        </span>
      </div>

      <div style="display:flex; justify-content:space-between;
                  font-size:11px; color:#999; margin-bottom:4px;">
        <span>Weak −3σ</span>
        <span>Neutral 0σ</span>
        <span>Strong +3σ</span>
      </div>
      <div style="background:#f0f0f0; border-radius:8px; height:14px;
                  overflow:hidden; position:relative; border:1px solid #e0e0e0;
                  margin-bottom:14px;">
        <div style="position:absolute; left:50%; top:0; width:2px;
                    height:100%; background:#ccc;"></div>
        <div style="width:{bar_pct}%; height:100%;
                    background:linear-gradient(90deg,#fab1a0,{color});
                    border-radius:8px;"></div>
      </div>

      <div style="font-size:12px; color:#999; margin-bottom:2px;">
        Model probability by ETF (ranked high → low):
      </div>
    </div>
    """, unsafe_allow_html=True)

    # ββ Per-ETF rows ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
    # One st.markdown per row; only the last row gets rounded bottom corners
    # so the stacked rows read as a single card.
    for i, (name, score) in enumerate(sorted_pairs):
        is_winner = (name == best_name)
        is_last = (i == len(sorted_pairs) - 1)
        bar_w = int(score / max_score * 100)
        name_style = "font-weight:700; color:#00897b;" if is_winner else "color:#444;"
        # Winner gets the conviction colour; others fade by relative strength.
        bar_color = color if is_winner else "#b2dfdb" if score > max_score * 0.5 else "#e0e0e0"
        star = " β" if is_winner else ""
        bottom_r = "0 0 12px 12px" if is_last else "0"
        border_bot = "border-bottom:1px solid #f0f0f0;" if not is_last else ""

        st.markdown(f"""
        <div style="background:#ffffff; border:1px solid #ddd; border-top:none;
                    border-radius:{bottom_r}; padding:7px 24px; {border_bot}
                    box-shadow:0 2px 8px rgba(0,0,0,0.07);">
          <div style="display:flex; align-items:center; gap:12px;">
            <span style="width:44px; text-align:right; font-size:13px; {name_style}">{name}{star}</span>
            <div style="flex:1; background:#f5f5f5; border-radius:4px;
                        height:14px; overflow:hidden; border:1px solid #e8e8e8;">
              <div style="width:{bar_w}%; height:100%;
                          background:{bar_color}; border-radius:4px;"></div>
            </div>
            <span style="width:56px; font-size:12px; color:#888; text-align:right;">{score:.4f}</span>
          </div>
        </div>
        """, unsafe_allow_html=True)

    st.caption(
        "Z-score = std deviations the top ETF's probability sits above the mean of all ETF probabilities. "
        "Higher β model is more decisive."
    )
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
# ββ Metrics row βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 154 |
+
|
| 155 |
+
def show_metrics_row(result: dict, tbill_rate: float):
    """Five-column metric display."""
    # (label, value, delta) triples rendered one per column, left to right.
    specs = [
        (
            "π Annualised Return",
            f"{result['ann_return']*100:.2f}%",
            f"vs T-bill: {(result['ann_return'] - tbill_rate)*100:.2f}%",
        ),
        (
            "π Sharpe Ratio",
            f"{result['sharpe']:.2f}",
            "Risk-Adjusted" if result['sharpe'] > 1 else "Below Threshold",
        ),
        (
            "π― Hit Ratio (15d)",
            f"{result['hit_ratio']*100:.0f}%",
            "Strong" if result['hit_ratio'] > 0.6 else "Weak",
        ),
        (
            "π Max Drawdown",
            f"{result['max_dd']*100:.2f}%",
            "Peak to Trough",
        ),
        (
            "β οΈ Max Daily DD",
            f"{result['max_daily_dd']*100:.2f}%",
            "Worst Day",
        ),
    ]

    for column, (label, value, delta) in zip(st.columns(5), specs):
        column.metric(label, value, delta=delta)
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
# ββ Comparison table ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 187 |
+
|
| 188 |
+
def show_comparison_table(comparison_df: pd.DataFrame):
    """Styled comparison table for all three approaches.

    Rows whose "Winner" column contains the text "WINNER" are highlighted
    with a translucent green background and bold text.
    """
    def highlight_winner(row):
        # Style every cell of the row when this row is flagged as the winner.
        if "WINNER" in str(row.get("Winner", "")):
            return ["background-color: rgba(0,200,150,0.15); font-weight:bold"] * len(row)
        return [""] * len(row)

    styled = comparison_df.style.apply(highlight_winner, axis=1).set_properties(**{
        "text-align": "center",
        "font-size": "14px",
    }).set_table_styles([
        {"selector": "th", "props": [("font-size", "14px"), ("font-weight", "bold"),
                                     ("text-align", "center")]},
        {"selector": "td", "props": [("padding", "10px")]},
    ])
    st.dataframe(styled, use_container_width=True)
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
# ββ Audit trail βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 207 |
+
|
| 208 |
+
def show_audit_trail(audit_trail: list):
    """Render the last 20 audit-trail rows as a styled table.

    Shows Date, Signal and Net_Return; positive daily returns are coloured
    green, zero/negative returns red. Shows an info box when there is no data.

    Args:
        audit_trail: list of row dicts containing at least the keys
            "Date", "Signal" and "Net_Return".
    """
    if not audit_trail:
        st.info("No audit trail data available.")
        return

    df = pd.DataFrame(audit_trail).tail(20)[["Date", "Signal", "Net_Return"]]

    def color_return(val):
        # Green for gains; red for flat or losing days.
        return "color: #00c896; font-weight:bold" if val > 0 else "color: #ff4b4b; font-weight:bold"

    styler = df.style
    # Styler.applymap was deprecated in pandas 2.1 in favour of Styler.map.
    # Prefer .map when present, fall back to .applymap on older pandas so
    # this keeps working (and warning-free) across versions.
    elementwise = getattr(styler, "map", styler.applymap)

    styled = elementwise(color_return, subset=["Net_Return"]).format(
        {"Net_Return": "{:.2%}"}
    ).set_properties(**{
        "font-size": "16px",
        "text-align": "center",
    }).set_table_styles([
        {"selector": "th", "props": [("font-size", "16px"), ("font-weight", "bold"),
                                     ("text-align", "center")]},
        {"selector": "td", "props": [("padding", "10px")]},
    ])
    st.dataframe(styled, use_container_width=True, height=500)
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/charts.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ui/charts.py
|
| 3 |
+
All Plotly chart builders for the Streamlit UI.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import plotly.graph_objects as go
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Line colours (hex) for the three strategy approaches, keyed by display name.
# Unknown approaches fall back to grey inside the chart builders.
APPROACH_COLOURS = {
    "Approach 1": "#00ffc8",
    "Approach 2": "#7c6aff",
    "Approach 3": "#ff6b6b",
}
# Colours for the passive benchmarks overlaid on the equity-curve chart.
BENCHMARK_COLOURS = {
    "SPY": "#ff4b4b",
    "AGG": "#ffa500",
}
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def equity_curve_chart(
    results: dict,
    winner_name: str,
    plot_dates: pd.DatetimeIndex,
    df: pd.DataFrame,
    test_slice: slice,
    tbill_rate: float,
) -> go.Figure:
    """
    Equity curve chart showing all three approaches + SPY + AGG benchmarks.

    Args:
        results     : {approach_name: result_dict}; None entries are skipped.
        winner_name : highlighted approach (thicker solid line + fill)
        plot_dates  : DatetimeIndex for x-axis
        df          : full DataFrame (for benchmark columns SPY_Ret / AGG_Ret)
        test_slice  : slice object to extract test-period benchmark returns
        tbill_rate  : for benchmark metric calculation
    """
    # Imported lazily to avoid a circular import at module load time.
    from strategy.backtest import compute_benchmark_metrics

    fig = go.Figure()

    # ββ Strategy lines ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
    for name, res in results.items():
        if res is None:
            continue
        colour = APPROACH_COLOURS.get(name, "#aaaaaa")
        # Winner is emphasised: thicker solid line; others are thin dotted.
        width = 3 if name == winner_name else 1.5
        dash = "solid" if name == winner_name else "dot"

        # Truncate to the shorter of curve/dates so x and y always align.
        n = min(len(res["cum_returns"]), len(plot_dates))

        fig.add_trace(go.Scatter(
            x=plot_dates[:n],
            y=res["cum_returns"][:n],
            mode="lines",
            name=f"{name} {'β ' if name == winner_name else ''}",
            line=dict(color=colour, width=width, dash=dash),
            # Only the winner gets an area fill, in a translucent tint.
            fill="tozeroy" if name == winner_name else None,
            fillcolor=f"rgba({_hex_to_rgb(colour)},0.07)" if name == winner_name else None,
        ))

    # ββ Benchmark: SPY ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
    if "SPY_Ret" in df.columns:
        spy_rets = df["SPY_Ret"].iloc[test_slice].values
        n = min(len(spy_rets), len(plot_dates))
        spy_m = compute_benchmark_metrics(spy_rets[:n], tbill_rate)
        fig.add_trace(go.Scatter(
            x=plot_dates[:n],
            y=spy_m["cum_returns"],
            mode="lines",
            name="SPY (Equity BM)",
            line=dict(color=BENCHMARK_COLOURS["SPY"], width=1.5, dash="dot"),
        ))

    # ββ Benchmark: AGG ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
    if "AGG_Ret" in df.columns:
        agg_rets = df["AGG_Ret"].iloc[test_slice].values
        n = min(len(agg_rets), len(plot_dates))
        agg_m = compute_benchmark_metrics(agg_rets[:n], tbill_rate)
        fig.add_trace(go.Scatter(
            x=plot_dates[:n],
            y=agg_m["cum_returns"],
            mode="lines",
            name="AGG (Bond BM)",
            line=dict(color=BENCHMARK_COLOURS["AGG"], width=1.5, dash="dot"),
        ))

    fig.update_layout(
        template="plotly_dark",
        height=460,
        hovermode="x unified",
        showlegend=True,
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01, font=dict(size=11)),
        xaxis_title="Date",
        yaxis_title="Cumulative Return (Γ)",
        margin=dict(l=50, r=30, t=20, b=50),
    )
    return fig
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def comparison_bar_chart(results: dict, winner_name: str) -> go.Figure:
    """
    Horizontal bar chart comparing annualised returns across all three approaches.

    ``winner_name`` is accepted for interface parity with the other chart
    builders; the bars are coloured purely by approach.
    """
    # Collect (name, return%, colour) for every approach that produced a result.
    rows = [
        (name, res["ann_return"] * 100, APPROACH_COLOURS.get(name, "#aaaaaa"))
        for name, res in results.items()
        if res is not None
    ]
    names = [r[0] for r in rows]
    returns = [r[1] for r in rows]
    colours = [r[2] for r in rows]

    fig = go.Figure(go.Bar(
        x=returns,
        y=names,
        orientation="h",
        marker_color=colours,
        text=[f"{r:.1f}%" for r in returns],
        textposition="auto",
    ))

    fig.update_layout(
        template="plotly_dark",
        height=200,
        xaxis_title="Annualised Return (%)",
        margin=dict(l=100, r=30, t=10, b=40),
        showlegend=False,
    )
    return fig
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
# ββ Helper ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 139 |
+
|
| 140 |
+
def _hex_to_rgb(hex_color: str) -> str:
|
| 141 |
+
"""Convert #rrggbb to 'r,g,b' string for rgba()."""
|
| 142 |
+
h = hex_color.lstrip("#")
|
| 143 |
+
r, g, b = int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)
|
| 144 |
+
return f"{r},{g},{b}"
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/calendar.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
utils/calendar.py
|
| 3 |
+
NYSE calendar utilities:
|
| 4 |
+
- Next trading day for signal display
|
| 5 |
+
- Market open check
|
| 6 |
+
- EST time helper
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from datetime import datetime, timedelta
|
| 10 |
+
import pytz
|
| 11 |
+
|
| 12 |
+
try:
|
| 13 |
+
import pandas_market_calendars as mcal
|
| 14 |
+
NYSE_CAL_AVAILABLE = True
|
| 15 |
+
except ImportError:
|
| 16 |
+
NYSE_CAL_AVAILABLE = False
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def get_est_time() -> datetime:
    """Return the current timezone-aware datetime in US/Eastern.

    Uses the stdlib ``zoneinfo`` module (Python 3.9+) instead of the
    third-party ``pytz`` package: ``zoneinfo`` is the officially
    recommended timezone API and avoids pytz's localize/normalize
    pitfalls. The returned value is interchangeable with the old one
    for all callers here (``.date()``, ``.hour``, ``.minute``).
    """
    from zoneinfo import ZoneInfo  # local import: keeps module importable even if pytz vanishes
    return datetime.now(ZoneInfo("US/Eastern"))
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def is_market_open_today() -> bool:
    """Return True if today is a NYSE trading day."""
    today = get_est_time().date()

    if NYSE_CAL_AVAILABLE:
        try:
            # Ask the official NYSE calendar whether today has a session.
            sessions = mcal.get_calendar("NYSE").schedule(
                start_date=today, end_date=today,
            )
            return len(sessions) != 0
        except Exception:
            # Calendar lookup failed; drop through to the weekday heuristic.
            pass

    # Fallback: Monday-Friday approximation (ignores holidays).
    return today.weekday() < 5
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def get_next_signal_date() -> datetime.date:
    # NOTE(review): the annotation above is ``datetime.date`` where ``datetime``
    # is the *class* imported from the datetime module, so it actually names the
    # unbound ``date()`` method, not the ``date`` type. Harmless at runtime but
    # misleading; consider importing ``date`` and annotating ``-> date``.
    """
    Determine the date for which the model's signal applies.

    Rules:
      - If today is a NYSE trading day AND it is before 09:30 EST
          β signal applies to TODAY (market hasn't opened yet)
      - Otherwise
          β signal applies to the NEXT NYSE trading day

    Returns:
        A ``datetime.date``. Falls back to a weekend-skipping heuristic
        (ignores holidays) when pandas_market_calendars is unavailable.
    """
    now_est = get_est_time()
    today = now_est.date()

    # True strictly before the 09:30 EST opening bell.
    market_not_open_yet = (
        now_est.hour < 9 or
        (now_est.hour == 9 and now_est.minute < 30)
    )

    if NYSE_CAL_AVAILABLE:
        try:
            nyse = mcal.get_calendar("NYSE")
            # 10-day window is enough to always contain the next session,
            # even across long holiday weekends.
            schedule = nyse.schedule(
                start_date=today,
                end_date=today + timedelta(days=10),
            )
            if len(schedule) == 0:
                return today  # fallback

            first_day = schedule.index[0].date()

            # Today is a trading day and market hasn't opened β today
            if first_day == today and market_not_open_yet:
                return today

            # Otherwise find first trading day strictly after today
            for ts in schedule.index:
                d = ts.date()
                if d > today:
                    return d

            # No session after today inside the window: return the last known.
            return schedule.index[-1].date()
        except Exception:
            # Calendar failure β use the simple heuristic below.
            pass

    # Fallback: simple weekend skip (holidays are not accounted for)
    candidate = today if market_not_open_yet else today + timedelta(days=1)
    while candidate.weekday() >= 5:
        candidate += timedelta(days=1)
    return candidate
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def is_sync_window() -> bool:
    """True if current EST time is in the 07:00-08:00 or 19:00-20:00 window."""
    # Each window spans exactly one clock hour, so membership reduces
    # to checking the hour component itself.
    return get_est_time().hour in (7, 19)
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/models/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# models package
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
# strategy package
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/strategy/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
# strategy package
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/signals/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
#
|
|
|
|
| 1 |
+
# signals package
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
# ui package
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/utils/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
# utils package
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py
CHANGED
|
@@ -1 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
models/approach2_regime.py
|
| 3 |
+
Approach 2: Regime-Conditioned CNN-LSTM
|
| 4 |
|
| 5 |
+
Pipeline:
|
| 6 |
+
Raw macro signals
|
| 7 |
+
-> CNN Tower (64 filters, k=3) -> feature vector
|
| 8 |
+
-> Regime Classifier (HMM on VIX + HY spread + T10Y2Y) -> one-hot [4]
|
| 9 |
+
-> Concatenate CNN features + regime embedding
|
| 10 |
+
-> LSTM (128 units)
|
| 11 |
+
-> Dense 64 -> Softmax (n_etfs + 1 CASH)
|
| 12 |
+
|
| 13 |
+
NOTE: tensorflow and hmmlearn are imported lazily inside functions
|
| 14 |
+
to prevent module-level import failures from making this module
|
| 15 |
+
appear broken to Python's import system.
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
import numpy as np
|
| 19 |
+
|
| 20 |
+
N_REGIMES = 4
|
| 21 |
+
REGIME_HINTS = ["VIX", "HY", "Spread", "T10Y2Y", "T10Y3M", "Credit"]
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# ---------------------------------------------------------------------------
|
| 25 |
+
# Regime detection helpers
|
| 26 |
+
# ---------------------------------------------------------------------------
|
| 27 |
+
|
| 28 |
+
def _get_regime_cols(feature_names: list) -> list:
|
| 29 |
+
return [
|
| 30 |
+
f for f in feature_names
|
| 31 |
+
if any(hint.lower() in f.lower() for hint in REGIME_HINTS)
|
| 32 |
+
]
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def fit_regime_model(X_flat: np.ndarray, feature_names: list,
                     n_regimes: int = N_REGIMES):
    """
    Fit a Gaussian HMM on regime-relevant macro features.

    Args:
        X_flat        : (n_days, n_features) flat feature matrix.
        feature_names : column names aligned with X_flat's second axis.
        n_regimes     : number of hidden regimes to fit.

    Returns:
        (hmm_model, regime_cols_idx).
        hmm_model is None if hmmlearn is unavailable or fitting fails;
        callers must be prepared for that fallback.
    """
    regime_col_names = _get_regime_cols(feature_names)
    if not regime_col_names:
        # No recognised regime columns: fall back to the first few features.
        regime_col_names = feature_names[:min(3, len(feature_names))]

    # Every entry of regime_col_names comes from feature_names, so .index()
    # cannot fail here (the previous version re-checked membership redundantly).
    regime_cols_idx = [feature_names.index(c) for c in regime_col_names]
    X_regime = X_flat[:, regime_cols_idx]

    try:
        # Imported lazily so a missing hmmlearn degrades gracefully
        # instead of breaking this module's import.
        from hmmlearn.hmm import GaussianHMM
        hmm = GaussianHMM(
            n_components=n_regimes,
            covariance_type="diag",
            n_iter=100,
            random_state=42,  # deterministic regime labelling across runs
        )
        hmm.fit(X_regime)
        return hmm, regime_cols_idx
    except Exception as e:
        print(f"[Approach 2] HMM fitting failed: {e}. Using fallback.")
        return None, regime_cols_idx
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def predict_regimes(hmm_model, X_flat: np.ndarray,
                    regime_cols_idx: list,
                    n_regimes: int = N_REGIMES) -> np.ndarray:
    """Assign an integer regime label to every day.

    Uses the fitted HMM when one is available; otherwise bins the first
    regime feature into equal-frequency quantile buckets.
    """
    X_regime = X_flat[:, regime_cols_idx]

    if hmm_model is not None:
        try:
            return hmm_model.predict(X_regime)
        except Exception:
            # Broken/incompatible model β use the quantile fallback below.
            pass

    # Fallback: quantile binning on the first regime feature gives
    # labels 0..n_regimes-1 with roughly equal counts.
    primary = X_regime[:, 0]
    edges = np.percentile(primary, np.linspace(0, 100, n_regimes + 1))
    return np.digitize(primary, edges[1:-1]).astype(int)
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def regimes_to_onehot(regimes: np.ndarray,
                      n_regimes: int = N_REGIMES) -> np.ndarray:
    """One-hot encode integer regime labels.

    Labels >= n_regimes are clamped into the last bucket so out-of-range
    HMM/fallback output can never index past the matrix.

    Args:
        regimes   : 1-D array-like of integer regime labels.
        n_regimes : width of the one-hot encoding.

    Returns:
        float32 array of shape (len(regimes), n_regimes).
    """
    labels = np.asarray(regimes, dtype=int)
    # Vectorized fancy-index assignment replaces the previous
    # per-element Python loop; behaviour (incl. clamping) is identical.
    clamped = np.minimum(labels, n_regimes - 1)
    one_hot = np.zeros((len(labels), n_regimes), dtype=np.float32)
    one_hot[np.arange(len(labels)), clamped] = 1.0
    return one_hot
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def build_regime_sequences(X_seq: np.ndarray,
                           regimes_flat: np.ndarray,
                           lookback: int) -> np.ndarray:
    """One-hot regime matrix aligned with the sequence samples in X_seq.

    Sample i of X_seq ends at flat index lookback + i, so the aligned
    regime labels are the window regimes_flat[lookback : lookback + n].
    """
    window = regimes_flat[lookback: lookback + X_seq.shape[0]]
    return regimes_to_onehot(window)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
# ---------------------------------------------------------------------------
|
| 102 |
+
# Model builder
|
| 103 |
+
# ---------------------------------------------------------------------------
|
| 104 |
+
|
| 105 |
+
def build_regime_cnn_lstm(seq_input_shape: tuple,
                          n_classes: int,
                          n_regimes: int = N_REGIMES,
                          dropout: float = 0.3,
                          lstm_units: int = 128):
    """Build and compile the regime-conditioned CNN-LSTM model.

    Architecture: two causal Conv1D towers over the macro sequence are
    pooled to a single feature vector, concatenated with a dense embedding
    of the one-hot regime input, then fed (as a length-1 sequence) through
    an LSTM and the shared classification head.

    Args:
        seq_input_shape : (lookback, n_features) shape of each sequence sample.
        n_classes       : output classes (n_etfs + 1 for CASH).
        n_regimes       : width of the one-hot regime input.
        dropout         : dropout rate used in CNN, LSTM and head.
        lstm_units      : LSTM hidden size.

    Returns:
        Compiled ``keras.Model`` expecting inputs [seq_input, regime_input]
        and integer labels (sparse categorical cross-entropy).
    """
    # Lazy imports: keeps this module importable when tensorflow is absent.
    from tensorflow import keras
    from models.base import classification_head

    seq_input = keras.Input(shape=seq_input_shape, name="seq_input")
    # Causal padding ensures no look-ahead within the sequence window.
    x = keras.layers.Conv1D(64, kernel_size=3, padding="causal",
                            activation="relu")(seq_input)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.MaxPooling1D(pool_size=2)(x)
    x = keras.layers.Conv1D(32, kernel_size=3, padding="causal",
                            activation="relu")(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Dropout(dropout)(x)
    cnn_out = keras.layers.GlobalAveragePooling1D()(x)

    regime_input = keras.Input(shape=(n_regimes,), name="regime_input")
    regime_emb = keras.layers.Dense(8, activation="relu")(regime_input)

    merged = keras.layers.Concatenate()([cnn_out, regime_emb])
    # The LSTM expects a time axis: treat the merged vector as a
    # single-step sequence.
    x = keras.layers.Reshape((1, merged.shape[-1]))(merged)
    x = keras.layers.LSTM(lstm_units, dropout=dropout)(x)

    outputs = classification_head(x, n_classes, dropout)

    model = keras.Model(
        inputs=[seq_input, regime_input],
        outputs=outputs,
        name="Approach2_Regime_CNN_LSTM",
    )
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
# ---------------------------------------------------------------------------
|
| 148 |
+
# Training pipeline
|
| 149 |
+
# ---------------------------------------------------------------------------
|
| 150 |
+
|
| 151 |
+
def train_approach2(
|
| 152 |
+
X_train, y_train,
|
| 153 |
+
X_val, y_val,
|
| 154 |
+
X_flat_all: np.ndarray,
|
| 155 |
+
feature_names: list,
|
| 156 |
+
lookback: int,
|
| 157 |
+
train_size: int,
|
| 158 |
+
val_size: int,
|
| 159 |
+
n_classes: int,
|
| 160 |
+
epochs: int = 100,
|
| 161 |
+
batch_size: int = 32,
|
| 162 |
+
dropout: float = 0.3,
|
| 163 |
+
lstm_units: int = 128,
|
| 164 |
+
):
|
| 165 |
+
"""
|
| 166 |
+
Fit HMM regime model then train the regime-conditioned CNN-LSTM.
|
| 167 |
+
Returns: model, history, hmm_model, regime_cols_idx
|
| 168 |
+
"""
|
| 169 |
+
from models.base import get_callbacks
|
| 170 |
+
|
| 171 |
+
X_flat_train = X_flat_all[:train_size + lookback]
|
| 172 |
+
hmm_model, regime_cols_idx = fit_regime_model(X_flat_train, feature_names)
|
| 173 |
+
|
| 174 |
+
regimes_all = predict_regimes(hmm_model, X_flat_all, regime_cols_idx)
|
| 175 |
+
|
| 176 |
+
R_train = build_regime_sequences(X_train, regimes_all, lookback)
|
| 177 |
+
R_val = build_regime_sequences(X_val, regimes_all, lookback + train_size)
|
| 178 |
+
|
| 179 |
+
model = build_regime_cnn_lstm(
|
| 180 |
+
X_train.shape[1:], n_classes,
|
| 181 |
+
dropout=dropout, lstm_units=lstm_units,
|
| 182 |
+
)
|
| 183 |
+
|
| 184 |
+
history = model.fit(
|
| 185 |
+
[X_train, R_train], y_train,
|
| 186 |
+
validation_data=([X_val, R_val], y_val),
|
| 187 |
+
epochs=epochs,
|
| 188 |
+
batch_size=batch_size,
|
| 189 |
+
callbacks=get_callbacks(),
|
| 190 |
+
verbose=0,
|
| 191 |
+
)
|
| 192 |
+
|
| 193 |
+
return model, history, hmm_model, regime_cols_idx
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
# ---------------------------------------------------------------------------
|
| 197 |
+
# Inference
|
| 198 |
+
# ---------------------------------------------------------------------------
|
| 199 |
+
|
| 200 |
+
def predict_approach2(
|
| 201 |
+
model,
|
| 202 |
+
X_test: np.ndarray,
|
| 203 |
+
X_flat_all: np.ndarray,
|
| 204 |
+
regime_cols_idx: list,
|
| 205 |
+
hmm_model,
|
| 206 |
+
lookback: int,
|
| 207 |
+
train_size: int,
|
| 208 |
+
val_size: int,
|
| 209 |
+
) -> tuple:
|
| 210 |
+
"""Predict on test set with regime conditioning. Returns (preds, proba)."""
|
| 211 |
+
regimes_all = predict_regimes(hmm_model, X_flat_all, regime_cols_idx)
|
| 212 |
+
offset = lookback + train_size + val_size
|
| 213 |
+
R_test = build_regime_sequences(X_test, regimes_all, offset)
|
| 214 |
+
|
| 215 |
+
proba = model.predict([X_test, R_test], verbose=0)
|
| 216 |
+
preds = np.argmax(proba, axis=1)
|
| 217 |
+
return preds, proba
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/base.py
CHANGED
|
@@ -1,18 +1,16 @@
|
|
| 1 |
"""
|
| 2 |
models/base.py
|
| 3 |
-
Shared utilities for all three CNN-LSTM variants
|
| 4 |
-
|
| 5 |
-
- Common Keras layers / callbacks
|
| 6 |
-
- Predict + evaluate helpers
|
| 7 |
"""
|
| 8 |
|
| 9 |
import numpy as np
|
| 10 |
import pandas as pd
|
| 11 |
from sklearn.preprocessing import RobustScaler
|
|
|
|
| 12 |
import tensorflow as tf
|
| 13 |
from tensorflow import keras
|
| 14 |
|
| 15 |
-
# ββ Reproducibility βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 16 |
SEED = 42
|
| 17 |
tf.random.set_seed(SEED)
|
| 18 |
np.random.seed(SEED)
|
|
@@ -23,15 +21,7 @@ np.random.seed(SEED)
|
|
| 23 |
def build_sequences(features: np.ndarray, targets: np.ndarray, lookback: int):
|
| 24 |
"""
|
| 25 |
Build supervised sequences for CNN-LSTM input.
|
| 26 |
-
|
| 27 |
-
Args:
|
| 28 |
-
features : 2-D array [n_days, n_features]
|
| 29 |
-
targets : 2-D array [n_days, n_etfs] (raw returns)
|
| 30 |
-
lookback : number of past days per sample
|
| 31 |
-
|
| 32 |
-
Returns:
|
| 33 |
-
X : [n_samples, lookback, n_features]
|
| 34 |
-
y : [n_samples, n_etfs] (raw returns for the next day)
|
| 35 |
"""
|
| 36 |
X, y = [], []
|
| 37 |
for i in range(lookback, len(features)):
|
|
@@ -43,11 +33,9 @@ def build_sequences(features: np.ndarray, targets: np.ndarray, lookback: int):
|
|
| 43 |
# ββ Train / val / test split ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 44 |
|
| 45 |
def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15):
|
| 46 |
-
|
| 47 |
-
n = len(X)
|
| 48 |
t1 = int(n * train_pct)
|
| 49 |
t2 = int(n * (train_pct + val_pct))
|
| 50 |
-
|
| 51 |
return (
|
| 52 |
X[:t1], y[:t1],
|
| 53 |
X[t1:t2], y[t1:t2],
|
|
@@ -58,56 +46,66 @@ def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15):
|
|
| 58 |
# ββ Feature scaling βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 59 |
|
| 60 |
def scale_features(X_train, X_val, X_test):
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
Operates on the flattened feature dimension.
|
| 64 |
-
|
| 65 |
-
Returns scaled arrays with same shape as inputs.
|
| 66 |
-
"""
|
| 67 |
-
n_train, lb, n_feat = X_train.shape
|
| 68 |
-
scaler = RobustScaler()
|
| 69 |
-
|
| 70 |
-
# Fit on train
|
| 71 |
scaler.fit(X_train.reshape(-1, n_feat))
|
| 72 |
|
| 73 |
-
def
|
| 74 |
-
|
| 75 |
-
return scaler.transform(X.reshape(-1, n_feat)).reshape(
|
| 76 |
|
| 77 |
-
return
|
| 78 |
|
| 79 |
|
| 80 |
-
# ββ Label builder
|
| 81 |
|
| 82 |
def returns_to_labels(y_raw, include_cash=True, cash_threshold=0.0):
|
| 83 |
"""
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
If include_cash=True, adds a CASH class (index = n_etfs) when
|
| 87 |
-
the best ETF return is below cash_threshold.
|
| 88 |
-
|
| 89 |
-
Args:
|
| 90 |
-
y_raw : [n_samples, n_etfs]
|
| 91 |
-
include_cash : whether to allow CASH class
|
| 92 |
-
cash_threshold : minimum ETF return to prefer over CASH
|
| 93 |
-
|
| 94 |
-
Returns:
|
| 95 |
-
labels : [n_samples] integer class indices
|
| 96 |
"""
|
| 97 |
-
best
|
| 98 |
if include_cash:
|
| 99 |
-
|
| 100 |
-
cash_idx
|
| 101 |
-
labels
|
| 102 |
else:
|
| 103 |
labels = best
|
| 104 |
return labels.astype(np.int32)
|
| 105 |
|
| 106 |
|
| 107 |
-
# ββ
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
-
|
| 110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
return [
|
| 112 |
keras.callbacks.EarlyStopping(
|
| 113 |
monitor="val_loss",
|
|
@@ -125,75 +123,51 @@ def get_callbacks(patience_es=15, patience_lr=8, min_lr=1e-6):
|
|
| 125 |
]
|
| 126 |
|
| 127 |
|
| 128 |
-
# ββ
|
| 129 |
|
| 130 |
def classification_head(x, n_classes: int, dropout: float = 0.3):
|
| 131 |
-
"""
|
| 132 |
-
Shared dense output head for all three CNN-LSTM variants.
|
| 133 |
-
|
| 134 |
-
Args:
|
| 135 |
-
x : input tensor
|
| 136 |
-
n_classes : number of ETF classes (+ 1 for CASH if applicable)
|
| 137 |
-
dropout : dropout rate
|
| 138 |
-
|
| 139 |
-
Returns:
|
| 140 |
-
output tensor with softmax activation
|
| 141 |
-
"""
|
| 142 |
x = keras.layers.Dense(64, activation="relu")(x)
|
|
|
|
| 143 |
x = keras.layers.Dropout(dropout)(x)
|
|
|
|
|
|
|
| 144 |
x = keras.layers.Dense(n_classes, activation="softmax")(x)
|
| 145 |
return x
|
| 146 |
|
| 147 |
|
| 148 |
-
# ββ Prediction
|
| 149 |
|
| 150 |
-
def predict_classes(model, X_test: np.ndarray) ->
|
| 151 |
-
"""Return integer class predictions from a Keras model."""
|
| 152 |
proba = model.predict(X_test, verbose=0)
|
| 153 |
return np.argmax(proba, axis=1), proba
|
| 154 |
|
| 155 |
|
| 156 |
-
# ββ Metrics
|
| 157 |
|
| 158 |
def evaluate_returns(
|
| 159 |
-
preds
|
| 160 |
-
proba: np.ndarray,
|
| 161 |
-
y_raw_test: np.ndarray,
|
| 162 |
-
target_etfs: list,
|
| 163 |
-
tbill_rate: float,
|
| 164 |
-
fee_bps: int,
|
| 165 |
-
include_cash: bool = True,
|
| 166 |
):
|
| 167 |
-
"""
|
| 168 |
-
Given integer class predictions and raw return matrix,
|
| 169 |
-
compute strategy returns and summary metrics.
|
| 170 |
-
|
| 171 |
-
Returns:
|
| 172 |
-
strat_rets : np.ndarray of daily net returns
|
| 173 |
-
ann_return : annualised return (float)
|
| 174 |
-
cum_returns : cumulative return series
|
| 175 |
-
last_proba : probability vector for the last prediction
|
| 176 |
-
next_etf : name of ETF predicted for next session
|
| 177 |
-
"""
|
| 178 |
n_etfs = len(target_etfs)
|
| 179 |
-
|
|
|
|
| 180 |
|
| 181 |
for i, cls in enumerate(preds):
|
| 182 |
if include_cash and cls == n_etfs:
|
| 183 |
-
|
| 184 |
-
daily_tbill = tbill_rate / 252
|
| 185 |
-
net = daily_tbill - (fee_bps / 10000)
|
| 186 |
else:
|
| 187 |
-
|
| 188 |
-
net =
|
| 189 |
strat_rets.append(net)
|
| 190 |
|
| 191 |
strat_rets = np.array(strat_rets)
|
| 192 |
cum_returns = np.cumprod(1 + strat_rets)
|
| 193 |
-
ann_return =
|
| 194 |
|
| 195 |
last_proba = proba[-1]
|
| 196 |
next_cls = int(np.argmax(last_proba))
|
| 197 |
-
next_etf =
|
|
|
|
|
|
|
|
|
|
| 198 |
|
| 199 |
return strat_rets, ann_return, cum_returns, last_proba, next_etf
|
|
|
|
| 1 |
"""
|
| 2 |
models/base.py
|
| 3 |
+
Shared utilities for all three CNN-LSTM variants.
|
| 4 |
+
Key fix: class_weight support to prevent majority-class collapse.
|
|
|
|
|
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
import numpy as np
|
| 8 |
import pandas as pd
|
| 9 |
from sklearn.preprocessing import RobustScaler
|
| 10 |
+
from sklearn.utils.class_weight import compute_class_weight
|
| 11 |
import tensorflow as tf
|
| 12 |
from tensorflow import keras
|
| 13 |
|
|
|
|
| 14 |
SEED = 42
|
| 15 |
tf.random.set_seed(SEED)
|
| 16 |
np.random.seed(SEED)
|
|
|
|
| 21 |
def build_sequences(features: np.ndarray, targets: np.ndarray, lookback: int):
|
| 22 |
"""
|
| 23 |
Build supervised sequences for CNN-LSTM input.
|
| 24 |
+
X[i] = features[i : i+lookback] β predicts y[i+lookback]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
"""
|
| 26 |
X, y = [], []
|
| 27 |
for i in range(lookback, len(features)):
|
|
|
|
| 33 |
# ββ Train / val / test split ββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 34 |
|
| 35 |
def train_val_test_split(X, y, train_pct=0.70, val_pct=0.15):
|
| 36 |
+
n = len(X)
|
|
|
|
| 37 |
t1 = int(n * train_pct)
|
| 38 |
t2 = int(n * (train_pct + val_pct))
|
|
|
|
| 39 |
return (
|
| 40 |
X[:t1], y[:t1],
|
| 41 |
X[t1:t2], y[t1:t2],
|
|
|
|
| 46 |
# ββ Feature scaling βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 47 |
|
| 48 |
def scale_features(X_train, X_val, X_test):
|
| 49 |
+
n_feat = X_train.shape[2]
|
| 50 |
+
scaler = RobustScaler()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
scaler.fit(X_train.reshape(-1, n_feat))
|
| 52 |
|
| 53 |
+
def _t(X):
|
| 54 |
+
s = X.shape
|
| 55 |
+
return scaler.transform(X.reshape(-1, n_feat)).reshape(s)
|
| 56 |
|
| 57 |
+
return _t(X_train), _t(X_val), _t(X_test), scaler
|
| 58 |
|
| 59 |
|
| 60 |
+
# ββ Label builder βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 61 |
|
| 62 |
def returns_to_labels(y_raw, include_cash=True, cash_threshold=0.0):
|
| 63 |
"""
|
| 64 |
+
Assign label = argmax(returns).
|
| 65 |
+
If include_cash and best return < cash_threshold β label = n_etfs (CASH).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
"""
|
| 67 |
+
best = np.argmax(y_raw, axis=1)
|
| 68 |
if include_cash:
|
| 69 |
+
best_ret = y_raw[np.arange(len(y_raw)), best]
|
| 70 |
+
cash_idx = y_raw.shape[1]
|
| 71 |
+
labels = np.where(best_ret < cash_threshold, cash_idx, best)
|
| 72 |
else:
|
| 73 |
labels = best
|
| 74 |
return labels.astype(np.int32)
|
| 75 |
|
| 76 |
|
| 77 |
+
# ββ Class weights βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 78 |
+
|
| 79 |
+
def compute_class_weights(y_labels: np.ndarray, n_classes: int) -> dict:
|
| 80 |
+
"""
|
| 81 |
+
Compute balanced class weights to counteract majority-class collapse.
|
| 82 |
+
Returns dict {class_index: weight} for use in model.fit().
|
| 83 |
+
"""
|
| 84 |
+
classes = np.arange(n_classes)
|
| 85 |
+
present = np.unique(y_labels)
|
| 86 |
|
| 87 |
+
try:
|
| 88 |
+
weights = compute_class_weight(
|
| 89 |
+
class_weight="balanced",
|
| 90 |
+
classes=present,
|
| 91 |
+
y=y_labels,
|
| 92 |
+
)
|
| 93 |
+
weight_dict = {int(c): float(w) for c, w in zip(present, weights)}
|
| 94 |
+
except Exception:
|
| 95 |
+
weight_dict = {}
|
| 96 |
+
|
| 97 |
+
# Fill any missing classes with weight 1.0
|
| 98 |
+
for c in classes:
|
| 99 |
+
if c not in weight_dict:
|
| 100 |
+
weight_dict[c] = 1.0
|
| 101 |
+
|
| 102 |
+
return weight_dict
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
# ββ Callbacks βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 106 |
+
|
| 107 |
+
def get_callbacks(patience_es=20, patience_lr=10, min_lr=1e-6):
|
| 108 |
+
"""Longer patience to allow models time to learn past majority class."""
|
| 109 |
return [
|
| 110 |
keras.callbacks.EarlyStopping(
|
| 111 |
monitor="val_loss",
|
|
|
|
| 123 |
]
|
| 124 |
|
| 125 |
|
| 126 |
+
# ββ Output head βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 127 |
|
| 128 |
def classification_head(x, n_classes: int, dropout: float = 0.3):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
x = keras.layers.Dense(64, activation="relu")(x)
|
| 130 |
+
x = keras.layers.BatchNormalization()(x)
|
| 131 |
x = keras.layers.Dropout(dropout)(x)
|
| 132 |
+
x = keras.layers.Dense(32, activation="relu")(x)
|
| 133 |
+
x = keras.layers.Dropout(dropout / 2)(x)
|
| 134 |
x = keras.layers.Dense(n_classes, activation="softmax")(x)
|
| 135 |
return x
|
| 136 |
|
| 137 |
|
| 138 |
+
# ββ Prediction ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 139 |
|
| 140 |
+
def predict_classes(model, X_test: np.ndarray) -> tuple:
|
|
|
|
| 141 |
proba = model.predict(X_test, verbose=0)
|
| 142 |
return np.argmax(proba, axis=1), proba
|
| 143 |
|
| 144 |
|
| 145 |
+
# ββ Metrics βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 146 |
|
| 147 |
def evaluate_returns(
|
| 148 |
+
preds, proba, y_raw_test, target_etfs, tbill_rate, fee_bps, include_cash=True,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
n_etfs = len(target_etfs)
|
| 151 |
+
daily_tbill = tbill_rate / 252
|
| 152 |
+
strat_rets = []
|
| 153 |
|
| 154 |
for i, cls in enumerate(preds):
|
| 155 |
if include_cash and cls == n_etfs:
|
| 156 |
+
net = daily_tbill - fee_bps / 10000
|
|
|
|
|
|
|
| 157 |
else:
|
| 158 |
+
cls = min(int(cls), n_etfs - 1)
|
| 159 |
+
net = float(y_raw_test[i][cls]) - fee_bps / 10000
|
| 160 |
strat_rets.append(net)
|
| 161 |
|
| 162 |
strat_rets = np.array(strat_rets)
|
| 163 |
cum_returns = np.cumprod(1 + strat_rets)
|
| 164 |
+
ann_return = cum_returns[-1] ** (252 / len(strat_rets)) - 1
|
| 165 |
|
| 166 |
last_proba = proba[-1]
|
| 167 |
next_cls = int(np.argmax(last_proba))
|
| 168 |
+
next_etf = (
|
| 169 |
+
"CASH" if (include_cash and next_cls == n_etfs)
|
| 170 |
+
else target_etfs[min(next_cls, n_etfs - 1)].replace("_Ret", "")
|
| 171 |
+
)
|
| 172 |
|
| 173 |
return strat_rets, ann_return, cum_returns, last_proba, next_etf
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach1_wavelet.py
CHANGED
|
@@ -1,167 +1,86 @@
|
|
| 1 |
"""
|
| 2 |
models/approach1_wavelet.py
|
| 3 |
Approach 1: Wavelet Decomposition CNN-LSTM
|
| 4 |
-
|
| 5 |
-
Pipeline:
|
| 6 |
-
Raw macro signals
|
| 7 |
-
β DWT (db4, level=3) per signal β multi-band channel stack
|
| 8 |
-
β 1D CNN (64 filters, k=3) β MaxPool β (32 filters, k=3)
|
| 9 |
-
β LSTM (128 units)
|
| 10 |
-
β Dense 64 β Softmax (n_etfs + 1 CASH)
|
| 11 |
"""
|
| 12 |
|
| 13 |
import numpy as np
|
| 14 |
import pywt
|
| 15 |
-
import tensorflow as tf
|
| 16 |
-
from tensorflow import keras
|
| 17 |
-
from models.base import classification_head, get_callbacks
|
| 18 |
-
|
| 19 |
-
WAVELET = "db4"
|
| 20 |
-
LEVEL = 3
|
| 21 |
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
# ββ Wavelet feature engineering βββββββββββββββββββββββββββββββββββββββββββββββ
|
| 24 |
|
| 25 |
def _wavelet_decompose_signal(signal: np.ndarray, wavelet: str, level: int) -> np.ndarray:
|
| 26 |
-
"""
|
| 27 |
-
Decompose a 1-D signal into DWT subbands and return them stacked.
|
| 28 |
-
|
| 29 |
-
For a signal of length T:
|
| 30 |
-
coeffs = [cA_n, cD_n, cD_{n-1}, ..., cD_1]
|
| 31 |
-
We interpolate each subband back to length T so we can stack them.
|
| 32 |
-
|
| 33 |
-
Returns: array of shape [T, level+1]
|
| 34 |
-
"""
|
| 35 |
T = len(signal)
|
| 36 |
coeffs = pywt.wavedec(signal, wavelet, level=level)
|
| 37 |
bands = []
|
| 38 |
for c in coeffs:
|
| 39 |
-
|
| 40 |
-
band = np.interp(
|
| 41 |
-
np.linspace(0, len(c) - 1, T),
|
| 42 |
-
np.arange(len(c)),
|
| 43 |
-
c,
|
| 44 |
-
)
|
| 45 |
bands.append(band)
|
| 46 |
-
return np.stack(bands, axis=-1)
|
| 47 |
-
|
| 48 |
|
| 49 |
-
def apply_wavelet_transform(X: np.ndarray, wavelet: str = WAVELET, level: int = LEVEL) -> np.ndarray:
|
| 50 |
-
"""
|
| 51 |
-
Apply DWT to every feature channel across all samples.
|
| 52 |
|
| 53 |
-
|
| 54 |
-
X : [n_samples, lookback, n_features]
|
| 55 |
-
|
| 56 |
-
Returns:
|
| 57 |
-
X_wt : [n_samples, lookback, n_features * (level+1)]
|
| 58 |
-
"""
|
| 59 |
n_samples, lookback, n_features = X.shape
|
| 60 |
-
n_bands
|
| 61 |
-
X_wt
|
| 62 |
-
|
| 63 |
for s in range(n_samples):
|
| 64 |
for f in range(n_features):
|
| 65 |
-
decomposed = _wavelet_decompose_signal(X[s, :, f], wavelet, level)
|
| 66 |
start = f * n_bands
|
| 67 |
X_wt[s, :, start: start + n_bands] = decomposed
|
| 68 |
-
|
| 69 |
return X_wt
|
| 70 |
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
input_shape: tuple,
|
| 76 |
-
n_classes: int,
|
| 77 |
-
dropout: float = 0.3,
|
| 78 |
-
lstm_units: int = 128,
|
| 79 |
-
) -> keras.Model:
|
| 80 |
-
"""
|
| 81 |
-
Build Wavelet CNN-LSTM model.
|
| 82 |
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
n_classes : number of output classes (ETFs + CASH)
|
| 86 |
-
dropout : dropout rate
|
| 87 |
-
lstm_units : LSTM hidden size
|
| 88 |
-
|
| 89 |
-
Returns:
|
| 90 |
-
Compiled Keras model
|
| 91 |
-
"""
|
| 92 |
-
inputs = keras.Input(shape=input_shape, name="wavelet_input")
|
| 93 |
-
|
| 94 |
-
# CNN block 1
|
| 95 |
-
x = keras.layers.Conv1D(64, kernel_size=3, padding="causal", activation="relu")(inputs)
|
| 96 |
x = keras.layers.BatchNormalization()(x)
|
| 97 |
-
x = keras.layers.MaxPooling1D(
|
| 98 |
-
|
| 99 |
-
# CNN block 2
|
| 100 |
-
x = keras.layers.Conv1D(32, kernel_size=3, padding="causal", activation="relu")(x)
|
| 101 |
x = keras.layers.BatchNormalization()(x)
|
| 102 |
x = keras.layers.Dropout(dropout)(x)
|
| 103 |
-
|
| 104 |
-
# LSTM
|
| 105 |
x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1)(x)
|
| 106 |
-
|
| 107 |
-
# Output head
|
| 108 |
outputs = classification_head(x, n_classes, dropout)
|
| 109 |
|
| 110 |
-
model = keras.Model(inputs, outputs, name="
|
| 111 |
model.compile(
|
| 112 |
-
optimizer=keras.optimizers.Adam(
|
| 113 |
loss="sparse_categorical_crossentropy",
|
| 114 |
metrics=["accuracy"],
|
| 115 |
)
|
| 116 |
return model
|
| 117 |
|
| 118 |
|
| 119 |
-
# ββ Full train pipeline βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 120 |
-
|
| 121 |
def train_approach1(
|
| 122 |
-
X_train, y_train,
|
| 123 |
-
|
| 124 |
-
n_classes: int,
|
| 125 |
-
epochs: int = 100,
|
| 126 |
-
batch_size: int = 32,
|
| 127 |
-
dropout: float = 0.3,
|
| 128 |
-
lstm_units: int = 128,
|
| 129 |
):
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
y_train/val : [n] integer class labels
|
| 136 |
-
n_classes : total output classes
|
| 137 |
-
|
| 138 |
-
Returns:
|
| 139 |
-
model : trained Keras model
|
| 140 |
-
history : training history
|
| 141 |
-
wt_shape : post-DWT input shape (for inference)
|
| 142 |
-
"""
|
| 143 |
-
# Apply DWT
|
| 144 |
-
X_train_wt = apply_wavelet_transform(X_train)
|
| 145 |
-
X_val_wt = apply_wavelet_transform(X_val)
|
| 146 |
-
|
| 147 |
-
input_shape = X_train_wt.shape[1:] # (lookback, n_features * n_bands)
|
| 148 |
model = build_wavelet_cnn_lstm(input_shape, n_classes, dropout, lstm_units)
|
|
|
|
| 149 |
|
| 150 |
history = model.fit(
|
| 151 |
X_train_wt, y_train,
|
| 152 |
validation_data=(X_val_wt, y_val),
|
| 153 |
epochs=epochs,
|
| 154 |
batch_size=batch_size,
|
|
|
|
| 155 |
callbacks=get_callbacks(),
|
| 156 |
verbose=0,
|
| 157 |
)
|
| 158 |
-
|
| 159 |
return model, history, input_shape
|
| 160 |
|
| 161 |
|
| 162 |
def predict_approach1(model, X_test: np.ndarray) -> tuple:
|
| 163 |
-
"""Apply DWT to test set then predict. Returns (class_preds, proba)."""
|
| 164 |
X_test_wt = apply_wavelet_transform(X_test)
|
| 165 |
proba = model.predict(X_test_wt, verbose=0)
|
| 166 |
-
|
| 167 |
-
return preds, proba
|
|
|
|
| 1 |
"""
|
| 2 |
models/approach1_wavelet.py
|
| 3 |
Approach 1: Wavelet Decomposition CNN-LSTM
|
| 4 |
+
With class weights to prevent majority-class collapse.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
import numpy as np
|
| 8 |
import pywt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
+
WAVELET = "db4"
|
| 11 |
+
LEVEL = 3
|
| 12 |
|
|
|
|
| 13 |
|
| 14 |
def _wavelet_decompose_signal(signal: np.ndarray, wavelet: str, level: int) -> np.ndarray:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
T = len(signal)
|
| 16 |
coeffs = pywt.wavedec(signal, wavelet, level=level)
|
| 17 |
bands = []
|
| 18 |
for c in coeffs:
|
| 19 |
+
band = np.interp(np.linspace(0, len(c)-1, T), np.arange(len(c)), c)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
bands.append(band)
|
| 21 |
+
return np.stack(bands, axis=-1)
|
|
|
|
| 22 |
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
def apply_wavelet_transform(X: np.ndarray, wavelet=WAVELET, level=LEVEL) -> np.ndarray:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
n_samples, lookback, n_features = X.shape
|
| 26 |
+
n_bands = level + 1
|
| 27 |
+
X_wt = np.zeros((n_samples, lookback, n_features * n_bands), dtype=np.float32)
|
|
|
|
| 28 |
for s in range(n_samples):
|
| 29 |
for f in range(n_features):
|
| 30 |
+
decomposed = _wavelet_decompose_signal(X[s, :, f], wavelet, level)
|
| 31 |
start = f * n_bands
|
| 32 |
X_wt[s, :, start: start + n_bands] = decomposed
|
|
|
|
| 33 |
return X_wt
|
| 34 |
|
| 35 |
|
| 36 |
+
def build_wavelet_cnn_lstm(input_shape, n_classes, dropout=0.3, lstm_units=128):
|
| 37 |
+
from tensorflow import keras
|
| 38 |
+
from models.base import classification_head
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
+
inputs = keras.Input(shape=input_shape)
|
| 41 |
+
x = keras.layers.Conv1D(64, 3, padding="causal", activation="relu")(inputs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
x = keras.layers.BatchNormalization()(x)
|
| 43 |
+
x = keras.layers.MaxPooling1D(2)(x)
|
| 44 |
+
x = keras.layers.Conv1D(32, 3, padding="causal", activation="relu")(x)
|
|
|
|
|
|
|
| 45 |
x = keras.layers.BatchNormalization()(x)
|
| 46 |
x = keras.layers.Dropout(dropout)(x)
|
|
|
|
|
|
|
| 47 |
x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1)(x)
|
|
|
|
|
|
|
| 48 |
outputs = classification_head(x, n_classes, dropout)
|
| 49 |
|
| 50 |
+
model = keras.Model(inputs, outputs, name="Approach1_Wavelet")
|
| 51 |
model.compile(
|
| 52 |
+
optimizer=keras.optimizers.Adam(1e-3),
|
| 53 |
loss="sparse_categorical_crossentropy",
|
| 54 |
metrics=["accuracy"],
|
| 55 |
)
|
| 56 |
return model
|
| 57 |
|
| 58 |
|
|
|
|
|
|
|
| 59 |
def train_approach1(
|
| 60 |
+
X_train, y_train, X_val, y_val,
|
| 61 |
+
n_classes, epochs=100, batch_size=32, dropout=0.3, lstm_units=128,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
):
|
| 63 |
+
from models.base import get_callbacks, compute_class_weights
|
| 64 |
+
|
| 65 |
+
X_train_wt = apply_wavelet_transform(X_train)
|
| 66 |
+
X_val_wt = apply_wavelet_transform(X_val)
|
| 67 |
+
input_shape = X_train_wt.shape[1:]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
model = build_wavelet_cnn_lstm(input_shape, n_classes, dropout, lstm_units)
|
| 69 |
+
cw = compute_class_weights(y_train, n_classes)
|
| 70 |
|
| 71 |
history = model.fit(
|
| 72 |
X_train_wt, y_train,
|
| 73 |
validation_data=(X_val_wt, y_val),
|
| 74 |
epochs=epochs,
|
| 75 |
batch_size=batch_size,
|
| 76 |
+
class_weight=cw,
|
| 77 |
callbacks=get_callbacks(),
|
| 78 |
verbose=0,
|
| 79 |
)
|
|
|
|
| 80 |
return model, history, input_shape
|
| 81 |
|
| 82 |
|
| 83 |
def predict_approach1(model, X_test: np.ndarray) -> tuple:
|
|
|
|
| 84 |
X_test_wt = apply_wavelet_transform(X_test)
|
| 85 |
proba = model.predict(X_test_wt, verbose=0)
|
| 86 |
+
return np.argmax(proba, axis=1), proba
|
|
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach2_regime.py
CHANGED
|
@@ -166,7 +166,7 @@ def train_approach2(
|
|
| 166 |
Fit HMM regime model then train the regime-conditioned CNN-LSTM.
|
| 167 |
Returns: model, history, hmm_model, regime_cols_idx
|
| 168 |
"""
|
| 169 |
-
from models.base import get_callbacks
|
| 170 |
|
| 171 |
X_flat_train = X_flat_all[:train_size + lookback]
|
| 172 |
hmm_model, regime_cols_idx = fit_regime_model(X_flat_train, feature_names)
|
|
@@ -181,11 +181,14 @@ def train_approach2(
|
|
| 181 |
dropout=dropout, lstm_units=lstm_units,
|
| 182 |
)
|
| 183 |
|
|
|
|
|
|
|
| 184 |
history = model.fit(
|
| 185 |
[X_train, R_train], y_train,
|
| 186 |
validation_data=([X_val, R_val], y_val),
|
| 187 |
epochs=epochs,
|
| 188 |
batch_size=batch_size,
|
|
|
|
| 189 |
callbacks=get_callbacks(),
|
| 190 |
verbose=0,
|
| 191 |
)
|
|
|
|
| 166 |
Fit HMM regime model then train the regime-conditioned CNN-LSTM.
|
| 167 |
Returns: model, history, hmm_model, regime_cols_idx
|
| 168 |
"""
|
| 169 |
+
from models.base import get_callbacks, compute_class_weights
|
| 170 |
|
| 171 |
X_flat_train = X_flat_all[:train_size + lookback]
|
| 172 |
hmm_model, regime_cols_idx = fit_regime_model(X_flat_train, feature_names)
|
|
|
|
| 181 |
dropout=dropout, lstm_units=lstm_units,
|
| 182 |
)
|
| 183 |
|
| 184 |
+
cw = compute_class_weights(y_train, n_classes)
|
| 185 |
+
|
| 186 |
history = model.fit(
|
| 187 |
[X_train, R_train], y_train,
|
| 188 |
validation_data=([X_val, R_val], y_val),
|
| 189 |
epochs=epochs,
|
| 190 |
batch_size=batch_size,
|
| 191 |
+
class_weight=cw,
|
| 192 |
callbacks=get_callbacks(),
|
| 193 |
verbose=0,
|
| 194 |
)
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/models/approach3_multiscale.py
CHANGED
|
@@ -1,150 +1,80 @@
|
|
| 1 |
"""
|
| 2 |
models/approach3_multiscale.py
|
| 3 |
Approach 3: Multi-Scale Parallel CNN-LSTM
|
| 4 |
-
|
| 5 |
-
Pipeline:
|
| 6 |
-
Raw macro signals
|
| 7 |
-
β 3 parallel CNN towers: kernel 3 (short), 7 (medium), 21 (long)
|
| 8 |
-
β Concatenate [96 features]
|
| 9 |
-
β LSTM (128 units)
|
| 10 |
-
β Dense 64 β Softmax (n_etfs + 1 CASH)
|
| 11 |
"""
|
| 12 |
|
| 13 |
import numpy as np
|
| 14 |
-
import tensorflow as tf
|
| 15 |
-
from tensorflow import keras
|
| 16 |
-
from models.base import classification_head, get_callbacks
|
| 17 |
-
|
| 18 |
-
# Kernel sizes represent: momentum (3d), weekly cycle (7d), monthly trend (21d)
|
| 19 |
-
KERNEL_SIZES = [3, 7, 21]
|
| 20 |
-
FILTERS_EACH = 32 # 32 Γ 3 towers = 96 concatenated features
|
| 21 |
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
# ββ Model builder βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 24 |
|
| 25 |
def build_multiscale_cnn_lstm(
|
| 26 |
-
input_shape
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
) -> keras.Model:
|
| 33 |
-
"""
|
| 34 |
-
Multi-scale parallel CNN-LSTM.
|
| 35 |
-
|
| 36 |
-
Three CNN towers with different kernel sizes run in parallel on the
|
| 37 |
-
same input, capturing momentum, weekly cycle, and monthly trend
|
| 38 |
-
simultaneously. Their outputs are concatenated before the LSTM.
|
| 39 |
-
|
| 40 |
-
Args:
|
| 41 |
-
input_shape : (lookback, n_features)
|
| 42 |
-
n_classes : number of output classes (ETFs + CASH)
|
| 43 |
-
kernel_sizes : list of kernel sizes for each tower
|
| 44 |
-
filters : number of Conv1D filters per tower
|
| 45 |
-
dropout : dropout rate
|
| 46 |
-
lstm_units : LSTM hidden size
|
| 47 |
-
|
| 48 |
-
Returns:
|
| 49 |
-
Compiled Keras model
|
| 50 |
-
"""
|
| 51 |
if kernel_sizes is None:
|
| 52 |
kernel_sizes = KERNEL_SIZES
|
| 53 |
|
| 54 |
-
inputs
|
|
|
|
| 55 |
|
| 56 |
-
towers = []
|
| 57 |
for k in kernel_sizes:
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
filters, kernel_size=k, padding="causal", activation="relu",
|
| 61 |
-
name=f"conv1_k{k}"
|
| 62 |
-
)(inputs)
|
| 63 |
t = keras.layers.BatchNormalization(name=f"bn1_k{k}")(t)
|
| 64 |
-
t = keras.layers.Conv1D(
|
| 65 |
-
|
| 66 |
-
name=f"conv2_k{k}"
|
| 67 |
-
)(t)
|
| 68 |
t = keras.layers.BatchNormalization(name=f"bn2_k{k}")(t)
|
| 69 |
t = keras.layers.Dropout(dropout, name=f"drop_k{k}")(t)
|
| 70 |
towers.append(t)
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
merged = keras.layers.Concatenate(axis=-1, name="tower_concat")(towers)
|
| 75 |
-
else:
|
| 76 |
-
merged = towers[0]
|
| 77 |
-
|
| 78 |
-
# LSTM integrates multi-scale temporal features
|
| 79 |
-
x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1, name="lstm")(merged)
|
| 80 |
-
|
| 81 |
-
# Output head
|
| 82 |
outputs = classification_head(x, n_classes, dropout)
|
| 83 |
|
| 84 |
-
model = keras.Model(inputs, outputs, name="
|
| 85 |
model.compile(
|
| 86 |
-
optimizer=keras.optimizers.Adam(
|
| 87 |
loss="sparse_categorical_crossentropy",
|
| 88 |
metrics=["accuracy"],
|
| 89 |
)
|
| 90 |
return model
|
| 91 |
|
| 92 |
|
| 93 |
-
# ββ Full train pipeline βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 94 |
-
|
| 95 |
def train_approach3(
|
| 96 |
-
X_train, y_train,
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
epochs: int = 100,
|
| 100 |
-
batch_size: int = 32,
|
| 101 |
-
dropout: float = 0.3,
|
| 102 |
-
lstm_units: int = 128,
|
| 103 |
-
kernel_sizes: list = None,
|
| 104 |
):
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
Args:
|
| 109 |
-
X_train/val : [n, lookback, n_features]
|
| 110 |
-
y_train/val : [n] integer class labels
|
| 111 |
-
n_classes : total output classes
|
| 112 |
-
|
| 113 |
-
Returns:
|
| 114 |
-
model : trained Keras model
|
| 115 |
-
history : training history
|
| 116 |
-
"""
|
| 117 |
if kernel_sizes is None:
|
| 118 |
kernel_sizes = KERNEL_SIZES
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
valid_kernels = [min(3, lookback)]
|
| 125 |
-
|
| 126 |
-
model = build_multiscale_cnn_lstm(
|
| 127 |
-
input_shape=X_train.shape[1:],
|
| 128 |
-
n_classes=n_classes,
|
| 129 |
-
kernel_sizes=valid_kernels,
|
| 130 |
-
dropout=dropout,
|
| 131 |
-
lstm_units=lstm_units,
|
| 132 |
)
|
|
|
|
| 133 |
|
| 134 |
history = model.fit(
|
| 135 |
X_train, y_train,
|
| 136 |
validation_data=(X_val, y_val),
|
| 137 |
epochs=epochs,
|
| 138 |
batch_size=batch_size,
|
|
|
|
| 139 |
callbacks=get_callbacks(),
|
| 140 |
verbose=0,
|
| 141 |
)
|
| 142 |
-
|
| 143 |
return model, history
|
| 144 |
|
| 145 |
|
| 146 |
def predict_approach3(model, X_test: np.ndarray) -> tuple:
|
| 147 |
-
"""Predict on test set. Returns (class_preds, proba)."""
|
| 148 |
proba = model.predict(X_test, verbose=0)
|
| 149 |
-
|
| 150 |
-
return preds, proba
|
|
|
|
| 1 |
"""
|
| 2 |
models/approach3_multiscale.py
|
| 3 |
Approach 3: Multi-Scale Parallel CNN-LSTM
|
| 4 |
+
With class weights to prevent majority-class collapse.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
+
KERNEL_SIZES = [3, 7, 21]
|
| 10 |
+
FILTERS_EACH = 32
|
| 11 |
|
|
|
|
| 12 |
|
| 13 |
def build_multiscale_cnn_lstm(
|
| 14 |
+
input_shape, n_classes, kernel_sizes=None,
|
| 15 |
+
filters=FILTERS_EACH, dropout=0.3, lstm_units=128,
|
| 16 |
+
):
|
| 17 |
+
from tensorflow import keras
|
| 18 |
+
from models.base import classification_head
|
| 19 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
if kernel_sizes is None:
|
| 21 |
kernel_sizes = KERNEL_SIZES
|
| 22 |
|
| 23 |
+
inputs = keras.Input(shape=input_shape, name="multiscale_input")
|
| 24 |
+
towers = []
|
| 25 |
|
|
|
|
| 26 |
for k in kernel_sizes:
|
| 27 |
+
t = keras.layers.Conv1D(filters, k, padding="causal", activation="relu",
|
| 28 |
+
name=f"conv1_k{k}")(inputs)
|
|
|
|
|
|
|
|
|
|
| 29 |
t = keras.layers.BatchNormalization(name=f"bn1_k{k}")(t)
|
| 30 |
+
t = keras.layers.Conv1D(filters, k, padding="causal", activation="relu",
|
| 31 |
+
name=f"conv2_k{k}")(t)
|
|
|
|
|
|
|
| 32 |
t = keras.layers.BatchNormalization(name=f"bn2_k{k}")(t)
|
| 33 |
t = keras.layers.Dropout(dropout, name=f"drop_k{k}")(t)
|
| 34 |
towers.append(t)
|
| 35 |
|
| 36 |
+
merged = keras.layers.Concatenate(axis=-1)(towers) if len(towers) > 1 else towers[0]
|
| 37 |
+
x = keras.layers.LSTM(lstm_units, dropout=dropout, recurrent_dropout=0.1)(merged)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
outputs = classification_head(x, n_classes, dropout)
|
| 39 |
|
| 40 |
+
model = keras.Model(inputs, outputs, name="Approach3_MultiScale")
|
| 41 |
model.compile(
|
| 42 |
+
optimizer=keras.optimizers.Adam(1e-3),
|
| 43 |
loss="sparse_categorical_crossentropy",
|
| 44 |
metrics=["accuracy"],
|
| 45 |
)
|
| 46 |
return model
|
| 47 |
|
| 48 |
|
|
|
|
|
|
|
| 49 |
def train_approach3(
|
| 50 |
+
X_train, y_train, X_val, y_val,
|
| 51 |
+
n_classes, epochs=100, batch_size=32,
|
| 52 |
+
dropout=0.3, lstm_units=128, kernel_sizes=None,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
):
|
| 54 |
+
from models.base import get_callbacks, compute_class_weights
|
| 55 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
if kernel_sizes is None:
|
| 57 |
kernel_sizes = KERNEL_SIZES
|
| 58 |
|
| 59 |
+
lookback = X_train.shape[1]
|
| 60 |
+
valid_kernels = [k for k in kernel_sizes if k <= lookback] or [min(3, lookback)]
|
| 61 |
+
model = build_multiscale_cnn_lstm(
|
| 62 |
+
X_train.shape[1:], n_classes, valid_kernels, dropout=dropout, lstm_units=lstm_units,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
)
|
| 64 |
+
cw = compute_class_weights(y_train, n_classes)
|
| 65 |
|
| 66 |
history = model.fit(
|
| 67 |
X_train, y_train,
|
| 68 |
validation_data=(X_val, y_val),
|
| 69 |
epochs=epochs,
|
| 70 |
batch_size=batch_size,
|
| 71 |
+
class_weight=cw,
|
| 72 |
callbacks=get_callbacks(),
|
| 73 |
verbose=0,
|
| 74 |
)
|
|
|
|
| 75 |
return model, history
|
| 76 |
|
| 77 |
|
| 78 |
def predict_approach3(model, X_test: np.ndarray) -> tuple:
|
|
|
|
| 79 |
proba = model.predict(X_test, verbose=0)
|
| 80 |
+
return np.argmax(proba, axis=1), proba
|
|
|
hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/hf_space/ui/components.py
CHANGED
|
@@ -227,3 +227,46 @@ def show_audit_trail(audit_trail: list):
|
|
| 227 |
{"selector": "td", "props": [("padding", "10px")]},
|
| 228 |
])
|
| 229 |
st.dataframe(styled, use_container_width=True, height=500)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
{"selector": "td", "props": [("padding", "10px")]},
|
| 228 |
])
|
| 229 |
st.dataframe(styled, use_container_width=True, height=500)
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
# ββ All models' next day signals panel βββββββββββββββββββββββββββββββββββββββ
|
| 233 |
+
|
| 234 |
+
def show_all_signals_panel(all_signals: dict, target_etfs: list, include_cash: bool, next_date):
|
| 235 |
+
"""
|
| 236 |
+
Compact panel showing what each model predicts for next trading day,
|
| 237 |
+
with top probability displayed.
|
| 238 |
+
"""
|
| 239 |
+
APPROACH_COLORS = {
|
| 240 |
+
"Approach 1": "#00ffc8",
|
| 241 |
+
"Approach 2": "#7c6aff",
|
| 242 |
+
"Approach 3": "#ff6b6b",
|
| 243 |
+
}
|
| 244 |
+
|
| 245 |
+
st.subheader(f"ποΈ All Models β {next_date.strftime('%Y-%m-%d')} Signals")
|
| 246 |
+
|
| 247 |
+
cols = st.columns(len(all_signals))
|
| 248 |
+
for col, (name, info) in zip(cols, all_signals.items()):
|
| 249 |
+
color = APPROACH_COLORS.get(name, "#888888")
|
| 250 |
+
signal = info["signal"]
|
| 251 |
+
proba = info["proba"]
|
| 252 |
+
top_prob = float(np.max(proba)) * 100
|
| 253 |
+
is_winner = info["is_winner"]
|
| 254 |
+
border = f"3px solid {color}"
|
| 255 |
+
badge = " β WINNER" if is_winner else ""
|
| 256 |
+
|
| 257 |
+
col.markdown(f"""
|
| 258 |
+
<div style="border:{border}; border-radius:12px; padding:18px 16px;
|
| 259 |
+
background:#111118; text-align:center;">
|
| 260 |
+
<div style="color:{color}; font-size:11px; font-weight:700;
|
| 261 |
+
letter-spacing:2px; margin-bottom:6px;">
|
| 262 |
+
{name.upper()}{badge}
|
| 263 |
+
</div>
|
| 264 |
+
<div style="color:white; font-size:28px; font-weight:800;
|
| 265 |
+
margin:8px 0;">
|
| 266 |
+
{signal}
|
| 267 |
+
</div>
|
| 268 |
+
<div style="color:#aaa; font-size:12px;">
|
| 269 |
+
Top prob: <span style="color:{color}; font-weight:700;">{top_prob:.1f}%</span>
|
| 270 |
+
</div>
|
| 271 |
+
</div>
|
| 272 |
+
""", unsafe_allow_html=True)
|