Spaces:
Running
Running
Upload 14 files
Browse files- .gitattributes +1 -0
- DockerFile +20 -0
- app.py +116 -0
- models/gru_booster_model.keras +3 -0
- models/robust_scaler.pkl +3 -0
- requirements.txt +17 -2
- src/__init__.py +0 -0
- src/__pycache__/__init__.cpython-310.pyc +0 -0
- src/__pycache__/data_fetcher.cpython-310.pyc +0 -0
- src/__pycache__/processor.cpython-310.pyc +0 -0
- src/__pycache__/strategy.cpython-310.pyc +0 -0
- src/data_fetcher.py +127 -0
- src/predictor.py +0 -0
- src/processor.py +123 -0
- src/strategy.py +30 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
models/gru_booster_model.keras filter=lfs diff=lfs merge=lfs -text
|
DockerFile
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# NOTE(review): the file is committed as "DockerFile"; Docker's default lookup
# is case-sensitive on Linux — rename to "Dockerfile" (or pass -f) to be safe.

# Use an official Python runtime as a parent image
FROM python:3.10-slim

# Avoid .pyc litter and keep logs unbuffered so Spaces shows them live
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file first so dependency layers cache across code edits
COPY requirements.txt .

# Install your dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of your app's code
COPY . .

# Expose the port Streamlit runs on
EXPOSE 8501

# Command to run your app
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
app.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import tensorflow as tf
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import numpy as np
|
| 5 |
+
from src.data_fetcher import DataFetcher
|
| 6 |
+
from src.processor import Processor
|
| 7 |
+
from src.strategy import get_market_regime
|
| 8 |
+
|
| 9 |
+
st.set_page_config(page_title="Alpha Predict", page_icon="🏹", layout="wide")
|
| 10 |
+
|
| 11 |
+
@st.cache_resource
def load_alpha_model():
    """Load the trained GRU model from disk once; Streamlit caches it across reruns."""
    model_path = "models/gru_booster_model.keras"
    return tf.keras.models.load_model(model_path)
|
| 14 |
+
|
| 15 |
+
@st.cache_resource
def load_processor():
    """Build the (FinBERT + scaler) Processor once; Streamlit caches the instance."""
    processor = Processor()
    return processor
|
| 18 |
+
|
| 19 |
+
def main():
    """Render the Alpha Predict dashboard and, on button press, emit a signal.

    Pipeline: fetch market + news data -> engineer features (Processor) ->
    GRU probability -> map to a trading regime -> render metrics and charts.

    NOTE(review): reconstructed from a whitespace-mangled diff; the exact
    nesting under st.spinner (compute inside, display after) is assumed —
    confirm against the original file.
    """
    st.title("🏹 Alpha Predict")
    st.markdown("---")

    # Sidebar: static explanation of the three-zone strategy.
    with st.sidebar:
        st.header("📖 Strategy Logic")
        st.markdown("""
        **Objective:** Directional probability for the next session.
        - **🟢 GREEN (≥ 57.8%)**: 3x Leverage (SPXL/UPRO)
        - **🟡 YELLOW (53.0% - 57.7%)**: 1x Exposure (SPY/VOO)
        - **🔴 RED (< 53.0%)**: Cash (0x)
        """)
        st.divider()
        st.caption("Alpha Predict")

    fetcher = DataFetcher()
    processor = load_processor()  # cached resource — built once per session
    model = load_alpha_model()    # cached resource — loaded once per session

    if st.button("Generate Today's Signal", type="primary"):
        with st.spinner("🔭 Analyzing Market Nervous System..."):

            # 1. Fetch data (Fetch 60 days to allow for the 26-day MACD EMA dropping NaNs)
            market_df = fetcher.fetch_market_data(days=60)
            news_df = fetcher.fetch_market_news(days=45)

            # FIX: guard before .iloc[-2] / .min() — short or empty fetches
            # previously crashed the app with IndexError / NaN display.
            if len(market_df) < 2 or news_df.empty:
                st.error("Not enough market or news data was fetched. Please try again later.")
                st.stop()

            st.warning(f"Earliest news fetched: {news_df['Date'].min()} | Total Headlines: {len(news_df)}")

            # 2. Process - Now unpacking 4 items (df_features gives us the history!)
            input_tensor, metrics, df_features, scored_news = processor.process(market_df, news_df)

            # 3. Predict bullish probability for the next session
            prediction_prob = float(model.predict(input_tensor)[0][0])

            # 4. Get Strategy Regime
            regime = get_market_regime(prediction_prob)

        # 5. UI Metrics — latest session vs. the one before
        latest_vix = market_df['VIX'].iloc[-1]
        prev_vix = market_df['VIX'].iloc[-2]
        current_price = market_df['Close'].iloc[-1]

        col1, col2, col3 = st.columns(3)
        col1.metric("S&P 500 Baseline", f"${current_price:,.2f}")
        col2.metric("VIX (Fear Gauge)", f"{latest_vix:.2f}",
                    delta=f"{latest_vix - prev_vix:+.2f} Fear", delta_color="inverse")
        col3.metric("FinBERT Sentiment", f"{metrics['Sent_Mean']:+.2f}",
                    delta=f"{int(metrics['News_Volume'])} Headlines")

        with st.expander("📊 How to interpret these values?"):
            c1, c2 = st.columns(2)
            c1.markdown("**VIX:** <15 Calm, 15-25 Normal, >25 Panic.")
            c2.markdown("**Sentiment:** >+0.1 Bullish, ±0.1 Neutral, <-0.1 Bearish.")

        st.divider()

        # --- REGIME DISPLAY ---
        st.subheader(f"{regime['icon']} Current Regime: :{regime['color']}[{regime['zone']}]")

        res1, res2 = st.columns([1, 2])
        res1.metric("Bullish Probability", f"{prediction_prob:.2%}")
        res2.info(f"**Recommended Action:** {regime['action']}")

        # --- LOGIC BREAKDOWN ---
        st.write("### 🧠 Logic Breakdown (Last 30 Days)")

        e_col1, e_col2 = st.columns(2)

        with e_col1:
            st.write("**Volatility Regime (VIX)**")
            vix_trend = market_df['VIX'].tail(30)
            st.line_chart(vix_trend)

            vix_slope = vix_trend.iloc[-1] - vix_trend.iloc[0]
            if vix_slope > 2:
                st.warning(f"⚠️ Volatility is **trending up** (+{vix_slope:.1f}pts). The AI sees rising instability.")
            elif vix_slope < -2:
                st.success(f"✅ Volatility is **cooling off** ({vix_slope:.1f}pts). This supports the bullish case.")
            else:
                st.info("⚖️ Volatility is sideways. The model is focused on other factors.")

        with e_col2:
            st.write("**Sentiment Momentum (FinBERT)**")

            # The historical trend comes from df_features (returned by process)
            sent_trend = df_features['Sent_Mean'].tail(30)
            st.area_chart(sent_trend)

            avg_30d = sent_trend.mean()
            st.write(f"30-Day Avg Sentiment: **{avg_30d:+.2f}**")

            if metrics['Sent_Mean'] > avg_30d:
                st.write("📈 Today's news is **stronger** than the monthly average.")
            else:
                st.write("📉 Today's news is **weaker** than the monthly average.")

if __name__ == "__main__":
    main()
|
models/gru_booster_model.keras
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4de2e84278571c819c366d8b62dea18ee56128ccdd38f6879979e9b74e6c70f
|
| 3 |
+
size 514054
|
models/robust_scaler.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72212a8ecadcea9b19932600b07a132cda24935ff987afdd45e68c42ac970ff8
|
| 3 |
+
size 727
|
requirements.txt
CHANGED
|
@@ -1,3 +1,18 @@
|
|
| 1 |
-
|
|
|
|
| 2 |
pandas
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core Data & Math
|
| 2 |
+
numpy
|
| 3 |
pandas
|
| 4 |
+
yfinance
|
| 5 |
+
scipy
|
| 6 |
+
scikit-learn
|
| 7 |
+
finnhub-python
|
| 8 |
+
joblib
|
| 9 |
+
|
| 10 |
+
# Deep Learning
|
| 11 |
+
tensorflow>=2.16.1
|
| 12 |
+
torch
|
| 13 |
+
transformers
|
| 14 |
+
accelerate
|
| 15 |
+
|
| 16 |
+
# App & Environment
|
| 17 |
+
streamlit
|
| 18 |
+
python-dotenv
|
src/__init__.py
ADDED
|
File without changes
|
src/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (138 Bytes). View file
|
|
|
src/__pycache__/data_fetcher.cpython-310.pyc
ADDED
|
Binary file (3.28 kB). View file
|
|
|
src/__pycache__/processor.cpython-310.pyc
ADDED
|
Binary file (4.27 kB). View file
|
|
|
src/__pycache__/strategy.cpython-310.pyc
ADDED
|
Binary file (777 Bytes). View file
|
|
|
src/data_fetcher.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import time
|
| 3 |
+
import yfinance as yf
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import finnhub
|
| 6 |
+
import streamlit as st
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
from datetime import datetime, timedelta
|
| 9 |
+
|
| 10 |
+
# Load environment variables
|
| 11 |
+
load_dotenv()
|
| 12 |
+
|
| 13 |
+
class DataFetcher:
    """Fetches OHLCV/VIX market data (Yahoo Finance) and market headlines (Finnhub)."""

    def __init__(self, ticker="^GSPC", vix_ticker="^VIX"):
        self.ticker = ticker
        self.vix_ticker = vix_ticker

        # Initialize Finnhub Client — fail fast with a clear message if unconfigured
        api_key = os.getenv("FINNHUB_API_KEY")
        if not api_key:
            raise ValueError("❌ FINNHUB_API_KEY not found in .env file!")

        self.finnhub_client = finnhub.Client(api_key=api_key)

    def fetch_market_data(self, days=50):
        """
        Fetches raw OHLCV and VIX data from Yahoo Finance.

        Returns a DataFrame of daily OHLCV for `self.ticker` with an extra
        'VIX' column (forward-filled to cover mismatched trading calendars).
        """
        print(f"📡 Fetching last {days} days of {self.ticker} and {self.vix_ticker}...")

        # Download Index Data
        df = yf.download(self.ticker, period=f"{days}d", interval="1d", progress=False)
        # Download VIX
        df_vix = yf.download(self.vix_ticker, period=f"{days}d", interval="1d", progress=False)

        # Handle yfinance MultiIndex columns if they exist
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)
        if isinstance(df_vix.columns, pd.MultiIndex):
            df_vix.columns = df_vix.columns.get_level_values(0)

        # Merge VIX and ensure data is clean
        df['VIX'] = df_vix['Close']
        df = df.ffill()

        return df

    # 🛡️ STREAMLIT CACHE: Ignores '_self' so it doesn't try to hash the Finnhub client.
    # ttl=3600 caches the news for 1 hour so repeated button clicks load instantly.
    @st.cache_data(ttl=3600, show_spinner=False)
    def fetch_market_news(_self, days=45):
        """
        Fetches historical market news by looping through days.
        Uses 'SPY' as a proxy to allow historical date filtering on Finnhub.

        Returns a DataFrame with 'Title' and 'Date' columns (may be empty).
        """
        print(f"📰 Fetching last {days} days of market headlines...")

        all_news = []
        end_date = datetime.now()

        # Try to render a Streamlit progress bar if running inside app.py
        try:
            progress_bar = st.progress(0, text="Fetching historical news data (avoiding rate limits)...")
        except Exception:
            # FIX: was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt. Outside a Streamlit context st.progress raises
            # an ordinary exception, so Exception is the right net here.
            progress_bar = None

        # Loop backwards through time, day by day
        for i in range(days):
            target_date = end_date - timedelta(days=i)
            date_str = target_date.strftime('%Y-%m-%d')

            try:
                # FINNHUB TRICK: Use 'SPY' company news to get historical market coverage
                daily_news = _self.finnhub_client.company_news('SPY', _from=date_str, to=date_str)

                if daily_news:
                    all_news.extend(daily_news)

                # 🛑 RATE LIMIT SHIELD: Finnhub free tier allows 60 requests/minute.
                # Sleeping for 1.1 seconds guarantees we stay perfectly under the limit.
                time.sleep(1.1)

            except Exception as e:
                print(f"⚠️ API Error on {date_str}: {e}")
                time.sleep(5)  # Take a longer pause if the API gets angry

            # Update UI progress
            if progress_bar:
                progress_bar.progress((i + 1) / days, text=f"Fetched news for {date_str}...")

        # Clear the progress bar when finished
        if progress_bar:
            progress_bar.empty()

        # Convert the master list into a DataFrame
        df_news = pd.DataFrame(all_news)

        if df_news.empty:
            print("⚠️ No news found in the specified window.")
            return pd.DataFrame(columns=['Title', 'Date'])

        # Convert Unix timestamp to YYYY-MM-DD Date object
        df_news['Date'] = pd.to_datetime(df_news['datetime'], unit='s').dt.date

        # Rename columns to match what Processor expects
        df_news = df_news[['headline', 'Date']].rename(columns={'headline': 'Title'})

        # Drop duplicates in case of overlapping API returns
        df_news = df_news.drop_duplicates(subset=['Title', 'Date'])

        print(f"✅ Successfully fetched {len(df_news)} historical headlines.")
        return df_news
|
| 113 |
+
|
| 114 |
+
if __name__ == "__main__":
    # Smoke-test both fetchers from the command line.
    fetcher = DataFetcher()

    # Market data check
    market_df = fetcher.fetch_market_data(days=50)
    print("\n--- Market Data Sample ---")
    print(market_df.tail())

    # News data check
    news_df = fetcher.fetch_market_news(days=45)
    print("\n--- Market News Sample ---")
    print(news_df.head())
    print(news_df.tail())
    print(f"\nTotal Headlines Fetched: {len(news_df)}")
|
src/predictor.py
ADDED
|
File without changes
|
src/processor.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import torch
|
| 4 |
+
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
|
| 5 |
+
import joblib
|
| 6 |
+
|
| 7 |
+
class Processor:
    """Turns raw market + news data into the scaled 30x14 window the GRU expects."""

    def __init__(self, scaler_path="models/robust_scaler.pkl"):
        print("⚙️ Initializing AlphaProcessor...")
        self.device = 0 if torch.cuda.is_available() else -1
        self.model_name = "ProsusAI/finbert"

        # Load Scaler (Required for normalization before GRU)
        # FIX: was a bare `except:` that swallowed everything and left
        # self.scaler undefined, causing a confusing AttributeError later.
        # Now we catch Exception, keep a None sentinel, and fail loudly in
        # process() with an actionable message.
        try:
            self.scaler = joblib.load(scaler_path)
            print(f"✅ Scaler loaded from {scaler_path}")
        except Exception:
            self.scaler = None
            print("⚠️ Scaler not found. Ensure robust_scaler.pkl is in models/ folder.")

        # Initialize FinBERT Pipeline
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            self.model_name, use_safetensors=True
        )
        self.sentiment_pipe = pipeline(
            "sentiment-analysis",
            model=self.model,
            tokenizer=self.tokenizer,
            device=self.device
        )

    def process(self, df_market, df_news):
        """
        Build the model input plus metadata for the Streamlit UI.

        Returns (input_tensor, latest_metrics, df_features, df_news_scored):
        a (1, 30, 14) float32 array, a dict of latest-day display values,
        the full engineered-feature history, and the scored headlines.

        Raises RuntimeError if the scaler failed to load, ValueError if fewer
        than 30 feature rows survive engineering.
        """
        if self.scaler is None:
            raise RuntimeError("Scaler unavailable — place robust_scaler.pkl in models/ and restart.")

        # 1. Process Sentiment features from headlines
        df_sent, df_news_scored = self._generate_sentiment_profile(df_news)

        # 2. Merge with market data and engineer all 14 features
        df_features = self._engineer_14_features(df_market, df_sent)

        # FIX: fail with a clear message instead of letting the GRU reject a
        # short window with an opaque shape error.
        if len(df_features) < 30:
            raise ValueError(f"Need at least 30 feature rows for the model window, got {len(df_features)}.")

        # 3. Extract metadata for the UI (latest day's values)
        latest_metrics = {
            "Sent_Mean": df_features['Sent_Mean'].iloc[-1],
            "News_Volume": np.exp(df_features['News_Volume'].iloc[-1]) - 1,  # Reverse log1p
            "Panic_Interaction": df_features['Sent_x_VIX'].iloc[-1],
            "RSI": df_features['RSI'].iloc[-1] * 100
        }

        # 4. Get the last 30 days and scale for the Model
        final_window = df_features.tail(30).values
        scaled_window = self.scaler.transform(final_window)
        input_tensor = np.expand_dims(scaled_window, axis=0).astype('float32')

        # df_features is returned so app.py can plot the historical 30-day sentiment
        return input_tensor, latest_metrics, df_features, df_news_scored

    def _generate_sentiment_profile(self, df_news):
        """Score every headline with FinBERT and aggregate to 4 daily features."""
        print("🧠 Running FinBERT Batch Analysis...")
        # FIX: work on a copy so the caller's DataFrame is not mutated in place.
        df_news = df_news.copy()
        titles = df_news['Title'].astype(str).tolist()
        results = self.sentiment_pipe(titles, batch_size=32, truncation=True)

        # Signed score: +score positive, -score negative, 0 neutral
        scores = []
        for res in results:
            label, score = res['label'].lower(), res['score']
            scores.append(score if label == 'positive' else -score if label == 'negative' else 0.0)

        df_news['Score'] = scores  # Add scores to the news df (copy)

        # Ensure dates match format for grouping
        df_news['Date'] = pd.to_datetime(df_news['Date']).dt.date
        grouped = df_news.groupby('Date')['Score']

        daily = pd.DataFrame({
            'Sent_Mean': grouped.mean(),
            'Sent_Intensity': grouped.apply(lambda x: x.abs().mean()),
            'News_Volume': np.log1p(grouped.count()),
            'Net_Bull': grouped.apply(lambda x: x.sum() / (len(x) + 1))
        }).fillna(0.0)

        # Convert index back to datetime for merging
        daily.index = pd.to_datetime(daily.index)

        return daily, df_news

    def _engineer_14_features(self, df, df_sent):
        """Engineer the 7 quant + 7 sentiment features; drops warm-up NaN rows."""
        data = df.copy()

        # --- QUANT BRANCH (7 Features) ---
        # Distance from 20-day volume-weighted average price (log-ratio)
        tp = (data['High'] + data['Low'] + data['Close']) / 3
        vwap = (tp * data['Volume']).rolling(20).sum() / (data['Volume'].rolling(20).sum() + 1e-9)
        data['VWAP_Dist'] = np.log(data['Close'] / vwap)

        # 14-day RSI, rescaled to 0..1
        delta = data['Close'].diff()
        gain = (delta.where(delta > 0, 0)).rolling(14).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
        data['RSI'] = (100 - (100 / (1 + (gain / (loss + 1e-9))))) / 100.0

        # MACD histogram, normalized by price level
        ema_12, ema_26 = data['Close'].ewm(span=12).mean(), data['Close'].ewm(span=26).mean()
        data['MACD_Hist'] = ((ema_12 - ema_26) - (ema_12 - ema_26).ewm(span=9).mean()) / data['Close']

        data['VIX_Norm'] = data['VIX'] / 100.0
        data['VIX_Change'] = data['VIX'].pct_change()

        # tanh-squashed distance from the 22-day mean, in ATR units
        tr = pd.concat([data['High'] - data['Low'], abs(data['High'] - data['Close'].shift()),
                        abs(data['Low'] - data['Close'].shift())], axis=1).max(axis=1)
        data['ATR_Dist'] = np.tanh((data['Close'] - data['Close'].rolling(22).mean()) / (tr.rolling(14).mean() + 1e-9))
        data['Realized_Vol'] = data['Close'].pct_change().rolling(10).std() * 10

        # --- SENTIMENT BRANCH (7 Features) ---
        # Ensure indices match for joining; days with no news become 0.0
        data.index = pd.to_datetime(data.index)
        data = data.join(df_sent, how='left').fillna(0.0)

        data['Sent_Mean_Delta'] = data['Sent_Mean'].diff().fillna(0.0)
        data['Sent_Mean_EMA'] = data['Sent_Mean'].ewm(span=3).mean()
        data['Sent_x_VIX'] = data['Sent_Mean'] * data['VIX_Norm']  # Panic Interaction

        feature_cols = [
            'VWAP_Dist', 'RSI', 'MACD_Hist', 'VIX_Norm', 'VIX_Change', 'ATR_Dist', 'Realized_Vol',
            'Sent_Mean', 'Sent_Intensity', 'News_Volume', 'Net_Bull', 'Sent_Mean_Delta', 'Sent_Mean_EMA', 'Sent_x_VIX'
        ]
        return data[feature_cols].dropna()
|
src/strategy.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# src/strategy.py
|
| 2 |
+
|
| 3 |
+
def get_market_regime(prediction_prob):
    """
    Translate the model's bullish probability into one of three trading zones.

    Thresholds: >= 0.578 -> GREEN (3x leverage), >= 0.530 -> YELLOW (1x),
    otherwise RED (cash). Returns a dict with zone, color, icon, action, tag.
    """
    if prediction_prob >= 0.578:
        zone, color, icon = "GREEN ZONE (Sniper)", "green", "🚀"
        action, tag = "Move/Stay to 3x S&P 500 (SPXL/UPRO)", "High Conviction Bullish"
    elif prediction_prob >= 0.530:
        zone, color, icon = "YELLOW ZONE (Normal)", "orange", "⚖️"
        action, tag = "Move/Stay to 1x S&P 500 (SPY/VOO)", "Standard Market Beta"
    else:
        zone, color, icon = "RED ZONE (Cash)", "red", "🛡️"
        action, tag = "Move/Stay to CASH (0x)", "Risk Aversion / Defensive"

    return {
        "zone": zone,
        "color": color,
        "icon": icon,
        "action": action,
        "tag": tag
    }
|