DevKX committed on
Commit
4cb21eb
·
verified ·
1 Parent(s): 2951b30

Upload 14 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ models/gru_booster_model.keras filter=lfs diff=lfs merge=lfs -text
DockerFile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.10-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file into the container first (better layer caching:
# dependency install is only re-run when requirements.txt changes)
COPY requirements.txt .

# Install your dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of your app's code
COPY . .

# Expose the port Streamlit runs on
EXPOSE 8501

# Command to run your app (bind to 0.0.0.0 so the port is reachable from outside the container)
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import tensorflow as tf
3
+ import pandas as pd
4
+ import numpy as np
5
+ from src.data_fetcher import DataFetcher
6
+ from src.processor import Processor
7
+ from src.strategy import get_market_regime
8
+
9
+ st.set_page_config(page_title="Alpha Predict", page_icon="🏹", layout="wide")
10
+
11
@st.cache_resource
def load_alpha_model():
    """Load the trained GRU model from disk once; Streamlit reuses it across reruns."""
    model_path = "models/gru_booster_model.keras"
    return tf.keras.models.load_model(model_path)
14
+
15
+ @st.cache_resource
16
+ def load_processor():
17
+ return Processor()
18
+
19
+ def main():
20
+ st.title("🏹 Alpha Predict")
21
+ st.markdown("---")
22
+
23
+ with st.sidebar:
24
+ st.header("📖 Strategy Logic")
25
+ st.markdown("""
26
+ **Objective:** Directional probability for the next session.
27
+ - **🟢 GREEN (≥ 57.8%)**: 3x Leverage (SPXL/UPRO)
28
+ - **🟡 YELLOW (53.0% - 57.7%)**: 1x Exposure (SPY/VOO)
29
+ - **🔴 RED (< 53.0%)**: Cash (0x)
30
+ """)
31
+ st.divider()
32
+ st.caption("Alpha Predict")
33
+
34
+ fetcher = DataFetcher()
35
+ processor = load_processor()
36
+ model = load_alpha_model()
37
+
38
+ if st.button("Generate Today's Signal", type="primary"):
39
+ with st.spinner("🔭 Analyzing Market Nervous System..."):
40
+
41
+ # 1. Fetch data (Fetch 60 days to allow for the 26-day MACD EMA dropping NaNs)
42
+ market_df = fetcher.fetch_market_data(days=60)
43
+ news_df = fetcher.fetch_market_news(days=45)
44
+
45
+ st.warning(f"Earliest news fetched: {news_df['Date'].min()} | Total Headlines: {len(news_df)}")
46
+
47
+ # 2. Process - Now unpacking 4 items (df_features gives us the history!)
48
+ input_tensor, metrics, df_features, scored_news = processor.process(market_df, news_df)
49
+
50
+ # 3. Predict
51
+ prediction_prob = float(model.predict(input_tensor)[0][0])
52
+
53
+ # 4. Get Strategy Regime
54
+ regime = get_market_regime(prediction_prob)
55
+
56
+ # 5. UI Metrics
57
+ latest_vix = market_df['VIX'].iloc[-1]
58
+ prev_vix = market_df['VIX'].iloc[-2]
59
+ current_price = market_df['Close'].iloc[-1]
60
+
61
+ col1, col2, col3 = st.columns(3)
62
+ col1.metric("S&P 500 Baseline", f"${current_price:,.2f}")
63
+ col2.metric("VIX (Fear Gauge)", f"{latest_vix:.2f}",
64
+ delta=f"{latest_vix - prev_vix:+.2f} Fear", delta_color="inverse")
65
+ col3.metric("FinBERT Sentiment", f"{metrics['Sent_Mean']:+.2f}",
66
+ delta=f"{int(metrics['News_Volume'])} Headlines")
67
+
68
+ with st.expander("📊 How to interpret these values?"):
69
+ c1, c2 = st.columns(2)
70
+ c1.markdown("**VIX:** <15 Calm, 15-25 Normal, >25 Panic.")
71
+ c2.markdown("**Sentiment:** >+0.1 Bullish, ±0.1 Neutral, <-0.1 Bearish.")
72
+
73
+ st.divider()
74
+
75
+ # --- REGIME DISPLAY ---
76
+ st.subheader(f"{regime['icon']} Current Regime: :{regime['color']}[{regime['zone']}]")
77
+
78
+ res1, res2 = st.columns([1, 2])
79
+ res1.metric("Bullish Probability", f"{prediction_prob:.2%}")
80
+ res2.info(f"**Recommended Action:** {regime['action']}")
81
+
82
+ # --- LOGIC BREAKDOWN ---
83
+ st.write("### 🧠 Logic Breakdown (Last 30 Days)")
84
+
85
+ e_col1, e_col2 = st.columns(2)
86
+
87
+ with e_col1:
88
+ st.write("**Volatility Regime (VIX)**")
89
+ vix_trend = market_df['VIX'].tail(30)
90
+ st.line_chart(vix_trend)
91
+
92
+ vix_slope = vix_trend.iloc[-1] - vix_trend.iloc[0]
93
+ if vix_slope > 2:
94
+ st.warning(f"⚠️ Volatility is **trending up** (+{vix_slope:.1f}pts). The AI sees rising instability.")
95
+ elif vix_slope < -2:
96
+ st.success(f"✅ Volatility is **cooling off** ({vix_slope:.1f}pts). This supports the bullish case.")
97
+ else:
98
+ st.info("⚖️ Volatility is sideways. The model is focused on other factors.")
99
+
100
+ with e_col2:
101
+ st.write("**Sentiment Momentum (FinBERT)**")
102
+
103
+ # FIX: We now pull the historical trend from df_features!
104
+ sent_trend = df_features['Sent_Mean'].tail(30)
105
+ st.area_chart(sent_trend)
106
+
107
+ avg_30d = sent_trend.mean()
108
+ st.write(f"30-Day Avg Sentiment: **{avg_30d:+.2f}**")
109
+
110
+ if metrics['Sent_Mean'] > avg_30d:
111
+ st.write("📈 Today's news is **stronger** than the monthly average.")
112
+ else:
113
+ st.write("📉 Today's news is **weaker** than the monthly average.")
114
+
115
+ if __name__ == "__main__":
116
+ main()
models/gru_booster_model.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4de2e84278571c819c366d8b62dea18ee56128ccdd38f6879979e9b74e6c70f
3
+ size 514054
models/robust_scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72212a8ecadcea9b19932600b07a132cda24935ff987afdd45e68c42ac970ff8
3
+ size 727
requirements.txt CHANGED
@@ -1,3 +1,18 @@
1
- altair
 
2
  pandas
3
- streamlit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core Data & Math
2
+ numpy
3
  pandas
4
+ yfinance
5
+ scipy
6
+ scikit-learn
7
+ finnhub-python
8
+ joblib
9
+
10
+ # Deep Learning
11
+ tensorflow>=2.16.1
12
+ torch
13
+ transformers
14
+ accelerate
15
+
16
+ # App & Environment
17
+ streamlit
18
+ python-dotenv
src/__init__.py ADDED
File without changes
src/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (138 Bytes). View file
 
src/__pycache__/data_fetcher.cpython-310.pyc ADDED
Binary file (3.28 kB). View file
 
src/__pycache__/processor.cpython-310.pyc ADDED
Binary file (4.27 kB). View file
 
src/__pycache__/strategy.cpython-310.pyc ADDED
Binary file (777 Bytes). View file
 
src/data_fetcher.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import yfinance as yf
4
+ import pandas as pd
5
+ import finnhub
6
+ import streamlit as st
7
+ from dotenv import load_dotenv
8
+ from datetime import datetime, timedelta
9
+
10
+ # Load environment variables
11
+ load_dotenv()
12
+
13
class DataFetcher:
    """Fetches raw market data (Yahoo Finance) and market news (Finnhub).

    Prices come from yfinance; headlines come from Finnhub's company-news
    endpoint using SPY as a market-wide proxy so results can be filtered
    by historical date.
    """

    def __init__(self, ticker="^GSPC", vix_ticker="^VIX"):
        self.ticker = ticker
        self.vix_ticker = vix_ticker

        # Initialize Finnhub Client from the FINNHUB_API_KEY environment
        # variable (loaded from .env at module import).
        api_key = os.getenv("FINNHUB_API_KEY")
        if not api_key:
            raise ValueError("❌ FINNHUB_API_KEY not found in .env file!")

        self.finnhub_client = finnhub.Client(api_key=api_key)

    def fetch_market_data(self, days=50):
        """
        Fetches raw OHLCV and VIX data from Yahoo Finance.

        Returns a date-indexed DataFrame with the OHLCV columns plus a
        'VIX' close column, forward-filled to paper over missing sessions.
        """
        print(f"📡 Fetching last {days} days of {self.ticker} and {self.vix_ticker}...")

        # Download Index Data
        df = yf.download(self.ticker, period=f"{days}d", interval="1d", progress=False)
        # Download VIX
        df_vix = yf.download(self.vix_ticker, period=f"{days}d", interval="1d", progress=False)

        # Handle yfinance MultiIndex columns if they exist (newer yfinance
        # returns a (field, ticker) column MultiIndex even for one ticker).
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)
        if isinstance(df_vix.columns, pd.MultiIndex):
            df_vix.columns = df_vix.columns.get_level_values(0)

        # Merge VIX by index alignment and forward-fill any gaps.
        df['VIX'] = df_vix['Close']
        df = df.ffill()

        return df

    # 🛡️ STREAMLIT CACHE: Ignores '_self' so it doesn't try to hash the Finnhub client.
    # ttl=3600 caches the news for 1 hour so repeated button clicks load instantly.
    @st.cache_data(ttl=3600, show_spinner=False)
    def fetch_market_news(_self, days=45):
        """
        Fetches historical market news by looping through days.
        Uses 'SPY' as a proxy to allow historical date filtering on Finnhub.

        Returns a DataFrame with 'Title' and 'Date' columns (possibly empty).
        """
        print(f"📰 Fetching last {days} days of market headlines...")

        all_news = []
        end_date = datetime.now()

        # Try to render a Streamlit progress bar if running inside app.py.
        # FIX: the previous bare `except:` also swallowed KeyboardInterrupt
        # and SystemExit; narrow it to Exception.
        try:
            progress_bar = st.progress(0, text="Fetching historical news data (avoiding rate limits)...")
        except Exception:
            progress_bar = None

        # Loop backwards through time, day by day
        for i in range(days):
            target_date = end_date - timedelta(days=i)
            date_str = target_date.strftime('%Y-%m-%d')

            try:
                # FINNHUB TRICK: Use 'SPY' company news to get historical market coverage
                daily_news = _self.finnhub_client.company_news('SPY', _from=date_str, to=date_str)

                if daily_news:
                    all_news.extend(daily_news)

                # 🛑 RATE LIMIT SHIELD: Finnhub free tier allows 60 requests/minute.
                # Sleeping for 1.1 seconds guarantees we stay perfectly under the limit.
                time.sleep(1.1)

            except Exception as e:
                print(f"⚠️ API Error on {date_str}: {e}")
                time.sleep(5)  # Take a longer pause if the API gets angry

            # Update UI progress
            if progress_bar:
                progress_bar.progress((i + 1) / days, text=f"Fetched news for {date_str}...")

        # Clear the progress bar when finished
        if progress_bar:
            progress_bar.empty()

        # Convert the master list into a DataFrame
        df_news = pd.DataFrame(all_news)

        if df_news.empty:
            print("⚠️ No news found in the specified window.")
            return pd.DataFrame(columns=['Title', 'Date'])

        # Convert Unix timestamp to YYYY-MM-DD Date object
        df_news['Date'] = pd.to_datetime(df_news['datetime'], unit='s').dt.date

        # Rename columns to match what Processor expects
        df_news = df_news[['headline', 'Date']].rename(columns={'headline': 'Title'})

        # Drop duplicates in case of overlapping API returns
        df_news = df_news.drop_duplicates(subset=['Title', 'Date'])

        print(f"✅ Successfully fetched {len(df_news)} historical headlines.")
        return df_news
113
+
114
if __name__ == "__main__":
    # Manual smoke test: run `python src/data_fetcher.py` directly.
    # Requires FINNHUB_API_KEY in the environment/.env plus network access,
    # and takes ~50s due to the per-day rate-limit sleeps in fetch_market_news.
    fetcher = DataFetcher()

    # Test Market Fetch
    market_df = fetcher.fetch_market_data(days=50)
    print("\n--- Market Data Sample ---")
    print(market_df.tail())

    # Test News Fetch
    news_df = fetcher.fetch_market_news(days=45)
    print("\n--- Market News Sample ---")
    print(news_df.head())
    print(news_df.tail())
    print(f"\nTotal Headlines Fetched: {len(news_df)}")
src/predictor.py ADDED
File without changes
src/processor.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import torch
4
+ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
5
+ import joblib
6
+
7
class Processor:
    """Builds the model input from raw market + news DataFrames.

    Scores headlines with FinBERT, merges daily sentiment aggregates with
    engineered quant features, and produces the scaled (1, 30, 14) window
    the GRU model expects, plus metadata for the UI.
    """

    def __init__(self, scaler_path="models/robust_scaler.pkl"):
        print("⚙️ Initializing AlphaProcessor...")
        # transformers pipeline convention: 0 = first CUDA device, -1 = CPU
        self.device = 0 if torch.cuda.is_available() else -1
        self.model_name = "ProsusAI/finbert"

        # Load Scaler (Required for normalization before GRU).
        # FIX: the previous bare `except:` swallowed everything and left
        # self.scaler unset, so process() later died with a confusing
        # AttributeError. Narrow the catch and record the failure explicitly.
        try:
            self.scaler = joblib.load(scaler_path)
            print(f"✅ Scaler loaded from {scaler_path}")
        except Exception:
            self.scaler = None
            print("⚠️ Scaler not found. Ensure robust_scaler.pkl is in models/ folder.")

        # Initialize FinBERT Pipeline
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            self.model_name, use_safetensors=True
        )
        self.sentiment_pipe = pipeline(
            "sentiment-analysis",
            model=self.model,
            tokenizer=self.tokenizer,
            device=self.device
        )

    def process(self, df_market, df_news):
        """
        Run the full pipeline.

        Returns:
            input_tensor: float32 array of shape (1, 30, 14) for the GRU.
            latest_metrics: dict of latest-day values for the UI.
            df_features: full engineered-feature history (for plotting).
            df_news_scored: headlines with their FinBERT 'Score' column.
        """
        # 1. Process Sentiment features from headlines
        df_sent, df_news_scored = self._generate_sentiment_profile(df_news)

        # 2. Merge with market data and engineer all 14 features
        df_features = self._engineer_14_features(df_market, df_sent)

        # 3. Extract metadata for the UI (latest day's values)
        latest_metrics = {
            "Sent_Mean": df_features['Sent_Mean'].iloc[-1],
            "News_Volume": np.exp(df_features['News_Volume'].iloc[-1]) - 1,  # Reverse log1p
            "Panic_Interaction": df_features['Sent_x_VIX'].iloc[-1],
            "RSI": df_features['RSI'].iloc[-1] * 100
        }

        # 4. Get the last 30 days and scale for the Model
        final_window = df_features.tail(30).values
        scaled_window = self.scaler.transform(final_window)
        input_tensor = np.expand_dims(scaled_window, axis=0).astype('float32')

        # df_features is returned so app.py can plot the historical 30-day sentiment
        return input_tensor, latest_metrics, df_features, df_news_scored

    def _generate_sentiment_profile(self, df_news):
        """Score every headline with FinBERT and aggregate to daily features.

        Returns (daily_features, scored_news). Works on a copy so the
        caller's DataFrame is never mutated.
        """
        print("🧠 Running FinBERT Batch Analysis...")
        # FIX: copy first — the original added 'Score' and rewrote 'Date'
        # on the caller's DataFrame in place.
        df_news = df_news.copy()
        titles = df_news['Title'].astype(str).tolist()
        results = self.sentiment_pipe(titles, batch_size=32, truncation=True)

        # Signed score: positive -> +score, negative -> -score, neutral -> 0
        scores = []
        for res in results:
            label, score = res['label'].lower(), res['score']
            scores.append(score if label == 'positive' else -score if label == 'negative' else 0.0)

        df_news['Score'] = scores

        # Ensure dates match format for grouping
        df_news['Date'] = pd.to_datetime(df_news['Date']).dt.date
        grouped = df_news.groupby('Date')['Score']

        daily = pd.DataFrame({
            'Sent_Mean': grouped.mean(),
            'Sent_Intensity': grouped.apply(lambda x: x.abs().mean()),
            'News_Volume': np.log1p(grouped.count()),
            'Net_Bull': grouped.apply(lambda x: x.sum() / (len(x) + 1))
        }).fillna(0.0)

        # Convert index back to datetime for merging with the market index
        daily.index = pd.to_datetime(daily.index)

        return daily, df_news

    def _engineer_14_features(self, df, df_sent):
        """Engineer the 7 quant + 7 sentiment features; drops warm-up NaN rows."""
        data = df.copy()

        # --- QUANT BRANCH (7 Features) ---
        # Distance from 20-day volume-weighted average price (log-ratio)
        tp = (data['High'] + data['Low'] + data['Close']) / 3
        vwap = (tp * data['Volume']).rolling(20).sum() / (data['Volume'].rolling(20).sum() + 1e-9)
        data['VWAP_Dist'] = np.log(data['Close'] / vwap)

        # 14-day RSI, scaled to [0, 1]
        delta = data['Close'].diff()
        gain = (delta.where(delta > 0, 0)).rolling(14).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
        data['RSI'] = (100 - (100 / (1 + (gain/(loss + 1e-9))))) / 100.0

        # MACD histogram, normalized by price
        ema_12, ema_26 = data['Close'].ewm(span=12).mean(), data['Close'].ewm(span=26).mean()
        data['MACD_Hist'] = ((ema_12 - ema_26) - (ema_12 - ema_26).ewm(span=9).mean()) / data['Close']

        data['VIX_Norm'] = data['VIX'] / 100.0
        data['VIX_Change'] = data['VIX'].pct_change()

        # ATR-normalized distance from the 22-day mean, squashed with tanh
        tr = pd.concat([data['High']-data['Low'], abs(data['High']-data['Close'].shift()),
                        abs(data['Low']-data['Close'].shift())], axis=1).max(axis=1)
        data['ATR_Dist'] = np.tanh((data['Close'] - data['Close'].rolling(22).mean()) / (tr.rolling(14).mean() + 1e-9))
        data['Realized_Vol'] = data['Close'].pct_change().rolling(10).std() * 10

        # --- SENTIMENT BRANCH (7 Features) ---
        # Ensure indices match for joining; days without news become 0.0
        data.index = pd.to_datetime(data.index)
        data = data.join(df_sent, how='left').fillna(0.0)

        data['Sent_Mean_Delta'] = data['Sent_Mean'].diff().fillna(0.0)
        data['Sent_Mean_EMA'] = data['Sent_Mean'].ewm(span=3).mean()
        data['Sent_x_VIX'] = data['Sent_Mean'] * data['VIX_Norm']  # Panic Interaction

        feature_cols = [
            'VWAP_Dist', 'RSI', 'MACD_Hist', 'VIX_Norm', 'VIX_Change', 'ATR_Dist', 'Realized_Vol',
            'Sent_Mean', 'Sent_Intensity', 'News_Volume', 'Net_Bull', 'Sent_Mean_Delta', 'Sent_Mean_EMA', 'Sent_x_VIX'
        ]
        # dropna removes the rolling-window warm-up rows (longest window = 22 days)
        return data[feature_cols].dropna()
src/strategy.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/strategy.py
2
+
3
def get_market_regime(prediction_prob):
    """
    Translates model probability into actionable trading zones.

    Cutoffs: >= 57.8% -> GREEN (3x leverage), 53.0%-57.8% -> YELLOW (1x),
    anything lower -> RED (cash).
    """
    GREEN_FLOOR = 0.578
    YELLOW_FLOOR = 0.530

    # Guard-clause cascade: check the highest-conviction zone first.
    if prediction_prob >= GREEN_FLOOR:
        return {
            "zone": "GREEN ZONE (Sniper)",
            "color": "green",
            "icon": "🚀",
            "action": "Move/Stay to 3x S&P 500 (SPXL/UPRO)",
            "tag": "High Conviction Bullish",
        }

    if prediction_prob >= YELLOW_FLOOR:
        return {
            "zone": "YELLOW ZONE (Normal)",
            "color": "orange",
            "icon": "⚖️",
            "action": "Move/Stay to 1x S&P 500 (SPY/VOO)",
            "tag": "Standard Market Beta",
        }

    return {
        "zone": "RED ZONE (Cash)",
        "color": "red",
        "icon": "🛡️",
        "action": "Move/Stay to CASH (0x)",
        "tag": "Risk Aversion / Defensive",
    }