DevKX committed on
Commit
4cb21eb
·
verified ·
1 Parent(s): 2951b30

Upload 14 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ models/gru_booster_model.keras filter=lfs diff=lfs merge=lfs -text
DockerFile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.10-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file into the container first (better layer caching:
# dependency install is only re-run when requirements.txt changes)
COPY requirements.txt .

# Install your dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of your app's code
COPY . .

# Expose the port Streamlit runs on
EXPOSE 8501

# Command to run your app (bind to 0.0.0.0 so the port is reachable from outside the container)
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import tensorflow as tf
3
+ import pandas as pd
4
+ import numpy as np
5
+ from src.data_fetcher import DataFetcher
6
+ from src.processor import Processor
7
+ from src.strategy import get_market_regime
8
+
9
+ st.set_page_config(page_title="Alpha Predict", page_icon="🏹", layout="wide")
10
+
11
@st.cache_resource
def load_alpha_model():
    """Load the trained GRU model from disk once; Streamlit reuses it across reruns."""
    model_path = "models/gru_booster_model.keras"
    return tf.keras.models.load_model(model_path)
14
+
15
+ @st.cache_resource
16
+ def load_processor():
17
+ return Processor()
18
+
19
+ def main():
20
+ st.title("🏹 Alpha Predict")
21
+ st.markdown("---")
22
+
23
+ with st.sidebar:
24
+ st.header("📖 Strategy Logic")
25
+ st.markdown("""
26
+ **Objective:** Directional probability for the next session.
27
+ - **🟢 GREEN (≥ 57.8%)**: 3x Leverage (SPXL/UPRO)
28
+ - **🟡 YELLOW (53.0% - 57.7%)**: 1x Exposure (SPY/VOO)
29
+ - **🔴 RED (< 53.0%)**: Cash (0x)
30
+ """)
31
+ st.divider()
32
+ st.caption("Alpha Predict")
33
+
34
+ fetcher = DataFetcher()
35
+ processor = load_processor()
36
+ model = load_alpha_model()
37
+
38
+ if st.button("Generate Today's Signal", type="primary"):
39
+ with st.spinner("🔭 Analyzing Market Nervous System..."):
40
+
41
+ # 1. Fetch data (Fetch 60 days to allow for the 26-day MACD EMA dropping NaNs)
42
+ market_df = fetcher.fetch_market_data(days=60)
43
+ news_df = fetcher.fetch_market_news(days=45)
44
+
45
+ st.warning(f"Earliest news fetched: {news_df['Date'].min()} | Total Headlines: {len(news_df)}")
46
+
47
+ # 2. Process - Now unpacking 4 items (df_features gives us the history!)
48
+ input_tensor, metrics, df_features, scored_news = processor.process(market_df, news_df)
49
+
50
+ # 3. Predict
51
+ prediction_prob = float(model.predict(input_tensor)[0][0])
52
+
53
+ # 4. Get Strategy Regime
54
+ regime = get_market_regime(prediction_prob)
55
+
56
+ # 5. UI Metrics
57
+ latest_vix = market_df['VIX'].iloc[-1]
58
+ prev_vix = market_df['VIX'].iloc[-2]
59
+ current_price = market_df['Close'].iloc[-1]
60
+
61
+ col1, col2, col3 = st.columns(3)
62
+ col1.metric("S&P 500 Baseline", f"${current_price:,.2f}")
63
+ col2.metric("VIX (Fear Gauge)", f"{latest_vix:.2f}",
64
+ delta=f"{latest_vix - prev_vix:+.2f} Fear", delta_color="inverse")
65
+ col3.metric("FinBERT Sentiment", f"{metrics['Sent_Mean']:+.2f}",
66
+ delta=f"{int(metrics['News_Volume'])} Headlines")
67
+
68
+ with st.expander("📊 How to interpret these values?"):
69
+ c1, c2 = st.columns(2)
70
+ c1.markdown("**VIX:** <15 Calm, 15-25 Normal, >25 Panic.")
71
+ c2.markdown("**Sentiment:** >+0.1 Bullish, ±0.1 Neutral, <-0.1 Bearish.")
72
+
73
+ st.divider()
74
+
75
+ # --- REGIME DISPLAY ---
76
+ st.subheader(f"{regime['icon']} Current Regime: :{regime['color']}[{regime['zone']}]")
77
+
78
+ res1, res2 = st.columns([1, 2])
79
+ res1.metric("Bullish Probability", f"{prediction_prob:.2%}")
80
+ res2.info(f"**Recommended Action:** {regime['action']}")
81
+
82
+ # --- LOGIC BREAKDOWN ---
83
+ st.write("### 🧠 Logic Breakdown (Last 30 Days)")
84
+
85
+ e_col1, e_col2 = st.columns(2)
86
+
87
+ with e_col1:
88
+ st.write("**Volatility Regime (VIX)**")
89
+ vix_trend = market_df['VIX'].tail(30)
90
+ st.line_chart(vix_trend)
91
+
92
+ vix_slope = vix_trend.iloc[-1] - vix_trend.iloc[0]
93
+ if vix_slope > 2:
94
+ st.warning(f"⚠️ Volatility is **trending up** (+{vix_slope:.1f}pts). The AI sees rising instability.")
95
+ elif vix_slope < -2:
96
+ st.success(f"✅ Volatility is **cooling off** ({vix_slope:.1f}pts). This supports the bullish case.")
97
+ else:
98
+ st.info("⚖️ Volatility is sideways. The model is focused on other factors.")
99
+
100
+ with e_col2:
101
+ st.write("**Sentiment Momentum (FinBERT)**")
102
+
103
+ # FIX: We now pull the historical trend from df_features!
104
+ sent_trend = df_features['Sent_Mean'].tail(30)
105
+ st.area_chart(sent_trend)
106
+
107
+ avg_30d = sent_trend.mean()
108
+ st.write(f"30-Day Avg Sentiment: **{avg_30d:+.2f}**")
109
+
110
+ if metrics['Sent_Mean'] > avg_30d:
111
+ st.write("📈 Today's news is **stronger** than the monthly average.")
112
+ else:
113
+ st.write("📉 Today's news is **weaker** than the monthly average.")
114
+
115
+ if __name__ == "__main__":
116
+ main()
models/gru_booster_model.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4de2e84278571c819c366d8b62dea18ee56128ccdd38f6879979e9b74e6c70f
3
+ size 514054
models/robust_scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72212a8ecadcea9b19932600b07a132cda24935ff987afdd45e68c42ac970ff8
3
+ size 727
requirements.txt CHANGED
@@ -1,3 +1,18 @@
1
- altair
 
2
  pandas
3
- streamlit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core Data & Math
2
+ numpy
3
  pandas
4
+ yfinance
5
+ scipy
6
+ scikit-learn
7
+ finnhub-python
8
+ joblib
9
+
10
+ # Deep Learning
11
+ tensorflow>=2.16.1
12
+ torch
13
+ transformers
14
+ accelerate
15
+
16
+ # App & Environment
17
+ streamlit
18
+ python-dotenv
src/__init__.py ADDED
File without changes
src/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (138 Bytes). View file
 
src/__pycache__/data_fetcher.cpython-310.pyc ADDED
Binary file (3.28 kB). View file
 
src/__pycache__/processor.cpython-310.pyc ADDED
Binary file (4.27 kB). View file
 
src/__pycache__/strategy.cpython-310.pyc ADDED
Binary file (777 Bytes). View file
 
src/data_fetcher.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import yfinance as yf
4
+ import pandas as pd
5
+ import finnhub
6
+ import streamlit as st
7
+ from dotenv import load_dotenv
8
+ from datetime import datetime, timedelta
9
+
10
+ # Load environment variables
11
+ load_dotenv()
12
+
13
class DataFetcher:
    """Fetches raw market data (Yahoo Finance) and market news (Finnhub).

    Prices come from yfinance; headlines come from Finnhub's company-news
    endpoint using SPY as a market-wide proxy so results can be filtered
    by historical date.
    """

    def __init__(self, ticker="^GSPC", vix_ticker="^VIX"):
        self.ticker = ticker
        self.vix_ticker = vix_ticker

        # Initialize Finnhub Client from the FINNHUB_API_KEY environment
        # variable (loaded from .env at module import).
        api_key = os.getenv("FINNHUB_API_KEY")
        if not api_key:
            raise ValueError("❌ FINNHUB_API_KEY not found in .env file!")

        self.finnhub_client = finnhub.Client(api_key=api_key)

    def fetch_market_data(self, days=50):
        """
        Fetches raw OHLCV and VIX data from Yahoo Finance.

        Returns a date-indexed DataFrame with the OHLCV columns plus a
        'VIX' close column, forward-filled to paper over missing sessions.
        """
        print(f"📡 Fetching last {days} days of {self.ticker} and {self.vix_ticker}...")

        # Download Index Data
        df = yf.download(self.ticker, period=f"{days}d", interval="1d", progress=False)
        # Download VIX
        df_vix = yf.download(self.vix_ticker, period=f"{days}d", interval="1d", progress=False)

        # Handle yfinance MultiIndex columns if they exist (newer yfinance
        # returns a (field, ticker) column MultiIndex even for one ticker).
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)
        if isinstance(df_vix.columns, pd.MultiIndex):
            df_vix.columns = df_vix.columns.get_level_values(0)

        # Merge VIX by index alignment and forward-fill any gaps.
        df['VIX'] = df_vix['Close']
        df = df.ffill()

        return df

    # 🛡️ STREAMLIT CACHE: Ignores '_self' so it doesn't try to hash the Finnhub client.
    # ttl=3600 caches the news for 1 hour so repeated button clicks load instantly.
    @st.cache_data(ttl=3600, show_spinner=False)
    def fetch_market_news(_self, days=45):
        """
        Fetches historical market news by looping through days.
        Uses 'SPY' as a proxy to allow historical date filtering on Finnhub.

        Returns a DataFrame with 'Title' and 'Date' columns (possibly empty).
        """
        print(f"📰 Fetching last {days} days of market headlines...")

        all_news = []
        end_date = datetime.now()

        # Try to render a Streamlit progress bar if running inside app.py.
        # FIX: the previous bare `except:` also swallowed KeyboardInterrupt
        # and SystemExit; narrow it to Exception.
        try:
            progress_bar = st.progress(0, text="Fetching historical news data (avoiding rate limits)...")
        except Exception:
            progress_bar = None

        # Loop backwards through time, day by day
        for i in range(days):
            target_date = end_date - timedelta(days=i)
            date_str = target_date.strftime('%Y-%m-%d')

            try:
                # FINNHUB TRICK: Use 'SPY' company news to get historical market coverage
                daily_news = _self.finnhub_client.company_news('SPY', _from=date_str, to=date_str)

                if daily_news:
                    all_news.extend(daily_news)

                # 🛑 RATE LIMIT SHIELD: Finnhub free tier allows 60 requests/minute.
                # Sleeping for 1.1 seconds guarantees we stay perfectly under the limit.
                time.sleep(1.1)

            except Exception as e:
                print(f"⚠️ API Error on {date_str}: {e}")
                time.sleep(5)  # Take a longer pause if the API gets angry

            # Update UI progress
            if progress_bar:
                progress_bar.progress((i + 1) / days, text=f"Fetched news for {date_str}...")

        # Clear the progress bar when finished
        if progress_bar:
            progress_bar.empty()

        # Convert the master list into a DataFrame
        df_news = pd.DataFrame(all_news)

        if df_news.empty:
            print("⚠️ No news found in the specified window.")
            return pd.DataFrame(columns=['Title', 'Date'])

        # Convert Unix timestamp to YYYY-MM-DD Date object
        df_news['Date'] = pd.to_datetime(df_news['datetime'], unit='s').dt.date

        # Rename columns to match what Processor expects
        df_news = df_news[['headline', 'Date']].rename(columns={'headline': 'Title'})

        # Drop duplicates in case of overlapping API returns
        df_news = df_news.drop_duplicates(subset=['Title', 'Date'])

        print(f"✅ Successfully fetched {len(df_news)} historical headlines.")
        return df_news
113
+
114
if __name__ == "__main__":
    # Manual smoke test: run `python src/data_fetcher.py` directly.
    # Requires FINNHUB_API_KEY in the environment/.env plus network access,
    # and takes ~50s due to the per-day rate-limit sleeps in fetch_market_news.
    fetcher = DataFetcher()

    # Test Market Fetch
    market_df = fetcher.fetch_market_data(days=50)
    print("\n--- Market Data Sample ---")
    print(market_df.tail())

    # Test News Fetch
    news_df = fetcher.fetch_market_news(days=45)
    print("\n--- Market News Sample ---")
    print(news_df.head())
    print(news_df.tail())
    print(f"\nTotal Headlines Fetched: {len(news_df)}")
src/predictor.py ADDED
File without changes
src/processor.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import torch
4
+ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
5
+ import joblib
6
+
7
class Processor:
    """Builds the model input from raw market + news DataFrames.

    Scores headlines with FinBERT, merges daily sentiment aggregates with
    engineered quant features, and produces the scaled (1, 30, 14) window
    the GRU model expects, plus metadata for the UI.
    """

    def __init__(self, scaler_path="models/robust_scaler.pkl"):
        print("⚙️ Initializing AlphaProcessor...")
        # transformers pipeline convention: 0 = first CUDA device, -1 = CPU
        self.device = 0 if torch.cuda.is_available() else -1
        self.model_name = "ProsusAI/finbert"

        # Load Scaler (Required for normalization before GRU).
        # FIX: the previous bare `except:` swallowed everything and left
        # self.scaler unset, so process() later died with a confusing
        # AttributeError. Narrow the catch and record the failure explicitly.
        try:
            self.scaler = joblib.load(scaler_path)
            print(f"✅ Scaler loaded from {scaler_path}")
        except Exception:
            self.scaler = None
            print("⚠️ Scaler not found. Ensure robust_scaler.pkl is in models/ folder.")

        # Initialize FinBERT Pipeline
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            self.model_name, use_safetensors=True
        )
        self.sentiment_pipe = pipeline(
            "sentiment-analysis",
            model=self.model,
            tokenizer=self.tokenizer,
            device=self.device
        )

    def process(self, df_market, df_news):
        """
        Run the full pipeline.

        Returns:
            input_tensor: float32 array of shape (1, 30, 14) for the GRU.
            latest_metrics: dict of latest-day values for the UI.
            df_features: full engineered-feature history (for plotting).
            df_news_scored: headlines with their FinBERT 'Score' column.
        """
        # 1. Process Sentiment features from headlines
        df_sent, df_news_scored = self._generate_sentiment_profile(df_news)

        # 2. Merge with market data and engineer all 14 features
        df_features = self._engineer_14_features(df_market, df_sent)

        # 3. Extract metadata for the UI (latest day's values)
        latest_metrics = {
            "Sent_Mean": df_features['Sent_Mean'].iloc[-1],
            "News_Volume": np.exp(df_features['News_Volume'].iloc[-1]) - 1,  # Reverse log1p
            "Panic_Interaction": df_features['Sent_x_VIX'].iloc[-1],
            "RSI": df_features['RSI'].iloc[-1] * 100
        }

        # 4. Get the last 30 days and scale for the Model
        final_window = df_features.tail(30).values
        scaled_window = self.scaler.transform(final_window)
        input_tensor = np.expand_dims(scaled_window, axis=0).astype('float32')

        # df_features is returned so app.py can plot the historical 30-day sentiment
        return input_tensor, latest_metrics, df_features, df_news_scored

    def _generate_sentiment_profile(self, df_news):
        """Score every headline with FinBERT and aggregate to daily features.

        Returns (daily_features, scored_news). Works on a copy so the
        caller's DataFrame is never mutated.
        """
        print("🧠 Running FinBERT Batch Analysis...")
        # FIX: copy first — the original added 'Score' and rewrote 'Date'
        # on the caller's DataFrame in place.
        df_news = df_news.copy()
        titles = df_news['Title'].astype(str).tolist()
        results = self.sentiment_pipe(titles, batch_size=32, truncation=True)

        # Signed score: positive -> +score, negative -> -score, neutral -> 0
        scores = []
        for res in results:
            label, score = res['label'].lower(), res['score']
            scores.append(score if label == 'positive' else -score if label == 'negative' else 0.0)

        df_news['Score'] = scores

        # Ensure dates match format for grouping
        df_news['Date'] = pd.to_datetime(df_news['Date']).dt.date
        grouped = df_news.groupby('Date')['Score']

        daily = pd.DataFrame({
            'Sent_Mean': grouped.mean(),
            'Sent_Intensity': grouped.apply(lambda x: x.abs().mean()),
            'News_Volume': np.log1p(grouped.count()),
            'Net_Bull': grouped.apply(lambda x: x.sum() / (len(x) + 1))
        }).fillna(0.0)

        # Convert index back to datetime for merging with the market index
        daily.index = pd.to_datetime(daily.index)

        return daily, df_news

    def _engineer_14_features(self, df, df_sent):
        """Engineer the 7 quant + 7 sentiment features; drops warm-up NaN rows."""
        data = df.copy()

        # --- QUANT BRANCH (7 Features) ---
        # Distance from 20-day volume-weighted average price (log-ratio)
        tp = (data['High'] + data['Low'] + data['Close']) / 3
        vwap = (tp * data['Volume']).rolling(20).sum() / (data['Volume'].rolling(20).sum() + 1e-9)
        data['VWAP_Dist'] = np.log(data['Close'] / vwap)

        # 14-day RSI, scaled to [0, 1]
        delta = data['Close'].diff()
        gain = (delta.where(delta > 0, 0)).rolling(14).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(14).mean()
        data['RSI'] = (100 - (100 / (1 + (gain/(loss + 1e-9))))) / 100.0

        # MACD histogram, normalized by price
        ema_12, ema_26 = data['Close'].ewm(span=12).mean(), data['Close'].ewm(span=26).mean()
        data['MACD_Hist'] = ((ema_12 - ema_26) - (ema_12 - ema_26).ewm(span=9).mean()) / data['Close']

        data['VIX_Norm'] = data['VIX'] / 100.0
        data['VIX_Change'] = data['VIX'].pct_change()

        # ATR-normalized distance from the 22-day mean, squashed with tanh
        tr = pd.concat([data['High']-data['Low'], abs(data['High']-data['Close'].shift()),
                        abs(data['Low']-data['Close'].shift())], axis=1).max(axis=1)
        data['ATR_Dist'] = np.tanh((data['Close'] - data['Close'].rolling(22).mean()) / (tr.rolling(14).mean() + 1e-9))
        data['Realized_Vol'] = data['Close'].pct_change().rolling(10).std() * 10

        # --- SENTIMENT BRANCH (7 Features) ---
        # Ensure indices match for joining; days without news become 0.0
        data.index = pd.to_datetime(data.index)
        data = data.join(df_sent, how='left').fillna(0.0)

        data['Sent_Mean_Delta'] = data['Sent_Mean'].diff().fillna(0.0)
        data['Sent_Mean_EMA'] = data['Sent_Mean'].ewm(span=3).mean()
        data['Sent_x_VIX'] = data['Sent_Mean'] * data['VIX_Norm']  # Panic Interaction

        feature_cols = [
            'VWAP_Dist', 'RSI', 'MACD_Hist', 'VIX_Norm', 'VIX_Change', 'ATR_Dist', 'Realized_Vol',
            'Sent_Mean', 'Sent_Intensity', 'News_Volume', 'Net_Bull', 'Sent_Mean_Delta', 'Sent_Mean_EMA', 'Sent_x_VIX'
        ]
        # dropna removes the rolling-window warm-up rows (longest window = 22 days)
        return data[feature_cols].dropna()
src/strategy.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/strategy.py
2
+
3
def get_market_regime(prediction_prob):
    """
    Translates model probability into actionable trading zones.

    Cutoffs: >= 57.8% -> GREEN (3x leverage), 53.0%-57.8% -> YELLOW (1x),
    anything lower -> RED (cash).
    """
    GREEN_FLOOR = 0.578
    YELLOW_FLOOR = 0.530

    # Guard-clause cascade: check the highest-conviction zone first.
    if prediction_prob >= GREEN_FLOOR:
        return {
            "zone": "GREEN ZONE (Sniper)",
            "color": "green",
            "icon": "🚀",
            "action": "Move/Stay to 3x S&P 500 (SPXL/UPRO)",
            "tag": "High Conviction Bullish",
        }

    if prediction_prob >= YELLOW_FLOOR:
        return {
            "zone": "YELLOW ZONE (Normal)",
            "color": "orange",
            "icon": "⚖️",
            "action": "Move/Stay to 1x S&P 500 (SPY/VOO)",
            "tag": "Standard Market Beta",
        }

    return {
        "zone": "RED ZONE (Cash)",
        "color": "red",
        "icon": "🛡️",
        "action": "Move/Stay to CASH (0x)",
        "tag": "Risk Aversion / Defensive",
    }