DevKX committed on
Commit
6ccd6c8
·
verified ·
1 Parent(s): 6a10e86

Delete data_fetcher.py

Browse files
Files changed (1) hide show
  1. data_fetcher.py +0 -142
data_fetcher.py DELETED
@@ -1,142 +0,0 @@
1
- import os
2
- import time
3
- import yfinance as yf
4
- import pandas as pd
5
- import finnhub
6
- import streamlit as st
7
- from dotenv import load_dotenv
8
- from datetime import datetime, timedelta
9
-
10
- # Load environment variables
11
- load_dotenv()
12
-
13
class DataFetcher:
    """Fetches market OHLCV/VIX data (Yahoo Finance) and headlines (Finnhub).

    Falls back to a local CSV backup when Yahoo Finance blocks the server IP.
    Requires FINNHUB_API_KEY in the environment (loaded from .env at import).
    """

    def __init__(self, ticker="^GSPC", vix_ticker="^VIX"):
        """
        Args:
            ticker: Yahoo Finance symbol for the main index (default: S&P 500).
            vix_ticker: Yahoo Finance symbol for the volatility index.

        Raises:
            ValueError: if FINNHUB_API_KEY is missing from the environment.
        """
        self.ticker = ticker
        self.vix_ticker = vix_ticker

        # Initialize Finnhub Client
        api_key = os.getenv("FINNHUB_API_KEY")
        if not api_key:
            raise ValueError("❌ FINNHUB_API_KEY not found in .env file!")

        self.finnhub_client = finnhub.Client(api_key=api_key)

    def fetch_market_data(self, days=50):
        """
        Fetches raw OHLCV and VIX data from Yahoo Finance.
        Falls back to local CSV in the data/ folder if Yahoo blocks the server IP.

        Args:
            days: number of most-recent daily bars to return.

        Returns:
            pd.DataFrame indexed by date with OHLCV columns plus a 'VIX' column
            (live path), or the tail of the backup CSV (fallback path).
        """
        print(f"📡 Attempting to fetch last {days} days of {self.ticker} and {self.vix_ticker}...")

        try:
            # 1. TRY TO FETCH LIVE DATA
            # BUG FIX: these two downloads were commented out, which left `df`
            # and `df_vix` undefined — the try block always raised NameError
            # and the method could only ever reach the CSV fallback.
            df = yf.download(self.ticker, period=f"{days}d", interval="1d", progress=False)
            df_vix = yf.download(self.vix_ticker, period=f"{days}d", interval="1d", progress=False)

            # If the dataframe is empty (Yahoo stealth-blocked us), force an
            # error immediately — before we try to use its columns below.
            if df.empty:
                raise ValueError("Yahoo Finance returned empty data.")

            # yfinance sometimes returns MultiIndex columns; flatten them.
            if isinstance(df.columns, pd.MultiIndex):
                df.columns = df.columns.get_level_values(0)
            if isinstance(df_vix.columns, pd.MultiIndex):
                df_vix.columns = df_vix.columns.get_level_values(0)

            # Align VIX closes onto the index frame; forward-fill any gaps
            # left by mismatched trading calendars.
            df['VIX'] = df_vix['Close']
            df = df.ffill()

            return df

        except Exception as e:
            # 2. FALLBACK TO LOCAL CSV IF BLOCKED
            print(f"⚠️ Live fetch failed ({e}). Loading backup data from data/ folder...")

            # Load the CSV from the local data folder.
            backup_path = "data/market_data_backup.csv"
            df_backup = pd.read_csv(backup_path, index_col=0, parse_dates=True)

            # Return only the requested number of days.
            return df_backup.tail(days)

    # 🛡️ STREAMLIT CACHE: the leading underscore in '_self' tells Streamlit
    # not to try to hash the Finnhub client. ttl=3600 caches the news for
    # 1 hour so repeated button clicks load instantly.
    @st.cache_data(ttl=3600, show_spinner=False)
    def fetch_market_news(_self, days=45):
        """
        Fetches historical market news by looping through days.
        Uses 'SPY' as a proxy to allow historical date filtering on Finnhub.

        Args:
            days: number of calendar days of headlines to fetch, newest first.

        Returns:
            pd.DataFrame with columns ['Title', 'Date'], deduplicated; empty
            frame with those columns if no news was found.
        """
        print(f"📰 Fetching last {days} days of market headlines...")

        all_news = []
        end_date = datetime.now()

        # Try to render a Streamlit progress bar if running inside app.py;
        # outside a Streamlit session st.progress raises, so fall back to None.
        try:
            progress_bar = st.progress(0, text="Fetching historical news data (avoiding rate limits)...")
        except Exception:
            # BUG FIX: was a bare `except:`, which would also swallow
            # SystemExit and KeyboardInterrupt.
            progress_bar = None

        # Loop backwards through time, day by day.
        for i in range(days):
            target_date = end_date - timedelta(days=i)
            date_str = target_date.strftime('%Y-%m-%d')

            try:
                # FINNHUB TRICK: Use 'SPY' company news to get historical
                # market coverage with date filtering.
                daily_news = _self.finnhub_client.company_news('SPY', _from=date_str, to=date_str)

                if daily_news:
                    all_news.extend(daily_news)

                # 🛑 RATE LIMIT SHIELD: Finnhub free tier allows 60 requests/minute.
                # Sleeping for 1.1 seconds guarantees we stay under the limit.
                time.sleep(1.1)

            except Exception as e:
                print(f"⚠️ API Error on {date_str}: {e}")
                time.sleep(5)  # Take a longer pause if the API gets angry

            # Update UI progress
            if progress_bar:
                progress_bar.progress((i + 1) / days, text=f"Fetched news for {date_str}...")

        # Clear the progress bar when finished
        if progress_bar:
            progress_bar.empty()

        # Convert the master list into a DataFrame
        df_news = pd.DataFrame(all_news)

        if df_news.empty:
            print("⚠️ No news found in the specified window.")
            return pd.DataFrame(columns=['Title', 'Date'])

        # Convert Unix timestamp to YYYY-MM-DD date object
        df_news['Date'] = pd.to_datetime(df_news['datetime'], unit='s').dt.date

        # Rename columns to match what Processor expects
        df_news = df_news[['headline', 'Date']].rename(columns={'headline': 'Title'})

        # Drop duplicates in case of overlapping API returns
        df_news = df_news.drop_duplicates(subset=['Title', 'Date'])

        print(f"✅ Successfully fetched {len(df_news)} historical headlines.")
        return df_news
128
-
129
if __name__ == "__main__":
    # Quick command-line smoke test of both fetchers.
    data_fetcher = DataFetcher()

    # Exercise the market-data path and show the most recent rows.
    prices = data_fetcher.fetch_market_data(days=50)
    print("\n--- Market Data Sample ---")
    print(prices.tail())

    # Exercise the news path and show a slice from each end.
    headlines = data_fetcher.fetch_market_news(days=45)
    print("\n--- Market News Sample ---")
    print(headlines.head())
    print(headlines.tail())
    print(f"\nTotal Headlines Fetched: {len(headlines)}")