import os
import time
import yfinance as yf
import pandas as pd
import finnhub
import streamlit as st
import requests
from dotenv import load_dotenv
from datetime import datetime, timedelta
# Load environment variables
load_dotenv()
class DataFetcher:
    """Fetches market price data (FMP stable API) and news headlines (Finnhub),
    with local CSV fallbacks when either API is unavailable or blocked."""

    def __init__(self, ticker="^GSPC", vix_ticker="%5EVIX"):
        # NOTE: vix_ticker is pre-URL-encoded ("%5E" == "^") because it is
        # interpolated directly into FMP query strings below.
        self.ticker = ticker
        self.vix_ticker = vix_ticker
        # Load API keys from the environment (.env locally, HF Secrets when deployed)
        self.finnhub_key = os.getenv("FINNHUB_API_KEY")
        self.fmp_key = os.getenv("FMP_API_KEY")
        if not self.finnhub_key or not self.fmp_key:
            print("Warning: API Keys missing! Check your .env file or HF Secrets.")
        # Finnhub client is used only for news fetching
        self.finnhub_client = finnhub.Client(api_key=self.finnhub_key)

    def fetch_market_data(self, days=60):
        """Fetch live OHLCV data for ``self.ticker`` from the FMP Stable API,
        merge the VIX close onto it, and return the last ``days`` rows.

        Falls back to the local backup CSV on any API error or malformed
        response. Returned frame is indexed by tz-naive midnight dates with
        columns Open/High/Low/Close/Volume/VIX.
        """
        if not self.fmp_key:
            return self._load_backup(days)
        try:
            print(f"Fetching live data for {self.ticker} from FMP Stable API...")
            spy_url = f"https://financialmodelingprep.com/stable/historical-price-eod/full?symbol={self.ticker}&apikey={self.fmp_key}"
            spy_res = requests.get(spy_url, timeout=10).json()
            # FMP signals errors as a dict payload instead of a list of rows
            if isinstance(spy_res, dict) and "Error Message" in spy_res:
                print(f"FMP Error: {spy_res['Error Message']}")
                return self._load_backup(days)
            if not isinstance(spy_res, list) or len(spy_res) == 0:
                return self._load_backup(days)

            df = pd.DataFrame(spy_res)
            # Convert to datetime, strip timezones, and snap to midnight so the
            # SPY index aligns exactly with the VIX index for the join below.
            df['date'] = pd.to_datetime(df['date'])
            if df['date'].dt.tz is not None:
                df['date'] = df['date'].dt.tz_localize(None)
            df['date'] = df['date'].dt.normalize()
            df.set_index('date', inplace=True)
            df = df.sort_index()[['open', 'high', 'low', 'close', 'volume']]
            df.columns = [c.capitalize() for c in df.columns]

            # Merge VIX; ffill/bfill covers dates missing from the VIX series
            df['VIX'] = self._get_vix_data()
            df['VIX'] = df['VIX'].ffill().bfill()
            print("Live market data fetched and merged successfully!")
            return df.tail(days)
        except Exception as e:
            print(f"Major Fetch Error: {e}")
            return self._load_backup(days)

    def _get_vix_data(self):
        """Fetch the VIX close series from the FMP Stable API.

        Falls back to the ``VIX`` column of the local backup CSV, and finally
        to the scalar 18.0 (a neutral long-run VIX level) if no data exists.
        """
        print("Attempting to fetch VIX from FMP Stable API...")
        try:
            vix_url = f"https://financialmodelingprep.com/stable/historical-price-eod/full?symbol={self.vix_ticker}&apikey={self.fmp_key}"
            vix_res = requests.get(vix_url, timeout=5).json()
            if isinstance(vix_res, list) and len(vix_res) > 0:
                vix_df = pd.DataFrame(vix_res)
                # Strip timezones and normalize so the index matches SPY exactly
                vix_df['date'] = pd.to_datetime(vix_df['date'])
                if vix_df['date'].dt.tz is not None:
                    vix_df['date'] = vix_df['date'].dt.tz_localize(None)
                vix_df['date'] = vix_df['date'].dt.normalize()
                vix_df.set_index('date', inplace=True)
                vix_df = vix_df.sort_index()
                print("VIX fetched successfully from FMP!")
                return vix_df['close']
        except Exception as e:
            print(f"VIX API request failed: {e}")

        print("Pulling VIX from local backup...")
        backup_path = "data/market_data_backup.csv"
        if os.path.exists(backup_path):
            backup_df = pd.read_csv(backup_path, index_col=0, parse_dates=True)
            # Strip timezones from the backup CSV index as well
            if backup_df.index.tz is not None:
                backup_df.index = backup_df.index.tz_localize(None)
            backup_df.index = backup_df.index.normalize()
            if 'VIX' in backup_df.columns:
                return backup_df['VIX']
        # Scalar fallback broadcasts to every row when assigned to df['VIX']
        return 18.0

    def _load_backup(self, days):
        """Failsafe: load the last ``days`` rows of the local backup CSV.

        Returns an empty DataFrame when the backup file is missing so callers
        can detect total failure.
        """
        print("System: Loading localized market data backup...")
        backup_path = "data/market_data_backup.csv"
        if not os.path.exists(backup_path):
            print("Market backup CSV not found!")
            return pd.DataFrame()
        df = pd.read_csv(backup_path, index_col=0, parse_dates=True)
        return df.tail(days)

    # STREAMLIT CACHE: leading underscore on '_self' tells Streamlit not to
    # hash the DataFetcher instance (the Finnhub client is unhashable).
    # ttl=3600 caches news for 1 hour so repeated button clicks load instantly.
    @st.cache_data(ttl=3600, show_spinner=False)
    def fetch_market_news(_self, days=45):
        """Fetch the last ``days`` days of market headlines, one day per request.

        Uses 'SPY' company news as a proxy because Finnhub allows historical
        date filtering on company news but not on general market news.
        Returns a DataFrame with columns ['Title', 'Date'] (empty if no news).
        """
        print(f"Fetching last {days} days of market headlines...")
        all_news = []
        end_date = datetime.now()

        # Render a Streamlit progress bar only when running inside the app;
        # st.progress raises outside a Streamlit session context.
        try:
            progress_bar = st.progress(0, text="Fetching historical news data (avoiding rate limits)...")
        except Exception:
            progress_bar = None

        # Loop backwards through time, day by day
        for i in range(days):
            target_date = end_date - timedelta(days=i)
            date_str = target_date.strftime('%Y-%m-%d')
            try:
                daily_news = _self.finnhub_client.company_news('SPY', _from=date_str, to=date_str)
                if daily_news:
                    all_news.extend(daily_news)
                # RATE LIMIT SHIELD: Finnhub free tier allows 60 requests/minute;
                # sleeping 1.1s per request keeps us safely under the limit.
                time.sleep(1.1)
            except Exception as e:
                print(f"API Error on {date_str}: {e}")
                time.sleep(5)  # Take a longer pause if the API gets angry
            if progress_bar:
                progress_bar.progress((i + 1) / days, text=f"Fetched news for {date_str}...")

        if progress_bar:
            progress_bar.empty()

        df_news = pd.DataFrame(all_news)
        if df_news.empty:
            print("No news found in the specified window.")
            return pd.DataFrame(columns=['Title', 'Date'])

        # Convert Unix timestamp to a plain date, then rename to the schema
        # the downstream Processor expects.
        df_news['Date'] = pd.to_datetime(df_news['datetime'], unit='s').dt.date
        df_news = df_news[['headline', 'Date']].rename(columns={'headline': 'Title'})
        # Drop duplicates in case of overlapping API returns
        df_news = df_news.drop_duplicates(subset=['Title', 'Date'])
        print(f"Successfully fetched {len(df_news)} historical headlines.")
        return df_news
if __name__ == "__main__":
fetcher = DataFetcher()
# Test Market Fetch
market_df = fetcher.fetch_market_data(days=50)
print("\n--- Market Data Sample ---")
print(market_df.tail())
# Test News Fetch
news_df = fetcher.fetch_market_news(days=45)
print("\n--- Market News Sample ---")
print(news_df.head())
print(news_df.tail())
print(f"\nTotal Headlines Fetched: {len(news_df)}") |