# (Hosting-page residue captured with this file, not part of the module:
#  "Spaces: Build error / Build error")
# Market data fetching service using yfinance
"""
This module handles fetching historical stock price data, calculating returns,
volatility, and market index comparisons for evaluation purposes.
"""
import logging
from datetime import datetime, timedelta
from typing import Dict, Optional, Tuple

import numpy as np
import pandas as pd
import streamlit as st
import yfinance as yf
# Configure logging at import time so INFO-level messages from this module are
# emitted. logging.basicConfig() does nothing if the root logger already has
# handlers, so an application that configured logging earlier keeps its setup.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class MarketDataService:
    """
    Service for fetching and processing market data for evaluation.

    Price history is requested through ``yfinance``. The public methods log
    failures and return ``None`` (or an ``{"error": ...}`` dict) rather than
    raising, so callers only need to check the returned value.
    """

    def __init__(self, market_index: str = "^GSPC"):
        """
        Initialize the market data service.

        Args:
            market_index: The market index ticker for macro adjustments (default: S&P 500)
        """
        self.market_index = market_index

    # NOTE(review): the receiver is spelled `_self`, presumably for Streamlit's
    # cache decorators (which skip hashing underscore-prefixed arguments), but no
    # caching decorator is applied here, so every call performs a fresh fetch.
    def fetch_stock_data(_self, ticker: str, start_date: datetime, end_date: datetime) -> Optional[pd.DataFrame]:
        """
        Fetch historical stock data for a given ticker and date range.

        Args:
            ticker: Stock ticker symbol
            start_date: Start date for data fetch
            end_date: End date for data fetch

        Returns:
            DataFrame with stock price data or None if failed
        """
        try:
            logger.info("Fetching data for %s from %s to %s", ticker, start_date, end_date)
            data = yf.Ticker(ticker).history(start=start_date, end=end_date)
            if data.empty:
                logger.warning("No data found for ticker %s", ticker)
                return None
            return data
        except Exception as e:
            logger.error("Error fetching data for %s: %s", ticker, e)
            return None

    @staticmethod
    def _as_date(value):
        """Return the calendar date of a datetime; plain dates pass through unchanged."""
        return value.date() if isinstance(value, datetime) else value

    @staticmethod
    def _first_trading_date(index_dates, target, inclusive: bool):
        """Return the earliest date in index_dates on/after (inclusive) or strictly after target, else None."""
        candidates = [d for d in index_dates if d > target or (inclusive and d == target)]
        # min() rather than [0] so the result does not depend on the index order.
        return min(candidates) if candidates else None

    @staticmethod
    def _percent_change(start_price, end_price) -> Optional[float]:
        """Return (end - start) / start * 100, or None when the prices cannot yield a finite result."""
        start, end = float(start_price), float(end_price)
        if start == 0 or not (np.isfinite(start) and np.isfinite(end)):
            return None
        return (end - start) / start * 100

    def _fetch_market_data(self, news_date: datetime) -> Optional[pd.DataFrame]:
        """Fetch the market index for a window of 5 days either side of news_date."""
        start_date = news_date - timedelta(days=5)  # Buffer for weekends
        end_date = news_date + timedelta(days=5)
        return self.fetch_stock_data(self.market_index, start_date, end_date)

    def calculate_same_day_return(self, data: pd.DataFrame, news_date: datetime) -> Optional[float]:
        """
        Calculate stock return on the same day the news was published (intraday).

        If the news date has no rows in ``data``, the first later date present
        in the data is used instead.

        Args:
            data: Stock price DataFrame with a datetime index and 'Open'/'Close' columns
            news_date: Date when news was published (datetime or date)

        Returns:
            Intraday return percentage or None if calculation fails
        """
        try:
            news_date_only = self._as_date(news_date)
            index_dates = data.index.date

            trading_date = self._first_trading_date(index_dates, news_date_only, inclusive=True)
            if trading_date is None:
                logger.warning("No trading data available for or after %s", news_date_only)
                return None
            if trading_date != news_date_only:
                logger.info(
                    "News date %s was not a trading day, using next trading day: %s",
                    news_date_only, trading_date,
                )

            # Intraday return: first open to last close of the chosen day.
            day_data = data[index_dates == trading_date]
            open_price = day_data['Open'].iloc[0]
            close_price = day_data['Close'].iloc[-1]
            return_pct = self._percent_change(open_price, close_price)
            if return_pct is None:
                logger.warning("Invalid open/close prices for same-day return on %s", trading_date)
                return None

            logger.info(
                "Calculated same-day return for %s: %.2f%% (Open: %.2f, Close: %.2f)",
                trading_date, return_pct, open_price, close_price,
            )
            return return_pct
        except Exception as e:
            logger.error("Error calculating same-day return: %s", e)
            return None

    def calculate_next_24h_return(self, data: pd.DataFrame, news_date: datetime) -> Optional[float]:
        """
        Calculate stock return over the next 24 hours after news publication.

        Measured close-to-close: from the close of the news day (or the first
        later day present in ``data``) to the close of the following day in
        the data.

        Args:
            data: Stock price DataFrame with a datetime index and a 'Close' column
            news_date: Date when news was published (datetime or date)

        Returns:
            24-hour return percentage or None if calculation fails
        """
        try:
            news_date_only = self._as_date(news_date)
            index_dates = data.index.date

            start_trading_date = self._first_trading_date(index_dates, news_date_only, inclusive=True)
            if start_trading_date is None:
                logger.warning("No trading data available for or after %s", news_date_only)
                return None

            end_trading_date = self._first_trading_date(index_dates, start_trading_date, inclusive=False)
            if end_trading_date is None:
                logger.warning("No next trading day available after %s", start_trading_date)
                return None

            # Use close of start day and close of next day
            closes = data['Close']
            start_price = closes[index_dates == start_trading_date].iloc[-1]
            end_price = closes[index_dates == end_trading_date].iloc[-1]
            return_pct = self._percent_change(start_price, end_price)
            if return_pct is None:
                logger.warning("Insufficient data for 24h return calculation")
                return None

            logger.info(
                "Calculated 24h return from %s to %s: %.2f%%",
                start_trading_date, end_trading_date, return_pct,
            )
            return return_pct
        except Exception as e:
            logger.error("Error calculating 24h return: %s", e)
            return None

    def calculate_return(self, data: pd.DataFrame, news_date: datetime, hours: int = 24) -> Optional[float]:
        """
        Legacy method - now returns same-day return for compatibility.

        ``hours`` is accepted but ignored.
        Use calculate_same_day_return() or calculate_next_24h_return() for specific needs.
        """
        return self.calculate_same_day_return(data, news_date)

    def calculate_volatility(self, data: pd.DataFrame, days: int = 14) -> Optional[float]:
        """
        Calculate rolling volatility for the stock.

        The result is the standard deviation of the last ``days`` daily close
        returns, annualized with sqrt(252) and expressed in percent. The input
        DataFrame is not modified.

        Args:
            data: Stock price DataFrame with a 'Close' column
            days: Number of days for volatility calculation

        Returns:
            Volatility percentage or None if calculation fails
        """
        try:
            # N daily returns require N + 1 closing prices.
            if len(data) < days + 1:
                logger.warning("Insufficient data for %s-day volatility calculation", days)
                return None

            # Keep the returns in a local Series so the caller's frame is untouched.
            daily_returns = data['Close'].pct_change()
            volatility = daily_returns.rolling(window=days).std() * np.sqrt(252) * 100

            valid = volatility.dropna()
            if valid.empty:
                logger.warning("Insufficient data for %s-day volatility calculation", days)
                return None

            recent_volatility = float(valid.iloc[-1])
            logger.info("Calculated %s-day volatility: %.2f%%", days, recent_volatility)
            return recent_volatility
        except Exception as e:
            logger.error("Error calculating volatility: %s", e)
            return None

    def get_market_return(self, news_date: datetime, hours: int = 24) -> Optional[float]:
        """
        Get market index return for the same day as news publication.

        Args:
            news_date: Date when news was published
            hours: Deprecated parameter (kept for compatibility); it does not
                change the result, which is always the same-day return

        Returns:
            Market return percentage for the news day or None if calculation fails
        """
        try:
            market_data = self._fetch_market_data(news_date)
            if market_data is None:
                return None
            return self.calculate_return(market_data, news_date, hours)
        except Exception as e:
            logger.error("Error getting market return: %s", e)
            return None

    def get_stock_evaluation_data(self, ticker: str, news_date: datetime) -> Dict:
        """
        Get comprehensive stock data for evaluation including both same-day and 24h returns.

        Args:
            ticker: Stock ticker symbol
            news_date: Date when news was published

        Returns:
            Dictionary containing all relevant market data, or a dictionary
            with a single "error" key when the data could not be produced
        """
        try:
            # Define date range (get extra days for volatility calculation)
            start_date = news_date - timedelta(days=30)
            end_date = news_date + timedelta(days=5)

            stock_data = self.fetch_stock_data(ticker, start_date, end_date)
            if stock_data is None:
                return {"error": f"Could not fetch data for ticker {ticker}"}

            same_day_return = self.calculate_same_day_return(stock_data, news_date)
            next_24h_return = self.calculate_next_24h_return(stock_data, news_date)
            volatility_14d = self.calculate_volatility(stock_data, 14)

            # Fetch the index once and measure it over the same two periods as
            # the stock, so each alpha compares like with like.
            market_same_day = None
            market_24h = None
            market_data = self._fetch_market_data(news_date)
            if market_data is not None:
                market_same_day = self.calculate_same_day_return(market_data, news_date)
                market_24h = self.calculate_next_24h_return(market_data, news_date)

            # Alpha = stock return minus market return over the same period.
            alpha_same_day = None
            alpha_24h = None
            if same_day_return is not None and market_same_day is not None:
                alpha_same_day = same_day_return - market_same_day
            if next_24h_return is not None and market_24h is not None:
                alpha_24h = next_24h_return - market_24h

            return {
                "ticker": ticker,
                "return_same_day": same_day_return,
                "return_next_24h": next_24h_return,
                "return_24h": same_day_return,  # Keep for compatibility with existing code
                "volatility_14d": volatility_14d,
                "market_return_same_day": market_same_day,
                "market_return_24h": market_24h,
                "market_return": market_same_day,  # Keep for compatibility
                "alpha_same_day": alpha_same_day,
                "alpha_24h": alpha_24h,
                "alpha_adjusted": alpha_same_day,  # Keep for compatibility
                "data_points": len(stock_data),
                "date_range": {
                    "start": stock_data.index[0].strftime("%Y-%m-%d"),
                    "end": stock_data.index[-1].strftime("%Y-%m-%d"),
                },
            }
        except Exception as e:
            logger.error("Error getting evaluation data: %s", e)
            return {"error": str(e)}