# Page-header residue kept as a comment: Spaces: mingdom/folio (Sleeping) - File size: 10,197 Bytes - revision ce4bc73

"""
Yahoo Finance data fetcher using yfinance.

This module provides a YFinanceDataFetcher class that mirrors the functionality
of the DataFetcher class in src/v2/data_fetcher.py but uses yfinance as the data source.
"""

import logging
import os

import pandas as pd

import yfinance as yf
from src.stockdata import DataFetcherInterface

# Module-level logger named after this module; every log call in this file goes through it.
logger = logging.getLogger(__name__)


class YFinanceDataFetcher(DataFetcherInterface):
    """Fetch historical stock data from Yahoo Finance (via yfinance) with a CSV file cache.

    Each (ticker, period, interval) combination is cached as one CSV file inside
    ``cache_dir``. ``fetch_data`` serves from that file while the cache is
    considered valid, downloads otherwise, and falls back to an expired cache
    file when the download fails with a data error.
    """

    # Default period for beta calculations (3 months provides more current market behavior)
    beta_period = "3m"

    # Cache TTL in seconds (1 day) used when neither the caller nor the config supplies one.
    _DEFAULT_CACHE_TTL = 86400

    # Period strings that are passed through to yfinance unchanged.
    _YF_PERIODS = (
        "1d",
        "5d",
        "1mo",
        "3mo",
        "6mo",
        "1y",
        "2y",
        "5y",
        "10y",
        "ytd",
        "max",
    )

    # Period used when the requested period cannot be interpreted.
    _DEFAULT_YF_PERIOD = "1y"

    # Longest bounded period; requests beyond every ladder rung are capped here.
    _LONGEST_BOUNDED_PERIOD = "10y"

    # Recognised suffixes and the unit they denote. "mo" is listed first so it is
    # tried before the single-letter suffixes.
    _PERIOD_SUFFIXES = (
        ("mo", "month"),
        ("y", "year"),
        ("m", "month"),
        ("d", "day"),
    )

    # For each unit: ascending (upper bound, yfinance period) pairs. A request is
    # mapped to the first rung whose bound is >= the requested count, i.e. it is
    # rounded UP so the result covers at least the requested span. Day bounds
    # are calendar approximations (30 days per month, 365 per year).
    _PERIOD_LADDERS = {
        "year": ((1, "1y"), (2, "2y"), (5, "5y")),
        "month": (
            (1, "1mo"),
            (3, "3mo"),
            (6, "6mo"),
            (12, "1y"),
            (24, "2y"),
            (60, "5y"),
        ),
        "day": (
            (1, "1d"),
            (5, "5d"),
            (30, "1mo"),
            (90, "3mo"),
            (180, "6mo"),
            (365, "1y"),
            (730, "2y"),
            (1825, "5y"),
        ),
    }

    def __init__(self, cache_dir=".cache_yf", cache_ttl=None):
        """
        Initialize the YFinanceDataFetcher.

        Creates ``cache_dir`` if it does not exist yet.

        Args:
            cache_dir (str): Directory to store cached data
            cache_ttl (int, optional): Cache TTL in seconds. If None, the value of
                the ``app.cache.ttl`` config key is used, falling back to one day
                when the key or the config module is unavailable.
        """
        self.cache_dir = cache_dir

        # Create cache directory if it doesn't exist
        os.makedirs(cache_dir, exist_ok=True)

        if cache_ttl is not None:
            self.cache_ttl = cache_ttl
            return

        # The config module is optional; imported lazily so this class still
        # works when src.v2.config cannot be imported.
        try:
            from src.v2.config import config
        except ImportError:
            self.cache_ttl = self._DEFAULT_CACHE_TTL
        else:
            self.cache_ttl = config.get("app.cache.ttl", self._DEFAULT_CACHE_TTL)

    def fetch_data(self, ticker, period="3m", interval="1d"):
        """
        Fetch stock data for a ticker, preferring a valid cache over a download.

        Order of attempts:
          1. the cache file, when ``should_use_cache`` accepts it;
          2. a fresh download through yfinance (then written to the cache);
          3. the existing (expired) cache file, only if the download failed with
             a data error.

        Args:
            ticker (str): Stock ticker symbol
            period (str): Time period ('1y', '5y', '3m', etc.)
            interval (str): Data interval ('1d', '1wk', etc.)

        Returns:
            pandas.DataFrame: DataFrame with stock data

        Raises:
            ValueError: If the download fails and no readable cache file exists.
            Exception: Any other error is logged and re-raised unchanged.
        """
        cache_path = self._get_cache_path(ticker, period, interval)

        # Use the centralized cache validation logic
        from src.stockdata import should_use_cache

        should_use, reason = should_use_cache(cache_path, self.cache_ttl)

        if should_use:
            logger.info(f"Loading {ticker} data from cache: {reason}")
            try:
                return self._load_cached_frame(cache_path)
            except Exception as e:
                # An unreadable cache is not fatal: fall through to a download.
                logger.warning(f"Error reading cache for {ticker}: {e}")
        else:
            logger.info(f"Cache for {ticker} is not valid: {reason}")

        try:
            logger.info(f"Fetching data for {ticker} from Yahoo Finance")
            df = self._fetch_from_yfinance(ticker, period, interval)
        except (ValueError, pd.errors.EmptyDataError) as e:
            # These are expected errors that can happen with valid inputs
            # For example, a valid ticker that has no data available
            logger.warning(f"Data fetch error for {ticker}: {e}")

            # Only use expired cache for expected data errors, not for programming errors
            if os.path.exists(cache_path):
                logger.warning(f"Using expired cache for {ticker} as fallback")
                try:
                    return self._load_cached_frame(cache_path)
                except (pd.errors.ParserError, pd.errors.EmptyDataError) as cache_e:
                    logger.error(f"Error reading cache for {ticker}: {cache_e}")
                    # Re-raise the original error since cache fallback failed
                    raise e from cache_e

            # Re-raise the original exception if no cache fallback
            raise
        except (ImportError, NameError, AttributeError, TypeError, SyntaxError) as e:
            # These are programming errors that should never be caught silently
            logger.critical(f"Critical error in data fetcher: {e}", exc_info=True)
            raise
        except Exception as e:
            # For other unexpected errors, log and re-raise
            logger.error(
                f"Unexpected error fetching data for {ticker}: {e}", exc_info=True
            )
            raise

        # Caching is best-effort: the data was downloaded successfully, so a
        # failed write must not turn the whole call into an error.
        self._store_cached_frame(df, cache_path, ticker)
        return df

    def fetch_market_data(self, market_index="SPY", period=None, interval="1d"):
        """
        Fetch market index data for beta calculations.

        Thin wrapper around ``fetch_data`` that substitutes the class-level
        ``beta_period`` when no period is given.

        Args:
            market_index (str): Market index ticker symbol (default: 'SPY' for S&P 500 ETF)
            period (str, optional): Time period ('1y', '5y', etc.). If None, uses the class beta_period.
            interval (str): Data interval ('1d', '1wk', etc.)

        Returns:
            pandas.DataFrame: DataFrame with market index data
        """
        if period is None:
            period = self.beta_period
            logger.info(f"Using default beta period: {period}")

        return self.fetch_data(market_index, period, interval)

    def _fetch_from_yfinance(self, ticker, period="1y", interval="1d"):
        """
        Download history for ``ticker`` from Yahoo Finance using yfinance.

        Args:
            ticker (str): Stock ticker symbol
            period (str): Time period ('1y', '5y', etc.); normalised through
                ``_map_period_to_yfinance`` before the request
            interval (str): Data interval ('1d', '1wk', etc.)

        Returns:
            pandas.DataFrame: Data as returned by yfinance, with the index named
            'date' and any timezone removed from the index.

        Raises:
            ValueError: If yfinance raises, or returns no rows. The message is one of
                "No historical data found for <ticker>", "Invalid ticker: <ticker>"
                or "Error fetching data for <ticker>: <cause>".
        """
        yf_period = self._map_period_to_yfinance(period)

        # Only the network call is guarded, so the ValueError raised below for an
        # empty result is not re-wrapped into a second, nested message.
        try:
            df = yf.Ticker(ticker).history(period=yf_period, interval=interval)
        except Exception as e:
            # Map yfinance-specific errors to consistent error messages
            detail = str(e)
            if "No data found" in detail:
                raise ValueError(f"No historical data found for {ticker}") from e
            if "Invalid ticker" in detail:
                raise ValueError(f"Invalid ticker: {ticker}") from e
            # Re-raise with more context
            raise ValueError(f"Error fetching data for {ticker}: {e}") from e

        if df is None or df.empty:
            raise ValueError(f"No historical data found for {ticker}")

        # Column names (Open, High, Low, Close, Volume, Dividends, Stock Splits)
        # are used exactly as yfinance provides them; only the index is adjusted.
        df.index.name = "date"

        # Convert timezone-aware timestamps to naive timestamps
        # This is important for compatibility with the current implementation
        if getattr(df.index, "tz", None) is not None:
            df.index = df.index.tz_localize(None)

        return df

    def _map_period_to_yfinance(self, period):
        """
        Map a period string to one of the periods yfinance accepts.

        Strings already in yfinance format (1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y,
        10y, ytd, max) are returned unchanged. Otherwise ``<N>y``, ``<N>m`` /
        ``<N>mo`` and ``<N>d`` are rounded UP to the smallest supported period
        covering the request (e.g. '3y' -> '5y', '2m' -> '3mo', '18m' -> '2y',
        '3d' -> '5d'), capped at '10y'. Counts of zero or less map to the
        smallest period of their unit.

        Args:
            period (str): Period string ('1y', '5y', '3m', '10d', etc.)

        Returns:
            str: Period string in yfinance format; '1y' (with a logged warning)
            when the input cannot be interpreted.
        """
        if period in self._YF_PERIODS:
            return period

        for suffix, unit in self._PERIOD_SUFFIXES:
            if not period.endswith(suffix):
                continue

            try:
                count = int(period[: -len(suffix)])
            except ValueError:
                logger.warning(f"Invalid {unit} format: {period}, defaulting to '1y'")
                return self._DEFAULT_YF_PERIOD

            for upper_bound, yf_period in self._PERIOD_LADDERS[unit]:
                if count <= upper_bound:
                    return yf_period
            return self._LONGEST_BOUNDED_PERIOD

        # Default to 1y if period format is not recognized
        logger.warning(f"Unrecognized period format: {period}, defaulting to '1y'")
        return self._DEFAULT_YF_PERIOD

    def _get_cache_path(self, ticker, period, interval):
        """
        Get the path to the cache file for a ticker/period/interval combination.

        Path separators inside the generated file name are replaced with '_' so
        that the file always lives directly inside ``cache_dir``.

        Args:
            ticker (str): Stock ticker symbol
            period (str): Time period
            interval (str): Data interval

        Returns:
            str: Path to cache file
        """
        filename = f"{ticker}_{period}_{interval}.csv"
        for separator in (os.sep, os.altsep):
            # os.altsep is None on platforms with a single separator.
            if separator:
                filename = filename.replace(separator, "_")
        return os.path.join(self.cache_dir, filename)

    @staticmethod
    def _load_cached_frame(cache_path):
        """Read a cached CSV, using its first column as a date-parsed index."""
        return pd.read_csv(cache_path, index_col=0, parse_dates=True)

    @staticmethod
    def _store_cached_frame(df, cache_path, ticker):
        """Write ``df`` to ``cache_path``; log a warning instead of raising on I/O errors."""
        try:
            df.to_csv(cache_path)
        except OSError as e:
            logger.warning(f"Could not write cache for {ticker}: {e}")