File size: 10,197 Bytes
ce4bc73 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 |
"""
Yahoo Finance data fetcher using yfinance.
This module provides a YFinanceDataFetcher class that mirrors the functionality
of the DataFetcher class in src/v2/data_fetcher.py but uses yfinance as the data source.
"""
import logging
import os
import pandas as pd
import yfinance as yf
from src.stockdata import DataFetcherInterface
logger = logging.getLogger(__name__)
class YFinanceDataFetcher(DataFetcherInterface):
    """Fetch historical stock data from Yahoo Finance using yfinance.

    Every (ticker, period, interval) combination is cached on disk as one CSV
    file inside ``cache_dir``. Whether a cached file is still usable is decided
    by ``src.stockdata.should_use_cache`` together with ``cache_ttl``.
    """

    # Default period for beta calculations (3 months provides more current market behavior)
    beta_period = "3m"

    # Fallback cache lifetime (one day, in seconds) when neither the caller nor
    # the project config supplies one.
    _DEFAULT_CACHE_TTL = 86400

    # Period strings that yfinance accepts unchanged.
    _YF_VALID_PERIODS = (
        "1d",
        "5d",
        "1mo",
        "3mo",
        "6mo",
        "1y",
        "2y",
        "5y",
        "10y",
        "ytd",
        "max",
    )

    # Rounding table for non-standard periods. Each entry is
    # (suffix, unit name used in log messages,
    #  ((inclusive upper bound, yfinance period), ...), period used above the
    #  last bound). A count is rounded UP to the first bucket that covers it.
    _PERIOD_BUCKETS = (
        ("y", "year", ((1, "1y"), (2, "2y"), (5, "5y")), "10y"),
        ("mo", "month", ((1, "1mo"), (3, "3mo"), (6, "6mo")), "1y"),
        ("m", "month", ((1, "1mo"), (3, "3mo"), (6, "6mo")), "1y"),
        ("d", "day", ((1, "1d"), (5, "5d")), "1mo"),
    )

    def __init__(self, cache_dir=".cache_yf", cache_ttl=None):
        """
        Initialize the YFinanceDataFetcher.

        Args:
            cache_dir (str): Directory to store cached data. Created if missing.
            cache_ttl (int, optional): Cache TTL in seconds. If None, the value of
                ``app.cache.ttl`` from ``src.v2.config`` is used, falling back to
                one day when that module cannot be imported.
        """
        self.cache_dir = cache_dir
        os.makedirs(cache_dir, exist_ok=True)

        if cache_ttl is not None:
            self.cache_ttl = cache_ttl
            return

        # The config module is optional, so it is imported lazily.
        try:
            from src.v2.config import config

            self.cache_ttl = config.get("app.cache.ttl", self._DEFAULT_CACHE_TTL)
        except ImportError:
            self.cache_ttl = self._DEFAULT_CACHE_TTL

    def fetch_data(self, ticker, period="3m", interval="1d"):
        """
        Fetch stock data for a ticker, preferring a valid on-disk cache.

        Args:
            ticker (str): Stock ticker symbol
            period (str): Time period ('1y', '5y', etc.)
            interval (str): Data interval ('1d', '1wk', etc.)

        Returns:
            pandas.DataFrame: DataFrame with stock data

        Raises:
            ValueError: If no data could be fetched and no readable cache file
                exists to fall back on.
            Exception: Any other error raised while fetching is logged and
                re-raised unchanged.
        """
        cache_path = self._get_cache_path(ticker, period, interval)

        # Use the centralized cache validation logic
        from src.stockdata import should_use_cache

        should_use, reason = should_use_cache(cache_path, self.cache_ttl)
        if should_use:
            logger.info("Loading %s data from cache: %s", ticker, reason)
            try:
                return pd.read_csv(cache_path, index_col=0, parse_dates=True)
            except Exception as e:
                # An unreadable cache is not fatal: fall through to the API.
                logger.warning("Error reading cache for %s: %s", ticker, e)
        else:
            logger.info("Cache for %s is not valid: %s", ticker, reason)

        try:
            logger.info("Fetching data for %s from Yahoo Finance", ticker)
            df = self._fetch_from_yfinance(ticker, period, interval)
        except (ValueError, pd.errors.EmptyDataError) as e:
            # Expected data errors (e.g. a valid ticker with no data available).
            logger.warning("Data fetch error for %s: %s", ticker, e)
            # Only expected data errors may fall back to an expired cache.
            if os.path.exists(cache_path):
                logger.warning("Using expired cache for %s as fallback", ticker)
                try:
                    return pd.read_csv(cache_path, index_col=0, parse_dates=True)
                except (pd.errors.ParserError, pd.errors.EmptyDataError) as cache_e:
                    logger.error("Error reading cache for %s: %s", ticker, cache_e)
                    # Cache fallback failed: surface the original fetch error.
                    raise e from cache_e
            raise
        except (ImportError, NameError, AttributeError, TypeError, SyntaxError) as e:
            # Programming errors must never be hidden behind a cache fallback.
            logger.critical("Critical error in data fetcher: %s", e, exc_info=True)
            raise
        except Exception as e:
            logger.error(
                "Unexpected error fetching data for %s: %s", ticker, e, exc_info=True
            )
            raise

        # Caching is best-effort: a failed write (permissions, full disk, ...)
        # must not throw away data that was fetched successfully.
        try:
            df.to_csv(cache_path)
        except OSError as e:
            logger.warning(
                "Could not write cache for %s to %s: %s", ticker, cache_path, e
            )
        return df

    def fetch_market_data(self, market_index="SPY", period=None, interval="1d"):
        """
        Fetch market index data for beta calculations.

        Args:
            market_index (str): Market index ticker symbol (default: 'SPY' for S&P 500 ETF)
            period (str, optional): Time period ('1y', '5y', etc.). If None, uses the
                class ``beta_period``.
            interval (str): Data interval ('1d', '1wk', etc.)

        Returns:
            pandas.DataFrame: DataFrame with market index data
        """
        if period is None:
            period = self.beta_period
            logger.info("Using default beta period: %s", period)
        return self.fetch_data(market_index, period, interval)

    def _fetch_from_yfinance(self, ticker, period="1y", interval="1d"):
        """
        Fetch data from Yahoo Finance using yfinance.

        Args:
            ticker (str): Stock ticker symbol
            period (str): Time period ('1y', '5y', etc.)
            interval (str): Data interval ('1d', '1wk', etc.)

        Returns:
            pandas.DataFrame: Stock data indexed by a timezone-naive index
            named 'date'.

        Raises:
            ValueError: If yfinance raises, or returns an empty frame.
        """
        yf_period = self._map_period_to_yfinance(period)

        try:
            df = yf.Ticker(ticker).history(period=yf_period, interval=interval)
        except Exception as e:
            # Map yfinance-specific errors to consistent error messages.
            # NOTE(review): this also converts programming errors raised inside
            # yfinance into ValueError, as the original code did - confirm that
            # callers rely on ValueError before narrowing this.
            details = str(e)
            if "No data found" in details:
                raise ValueError(f"No historical data found for {ticker}") from e
            if "Invalid ticker" in details:
                raise ValueError(f"Invalid ticker: {ticker}") from e
            raise ValueError(f"Error fetching data for {ticker}: {e}") from e

        # Raised outside the try block so this message is not caught and wrapped
        # a second time as "Error fetching data for ...".
        if df.empty:
            raise ValueError(f"No historical data found for {ticker}")

        # yfinance already returns Open/High/Low/Close/Volume/Dividends/
        # Stock Splits under the expected names, so no column renaming is needed.
        df.index.name = "date"

        # Convert timezone-aware timestamps to naive timestamps for
        # compatibility with the current implementation.
        if getattr(df.index, "tz", None) is not None:
            df.index = df.index.tz_localize(None)
        return df

    def _map_period_to_yfinance(self, period):
        """
        Map a period string to one of the periods yfinance accepts.

        Matching ignores case and surrounding whitespace. Periods already in
        yfinance format are returned as-is; '<n>y', '<n>m', '<n>mo' and '<n>d'
        are rounded up to the nearest supported period; anything else falls
        back to '1y' with a warning.

        Args:
            period (str): Period string ('1y', '5y', '3m', etc.)

        Returns:
            str: Period string in yfinance format
        """
        default = "1y"

        if not isinstance(period, str):
            logger.warning(
                "Unrecognized period format: %s, defaulting to '1y'", period
            )
            return default

        normalized = period.strip().lower()
        if normalized in self._YF_VALID_PERIODS:
            return normalized

        for suffix, unit, buckets, overflow in self._PERIOD_BUCKETS:
            if not normalized.endswith(suffix):
                continue
            try:
                count = int(normalized[: -len(suffix)])
            except ValueError:
                logger.warning(
                    "Invalid %s format: %s, defaulting to '1y'", unit, period
                )
                return default
            for upper_bound, mapped in buckets:
                if count <= upper_bound:
                    return mapped
            return overflow

        logger.warning("Unrecognized period format: %s, defaulting to '1y'", period)
        return default

    @staticmethod
    def _safe_filename_part(value):
        """Return ``value`` as text with path separators replaced by '_'."""
        text = str(value)
        for separator in ("/", "\\"):
            text = text.replace(separator, "_")
        return text

    def _get_cache_path(self, ticker, period, interval):
        """
        Get the path to the cache file for a ticker.

        Path separators in any component are replaced with '_' so the file
        always lives directly inside ``cache_dir``.

        Args:
            ticker (str): Stock ticker symbol
            period (str): Time period
            interval (str): Data interval

        Returns:
            str: Path to cache file
        """
        safe_ticker, safe_period, safe_interval = (
            self._safe_filename_part(part) for part in (ticker, period, interval)
        )
        return os.path.join(
            self.cache_dir, f"{safe_ticker}_{safe_period}_{safe_interval}.csv"
        )
|