File size: 10,197 Bytes
ce4bc73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
"""
Yahoo Finance data fetcher using yfinance.

This module provides a YFinanceDataFetcher class that mirrors the functionality
of the DataFetcher class in src/v2/data_fetcher.py but uses yfinance as the data source.
"""

import logging
import os

import pandas as pd

import yfinance as yf
from src.stockdata import DataFetcherInterface

logger = logging.getLogger(__name__)


class YFinanceDataFetcher(DataFetcherInterface):
    """Class to fetch stock data from Yahoo Finance API using yfinance"""

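    # Default period for beta calculations: three months, so that beta reflects
    # recent market behavior. "3m" is this module's month notation; it is
    # converted to yfinance's "3mo" by _map_period_to_yfinance.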
    beta_period = "3m"

    def __init__(self, cache_dir=".cache_yf", cache_ttl=None):
        """
        Initialize the YFinanceDataFetcher.

        Args:
            cache_dir (str): Directory to store cached data
            cache_ttl (int, optional): Cache TTL in seconds. If None, uses config or default.
        """
        self.cache_dir = cache_dir

        # Create cache directory if it doesn't exist
        os.makedirs(cache_dir, exist_ok=True)

        # Get cache TTL from config or use default (1 day)
        if cache_ttl is None:
            try:
                from src.v2.config import config

                self.cache_ttl = config.get("app.cache.ttl", 86400)
            except ImportError:
                self.cache_ttl = 86400
        else:
            self.cache_ttl = cache_ttl

    def fetch_data(self, ticker, period="3m", interval="1d"):
        """
        Fetch stock data for a ticker, preferring a still-valid cache file.

        Lookup order:
            1. The cache file, when ``should_use_cache`` accepts it for
               ``self.cache_ttl``.
            2. A live download through ``_fetch_from_yfinance``; the result is
               then written to the cache on a best-effort basis.
            3. For expected data errors only: whatever cache file exists on
               disk, even if it has expired.

        Args:
            ticker (str): Stock ticker symbol
            period (str): Time period ('3m', '1y', '5y', etc.)
            interval (str): Data interval ('1d', '1wk', etc.)

        Returns:
            pandas.DataFrame: DataFrame with stock data

        Raises:
            ValueError, pandas.errors.EmptyDataError: The download failed and
                no readable cache file could be used as a fallback.
            Exception: Any other error from the download is logged and
                re-raised unchanged.
        """
        # Check cache first
        cache_path = self._get_cache_path(ticker, period, interval)

        # Use the centralized cache validation logic
        from src.stockdata import should_use_cache

        should_use, reason = should_use_cache(cache_path, self.cache_ttl)

        if should_use:
            logger.info(f"Loading {ticker} data from cache: {reason}")
            try:
                return pd.read_csv(cache_path, index_col=0, parse_dates=True)
            except Exception as e:
                logger.warning(f"Error reading cache for {ticker}: {e}")
                # Continue to fetch from API
        else:
            logger.info(f"Cache for {ticker} is not valid: {reason}")

        # Fetch from yfinance. Only the download lives in this try block so that
        # the handlers below never mistake a cache-write problem for a fetch error.
        try:
            logger.info(f"Fetching data for {ticker} from Yahoo Finance")
            df = self._fetch_from_yfinance(ticker, period, interval)
        except (ValueError, pd.errors.EmptyDataError) as e:
            # These are expected errors that can happen with valid inputs
            # For example, a valid ticker that has no data available
            logger.warning(f"Data fetch error for {ticker}: {e}")

            # Only use expired cache for expected data errors, not for programming errors
            if os.path.exists(cache_path):
                logger.warning(f"Using expired cache for {ticker} as fallback")
                try:
                    return pd.read_csv(cache_path, index_col=0, parse_dates=True)
                except (pd.errors.ParserError, pd.errors.EmptyDataError) as cache_e:
                    logger.error(f"Error reading cache for {ticker}: {cache_e}")
                    # Re-raise the original error since cache fallback failed
                    raise e from cache_e

            # Re-raise the original exception if no cache fallback
            raise
        except (ImportError, NameError, AttributeError, TypeError, SyntaxError) as e:
            # These are programming errors that should never be caught silently
            logger.critical(f"Critical error in data fetcher: {e}", exc_info=True)
            raise
        except Exception as e:
            # For other unexpected errors, log and re-raise
            logger.error(
                f"Unexpected error fetching data for {ticker}: {e}", exc_info=True
            )
            raise

        # Save to cache. The cache is an optimisation: if the file cannot be
        # written (directory removed, disk full, permissions), the freshly
        # downloaded data is still returned instead of being thrown away.
        try:
            df.to_csv(cache_path)
        except OSError as e:
            logger.warning(f"Could not write cache for {ticker}: {e}")

        return df

    def fetch_market_data(self, market_index="SPY", period=None, interval="1d"):
        """
        Fetch market index data for beta calculations.

        Args:
            market_index (str): Market index ticker symbol (default: 'SPY' for S&P 500 ETF)
            period (str, optional): Time period ('1y', '5y', etc.). If None, uses the class beta_period.
            interval (str): Data interval ('1d', '1wk', etc.)

        Returns:
            pandas.DataFrame: DataFrame with market index data
        """
        # Use the class beta_period if period is None
        if period is None:
            period = self.beta_period
            logger.info(f"Using default beta period: {period}")

        # Call fetch_data with the market index ticker
        return self.fetch_data(market_index, period, interval)

    def _fetch_from_yfinance(self, ticker, period="1y", interval="1d"):
        """
        Download price history for one ticker from Yahoo Finance via yfinance.

        The returned frame keeps yfinance's column names (Open, High, Low,
        Close, Volume, Dividends, Stock Splits, as delivered), has its index
        named 'date', and carries timezone-naive timestamps.

        Args:
            ticker (str): Stock ticker symbol
            period (str): Time period ('1y', '5y', etc.); converted with
                ``_map_period_to_yfinance`` before the request is made.
            interval (str): Data interval ('1d', '1wk', etc.)

        Returns:
            pandas.DataFrame: DataFrame with stock data

        Raises:
            ValueError: If yfinance raises for any reason (the original
                exception is chained as the cause) or returns no rows.
        """
        yf_period = self._map_period_to_yfinance(period)

        try:
            df = yf.Ticker(ticker).history(period=yf_period, interval=interval)
        except Exception as e:
            # Map yfinance-specific errors to consistent error messages
            message = str(e)
            if "No data found" in message:
                raise ValueError(f"No historical data found for {ticker}") from e
            if "Invalid ticker" in message:
                raise ValueError(f"Invalid ticker: {ticker}") from e
            # Re-raise with more context
            raise ValueError(f"Error fetching data for {ticker}: {e}") from e

        # Checked outside the try block on purpose: raising it inside would send
        # this ValueError through the handler above, which re-wrapped it as
        # "Error fetching data for X: No historical data found for X".
        if df.empty:
            raise ValueError(f"No historical data found for {ticker}")

        # Ensure index is named 'date'
        df.index.name = "date"

        # Convert timezone-aware timestamps to naive timestamps (wall-clock time
        # is kept, only the zone is dropped). getattr keeps this a no-op for an
        # index type that has no timezone attribute at all.
        if getattr(df.index, "tz", None) is not None:
            df.index = df.index.tz_localize(None)

        return df

    def _map_period_to_yfinance(self, period):
        """
        Map period string to yfinance format.

        Args:
            period (str): Period string ('1y', '5y', etc.)

        Returns:
            str: Period string in yfinance format
        """
        # yfinance accepts these period formats:
        # 1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y, 10y, ytd, max

        # Initialize result with default value
        result = "1y"  # Default value

        # Check if period is already in yfinance format
        valid_periods = [
            "1d",
            "5d",
            "1mo",
            "3mo",
            "6mo",
            "1y",
            "2y",
            "5y",
            "10y",
            "ytd",
            "max",
        ]
        if period in valid_periods:
            result = period
        elif period.endswith("y"):
            try:
                years = int(period[:-1])
                if years == 1:
                    result = "1y"
                elif years == 2:
                    result = "2y"
                elif years <= 5:
                    result = "5y"
                else:
                    result = "10y"
            except ValueError:
                # Keep default value
                logger.warning(f"Invalid year format: {period}, defaulting to '1y'")
        elif period.endswith("m"):
            try:
                months = int(period[:-1])
                if months <= 1:
                    result = "1mo"
                elif months <= 3:
                    result = "3mo"
                elif months <= 6:
                    result = "6mo"
                else:
                    result = "1y"
            except ValueError:
                # Keep default value
                logger.warning(f"Invalid month format: {period}, defaulting to '1y'")
        elif period.endswith("d"):
            try:
                days = int(period[:-1])
                if days <= 1:
                    result = "1d"
                elif days <= 5:
                    result = "5d"
                else:
                    result = "1mo"
            except ValueError:
                # Keep default value
                logger.warning(f"Invalid day format: {period}, defaulting to '1y'")
        else:
            # Default to 1y if period format is not recognized
            logger.warning(f"Unrecognized period format: {period}, defaulting to '1y'")

        return result

    def _get_cache_path(self, ticker, period, interval):
        """
        Get the path to the cache file for a ticker.

        Args:
            ticker (str): Stock ticker symbol
            period (str): Time period
            interval (str): Data interval

        Returns:
            str: Path to cache file
        """
        return os.path.join(self.cache_dir, f"{ticker}_{period}_{interval}.csv")