Spaces:
Sleeping
Sleeping
| """Yahoo Finance data provider implementation.""" | |
| import time | |
| from datetime import datetime | |
| from typing import Any, Dict, List | |
| import pandas as pd | |
| import yfinance as yf | |
| from utils.errors import ( | |
| NoDataReturnedError, | |
| TickerNotFoundError, | |
| wrap_provider_error, | |
| ) | |
| from utils.retry import exponential_backoff | |
| from .base import DataProvider, ProviderException | |
class YahooFinanceProvider(DataProvider):
    """Yahoo Finance data provider using the yfinance library.

    Provides OHLC price history, fundamental metrics, recent news and a
    reachability probe. Unexpected failures in the ``fetch_*`` methods are
    converted with ``wrap_provider_error``.
    """

    # Maps the application's timeframe labels to yfinance ``interval`` values.
    # Labels that are not listed are passed to yfinance unchanged.
    INTERVAL_MAPPING = {
        "1m": "1m",
        "5m": "5m",
        "15m": "15m",
        "30m": "30m",
        "1h": "1h",
        "4h": "4h",
        "1d": "1d",
        "1w": "1wk",  # Yahoo Finance uses "1wk" for weekly data
        "1mo": "1mo",  # Monthly data
        "3mo": "1wk",  # 3-month view: use weekly data (quarterly data insufficient: only 33 bars available)
        "1y": "1wk",  # 1-year view: use weekly data for better performance
        "5y": "1mo",  # 5-year view: use monthly data for better performance
    }

    # (model attribute, yfinance row label, convert to millions?) triples
    # used to build each financial statement model from the latest quarter.
    _BALANCE_SHEET_FIELDS = (
        ("total_assets", "Total Assets", True),
        ("total_liabilities", "Total Liabilities Net Minority Interest", True),
        ("total_equity", "Total Equity Gross Minority Interest", True),
        ("working_capital", "Working Capital", True),
        ("cash_and_equivalents", "Cash And Cash Equivalents", True),
        ("retained_earnings", "Retained Earnings", True),
    )
    _INCOME_STATEMENT_FIELDS = (
        ("total_revenue", "Total Revenue", True),
        ("gross_profit", "Gross Profit", True),
        ("net_income", "Net Income", True),
        ("ebitda", "EBITDA", True),
        ("basic_eps", "Basic EPS", False),  # per-share value: not scaled
    )
    _CASH_FLOW_FIELDS = (
        ("operating_cash_flow", "Operating Cash Flow", True),
        ("free_cash_flow", "Free Cash Flow", True),
        ("capex", "Capital Expenditure", True),
    )

    @staticmethod
    def _get_logger():
        """Return this module's logger.

        The import is local because the module neither imports ``logging``
        nor defines a module-level ``logger``.
        """
        import logging

        return logging.getLogger(__name__)

    @staticmethod
    def _statement_value(latest, label, in_millions):
        """Return one statement entry as a float, or None if missing/NaN.

        Args:
            latest: Series holding the most recent reporting period.
            label: Row label to look up in ``latest``.
            in_millions: When true, divide the value by 1,000,000.
        """
        raw = latest.get(label)
        if not pd.notna(raw):
            return None
        value = float(raw)
        return value / 1_000_000 if in_millions else value

    def _load_statement(self, load_frame, model_cls, fields, description, ticker):
        """Build ``model_cls`` from the first column of a statement frame.

        Args:
            load_frame: Zero-argument callable returning the statement
                DataFrame; it is called inside the ``try`` so that failures
                while fetching are handled as well.
            model_cls: Statement model class to instantiate.
            fields: Iterable of (attribute, row label, in_millions) triples.
            description: Human-readable statement name used in the log line.
            ticker: Ticker symbol used in the log line.

        Returns:
            A populated ``model_cls`` instance, or ``model_cls()`` with no
            arguments when the frame is empty or anything goes wrong
            (best-effort: a broken statement must not fail the whole request).
        """
        try:
            frame = load_frame()
            if frame.empty:
                return model_cls()
            latest = frame.iloc[:, 0]  # first column is treated as the latest quarter
            values = {
                attribute: self._statement_value(latest, label, in_millions)
                for attribute, label, in_millions in fields
            }
            return model_cls(**values)
        except Exception as exc:
            self._get_logger().warning(
                "Could not load %s for %s (%s); using empty statement",
                description,
                ticker,
                exc,
            )
            return model_cls()

    @staticmethod
    def _published_at(raw_timestamp: Any) -> str:
        """Convert a Unix timestamp to an ISO-8601 string (local time).

        Falls back to timestamp 0 when the value is None, non-numeric or out
        of range, so one malformed article cannot fail the whole news request.
        """
        try:
            return datetime.fromtimestamp(raw_timestamp).isoformat()
        except (TypeError, ValueError, OverflowError, OSError):
            return datetime.fromtimestamp(0).isoformat()

    def fetch_ohlc(
        self, ticker: str, timeframe: str, start_date: str, end_date: str
    ) -> pd.DataFrame:
        """Fetch OHLC data from Yahoo Finance.

        NOTE(review): earlier documentation mentioned exponential backoff
        retries, but no retry is applied in this method; the module imports
        ``exponential_backoff`` without using it here - confirm intent.

        Args:
            ticker: Symbol to download.
            timeframe: Application timeframe label; translated through
                ``INTERVAL_MAPPING`` (unknown labels are passed through).
            start_date: Start of the range, forwarded to ``yf.download``.
            end_date: End of the range, forwarded to ``yf.download``.

        Returns:
            The result of ``self._validate_ohlc`` applied to a DataFrame with
            lower-cased column names and the date index moved into a
            ``timestamp`` column.

        Raises:
            NoDataReturnedError: If yfinance returns no rows.
            ProviderException: Re-raised unchanged when raised inside.
            Exception: Anything else is converted by ``wrap_provider_error``.
        """
        try:
            interval = self.INTERVAL_MAPPING.get(timeframe, timeframe)
            df = yf.download(
                tickers=ticker,
                start=start_date,
                end=end_date,
                interval=interval,
                progress=False,
                auto_adjust=True,
            )
            if df.empty:
                raise NoDataReturnedError(ticker, timeframe, "Yahoo Finance")

            # Handle MultiIndex columns if present (happens even with single tickers)
            if isinstance(df.columns, pd.MultiIndex):
                try:
                    # Try to extract using the ticker name at level 1
                    df = df.xs(ticker, axis=1, level=1)
                except KeyError:
                    # If ticker name doesn't match exactly, just drop the second level
                    # This can happen with special characters in ticker names
                    df = df.droplevel(1, axis=1)
                if df.empty:
                    raise NoDataReturnedError(ticker, timeframe, "Yahoo Finance")

            # Move the index into a column before lower-casing the names.
            df = df.reset_index()
            df.columns = df.columns.str.lower()

            # The former index column is called "date" or "datetime".
            if "date" in df.columns:
                df = df.rename(columns={"date": "timestamp"})
            elif "datetime" in df.columns:
                df = df.rename(columns={"datetime": "timestamp"})

            return self._validate_ohlc(df)
        except (NoDataReturnedError, TickerNotFoundError, ProviderException):
            # Already user-friendly / provider-level errors: do not wrap.
            raise
        except Exception as e:
            raise wrap_provider_error("Yahoo Finance", ticker, "fetch_ohlc", e) from e

    def fetch_fundamentals(self, ticker: str) -> Dict[str, Any]:
        """Fetch fundamental data from Yahoo Finance including financial statements.

        Builds a ``FundamentalMetrics`` object from ``Ticker.info`` plus the
        first column of the quarterly balance sheet, income statement and
        cash flow statement. Monetary statement values and market cap are
        divided by 1,000,000; ``basic_eps`` is not scaled.

        NOTE(review): the return annotation says ``Dict[str, Any]`` but a
        ``FundamentalMetrics`` instance is returned; the annotation is left
        unchanged to keep the signature stable.

        Raises:
            Exception: Any failure is converted by ``wrap_provider_error``.
                Failures confined to a single statement are logged and
                replaced by an empty statement instead.
        """
        try:
            stock = yf.Ticker(ticker)
            info = stock.info

            # Import models for financial statements
            from config.models import (
                BalanceSheet,
                CashFlowMetrics,
                CashFlowStatement,
                FinancialStatements,
                FundamentalMetrics,
                GrowthMetrics,
                IncomeStatement,
                LeverageMetrics,
                ProfitabilityMetrics,
                ValuationMetrics,
            )

            # The lambdas defer the yfinance property access so that it runs
            # inside the helper's try block.
            balance_sheet = self._load_statement(
                lambda: stock.quarterly_balance_sheet,
                BalanceSheet,
                self._BALANCE_SHEET_FIELDS,
                "quarterly balance sheet",
                ticker,
            )
            income_statement = self._load_statement(
                lambda: stock.quarterly_income_stmt,
                IncomeStatement,
                self._INCOME_STATEMENT_FIELDS,
                "quarterly income statement",
                ticker,
            )
            cash_flow_statement = self._load_statement(
                lambda: stock.quarterly_cashflow,
                CashFlowStatement,
                self._CASH_FLOW_FIELDS,
                "quarterly cash flow statement",
                ticker,
            )

            financial_statements = FinancialStatements(
                balance_sheet=balance_sheet,
                income_statement=income_statement,
                cash_flow_statement=cash_flow_statement,
            )

            market_cap = info.get("marketCap")
            return FundamentalMetrics(
                ticker=ticker,
                as_of_date=datetime.now().isoformat(),
                valuation=ValuationMetrics(
                    pe_ratio=info.get("trailingPE"),
                    pb_ratio=info.get("priceToBook"),
                    ps_ratio=info.get("priceToSalesTrailing12Months"),
                    market_cap=market_cap / 1_000_000 if market_cap else None,
                ),
                profitability=ProfitabilityMetrics(
                    gross_margin=info.get("grossMargins"),
                    operating_margin=info.get("operatingMargins"),
                    net_margin=info.get("profitMargins"),
                    roe=info.get("returnOnEquity"),
                ),
                growth=GrowthMetrics(
                    revenue_growth_yoy=info.get("revenueGrowth"),
                    earnings_growth_yoy=info.get("earningsGrowth"),
                ),
                cash_flow=CashFlowMetrics(
                    free_cash_flow=cash_flow_statement.free_cash_flow,
                    operating_cash_flow=cash_flow_statement.operating_cash_flow,
                    capex=cash_flow_statement.capex,
                ),
                leverage=LeverageMetrics(
                    debt_to_equity=info.get("debtToEquity"),
                    # NOTE(review): total_debt is filled from total
                    # liabilities, which is not the same figure - confirm.
                    total_debt=balance_sheet.total_liabilities,
                    total_equity=balance_sheet.total_equity,
                ),
                financial_statements=financial_statements,
                data_sources={"provider": "yahoo_finance"},
            )
        except Exception as e:
            raise wrap_provider_error(
                "Yahoo Finance", ticker, "fetch_fundamentals", e
            ) from e

    def fetch_news(self, ticker: str, limit: int = 10) -> List[Dict[str, Any]]:
        """Fetch news from Yahoo Finance.

        Args:
            ticker: Symbol whose news is requested.
            limit: Maximum number of raw articles to consider.

        Returns:
            A list of dicts with ``title``, ``source``, ``url``,
            ``published_at`` and ``summary`` keys (``summary`` repeats the
            title). Empty when there is no news or the payload is not a list;
            non-dict entries are skipped.

        Raises:
            Exception: Any failure is converted by ``wrap_provider_error``.
        """
        log = self._get_logger()
        try:
            stock = yf.Ticker(ticker)
            raw_news = stock.news

            # Handle case where news might be None or empty
            if not raw_news:
                return []
            if not isinstance(raw_news, list):
                log.warning(
                    f"Unexpected news data type for {ticker}: {type(raw_news)}"
                )
                return []

            # NOTE(review): the keys read below assume a flat article dict;
            # confirm against the yfinance version in use.
            formatted_news = []
            for article in raw_news[:limit]:
                if not isinstance(article, dict):
                    log.warning(
                        f"Skipping non-dict article for {ticker}: {type(article)}"
                    )
                    continue
                title = article.get("title", "")
                formatted_news.append(
                    {
                        "title": title,
                        "source": article.get("publisher", "Yahoo Finance"),
                        "url": article.get("link", ""),
                        "published_at": self._published_at(
                            article.get("providerPublishTime", 0)
                        ),
                        "summary": title,  # Yahoo doesn't provide full summary
                    }
                )
            return formatted_news
        except Exception as e:
            raise wrap_provider_error("Yahoo Finance", ticker, "fetch_news", e) from e

    def is_available(self) -> bool:
        """Check if Yahoo Finance is reachable.

        Downloads one day of AAPL data as a probe.

        Returns:
            True when the probe returns rows; False when it is empty or
            raises an ordinary exception.
        """
        try:
            test_df = yf.download("AAPL", period="1d", progress=False)
            return not test_df.empty
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt/SystemExit must propagate.
            return False