trading-tools / data /providers /yahoo_finance.py
Deploy Bot
Deploy Trading Analysis Platform to HuggingFace Spaces
a1bf219
"""Yahoo Finance data provider implementation."""
import logging
import time
from datetime import datetime
from typing import Any, Dict, List

import pandas as pd
import yfinance as yf

from utils.errors import (
    NoDataReturnedError,
    TickerNotFoundError,
    wrap_provider_error,
)
from utils.retry import exponential_backoff

from .base import DataProvider, ProviderException
class YahooFinanceProvider(DataProvider):
    """Yahoo Finance data provider using yfinance library.

    Offers OHLC price history, fundamentals, news headlines and a simple
    reachability probe. Unexpected failures are converted into provider
    errors through ``wrap_provider_error`` with the original exception
    chained as the cause.
    """

    # Yahoo Finance interval mapping
    INTERVAL_MAPPING = {
        "1m": "1m",
        "5m": "5m",
        "15m": "15m",
        "30m": "30m",
        "1h": "1h",
        "4h": "4h",
        "1d": "1d",
        "1w": "1wk",  # Yahoo Finance uses "1wk" for weekly data
        "1mo": "1mo",  # Monthly data
        "3mo": "1wk",  # 3-month view: use weekly data (quarterly data insufficient: only 33 bars available)
        "1y": "1wk",  # 1-year view: use weekly data for better performance
        "5y": "1mo",  # 5-year view: use monthly data for better performance
    }

    # Model field name -> row label in the corresponding yfinance statement.
    _BALANCE_SHEET_ROWS = {
        "total_assets": "Total Assets",
        "total_liabilities": "Total Liabilities Net Minority Interest",
        "total_equity": "Total Equity Gross Minority Interest",
        "working_capital": "Working Capital",
        "cash_and_equivalents": "Cash And Cash Equivalents",
        "retained_earnings": "Retained Earnings",
    }
    _INCOME_STATEMENT_ROWS = {
        "total_revenue": "Total Revenue",
        "gross_profit": "Gross Profit",
        "net_income": "Net Income",
        "ebitda": "EBITDA",
        "basic_eps": "Basic EPS",
    }
    _CASH_FLOW_ROWS = {
        "operating_cash_flow": "Operating Cash Flow",
        "free_cash_flow": "Free Cash Flow",
        "capex": "Capital Expenditure",
    }
    # Fields reported as-is instead of being divided by one million.
    _UNSCALED_FIELDS = frozenset({"basic_eps"})
    _MILLION = 1_000_000

    _log = logging.getLogger(__name__)

    @exponential_backoff(
        max_retries=3,
        base_delay=1.0,
        max_delay=10.0,
        retry_on=(Exception,),
        skip_on=(NoDataReturnedError, TickerNotFoundError),
    )
    def fetch_ohlc(
        self, ticker: str, timeframe: str, start_date: str, end_date: str
    ) -> pd.DataFrame:
        """Fetch OHLC data from Yahoo Finance with exponential backoff retry logic.

        Args:
            ticker: Symbol passed to ``yf.download``.
            timeframe: Application timeframe key; translated through
                ``INTERVAL_MAPPING`` and passed through unchanged when the
                key is not in the mapping.
            start_date: Start of the requested range, forwarded as ``start``.
            end_date: End of the requested range, forwarded as ``end``.

        Returns:
            The downloaded frame with the index reset into a column,
            lower-cased column names, the ``date``/``datetime`` column
            renamed to ``timestamp``, after passing ``_validate_ohlc``.

        Raises:
            NoDataReturnedError: If the download (or the per-ticker slice of
                a MultiIndex result) is empty.
            Exception: ``NoDataReturnedError``, ``TickerNotFoundError`` and
                ``ProviderException`` propagate unchanged; anything else is
                re-raised as the result of ``wrap_provider_error``.
        """
        try:
            interval = self.INTERVAL_MAPPING.get(timeframe, timeframe)
            df = yf.download(
                tickers=ticker,
                start=start_date,
                end=end_date,
                interval=interval,
                progress=False,
                auto_adjust=True,
            )

            # Treat a missing result the same as an empty one so the caller
            # gets the user-friendly error instead of an AttributeError.
            if df is None or df.empty:
                raise NoDataReturnedError(ticker, timeframe, "Yahoo Finance")

            # Handle MultiIndex columns if present (happens even with single tickers)
            if isinstance(df.columns, pd.MultiIndex):
                try:
                    # Try to extract using the ticker name at level 1
                    df = df.xs(ticker, axis=1, level=1)
                except KeyError:
                    # If ticker name doesn't match exactly, just drop the second level
                    # This can happen with special characters in ticker names
                    df = df.droplevel(1, axis=1)

                # Check if extraction resulted in empty DataFrame
                if df.empty:
                    raise NoDataReturnedError(ticker, timeframe, "Yahoo Finance")

            # Reset index to make timestamp a column (before lowercasing)
            df = df.reset_index()

            # Normalize column names to lowercase
            df.columns = df.columns.str.lower()

            # Rename date/datetime column to timestamp
            if "date" in df.columns:
                df = df.rename(columns={"date": "timestamp"})
            elif "datetime" in df.columns:
                df = df.rename(columns={"datetime": "timestamp"})

            # Validate OHLC data
            return self._validate_ohlc(df)
        except (NoDataReturnedError, TickerNotFoundError, ProviderException):
            # Already user-friendly / provider-level errors: do not wrap.
            raise
        except Exception as e:
            # Wrap all other exceptions, keeping the original as the cause.
            raise wrap_provider_error("Yahoo Finance", ticker, "fetch_ohlc", e) from e

    @classmethod
    def _statement_value(cls, column, row_label, in_millions=True):
        """Return one statement figure as a float, or None when it is missing/NaN."""
        raw = column.get(row_label)
        if pd.isna(raw):
            return None
        number = float(raw)
        return number / cls._MILLION if in_millions else number

    def _build_statement(self, ticker, description, load_frame, model_cls, row_labels):
        """Build ``model_cls`` from the first column of a statement frame.

        ``load_frame`` is called inside the ``try`` because reading the
        yfinance statement attribute may itself fail. Any failure is logged
        and answered with an empty ``model_cls()`` so that one unavailable
        statement does not abort the whole fundamentals request.
        """
        try:
            frame = load_frame()
            if frame.empty:
                return model_cls()
            latest = frame.iloc[:, 0]  # Latest quarter
            return model_cls(
                **{
                    field: self._statement_value(
                        latest,
                        row_label,
                        in_millions=field not in self._UNSCALED_FIELDS,
                    )
                    for field, row_label in row_labels.items()
                }
            )
        except Exception:
            self._log.warning(
                "Could not load %s for %s; using empty values",
                description,
                ticker,
                exc_info=True,
            )
            return model_cls()

    def fetch_fundamentals(self, ticker: str) -> Dict[str, Any]:
        """
        Fetch fundamental data from Yahoo Finance including financial statements.

        Returns a FundamentalMetrics object with balance sheet, income statement,
        and cash flow statement data. Statement figures come from the first
        column of the quarterly frames and are divided by 1,000,000 (except
        basic EPS); ratio-style metrics are read from ``Ticker.info``.

        NOTE(review): the return annotation says ``Dict[str, Any]`` but the
        value returned is a ``FundamentalMetrics`` instance; the annotation is
        kept as-is to avoid changing the declared interface.

        Raises:
            Exception: Any failure outside the per-statement fallbacks is
                re-raised as the result of ``wrap_provider_error``.
        """
        try:
            stock = yf.Ticker(ticker)
            info = stock.info

            # Import models for financial statements
            from config.models import (
                BalanceSheet,
                CashFlowMetrics,
                CashFlowStatement,
                FinancialStatements,
                FundamentalMetrics,
                GrowthMetrics,
                IncomeStatement,
                LeverageMetrics,
                ProfitabilityMetrics,
                ValuationMetrics,
            )

            # Extract latest financial statement data
            # yfinance provides quarterly and annual data - use latest quarterly
            balance_sheet = self._build_statement(
                ticker,
                "quarterly balance sheet",
                lambda: stock.quarterly_balance_sheet,
                BalanceSheet,
                self._BALANCE_SHEET_ROWS,
            )
            income_statement = self._build_statement(
                ticker,
                "quarterly income statement",
                lambda: stock.quarterly_income_stmt,
                IncomeStatement,
                self._INCOME_STATEMENT_ROWS,
            )
            cash_flow_statement = self._build_statement(
                ticker,
                "quarterly cash flow statement",
                lambda: stock.quarterly_cashflow,
                CashFlowStatement,
                self._CASH_FLOW_ROWS,
            )

            # Create complete financial statements object
            financial_statements = FinancialStatements(
                balance_sheet=balance_sheet,
                income_statement=income_statement,
                cash_flow_statement=cash_flow_statement,
            )

            # A missing or zero market cap is reported as None.
            market_cap = info.get("marketCap")

            # Create FundamentalMetrics object
            return FundamentalMetrics(
                ticker=ticker,
                as_of_date=datetime.now().isoformat(),
                valuation=ValuationMetrics(
                    pe_ratio=info.get("trailingPE"),
                    pb_ratio=info.get("priceToBook"),
                    ps_ratio=info.get("priceToSalesTrailing12Months"),
                    market_cap=market_cap / self._MILLION if market_cap else None,
                ),
                profitability=ProfitabilityMetrics(
                    gross_margin=info.get("grossMargins"),
                    operating_margin=info.get("operatingMargins"),
                    net_margin=info.get("profitMargins"),
                    roe=info.get("returnOnEquity"),
                ),
                growth=GrowthMetrics(
                    revenue_growth_yoy=info.get("revenueGrowth"),
                    earnings_growth_yoy=info.get("earningsGrowth"),
                ),
                cash_flow=CashFlowMetrics(
                    free_cash_flow=cash_flow_statement.free_cash_flow,
                    operating_cash_flow=cash_flow_statement.operating_cash_flow,
                    capex=cash_flow_statement.capex,
                ),
                leverage=LeverageMetrics(
                    debt_to_equity=info.get("debtToEquity"),
                    total_debt=balance_sheet.total_liabilities,
                    total_equity=balance_sheet.total_equity,
                ),
                financial_statements=financial_statements,
                data_sources={"provider": "yahoo_finance"},
            )
        except Exception as e:
            raise wrap_provider_error(
                "Yahoo Finance", ticker, "fetch_fundamentals", e
            ) from e

    @staticmethod
    def _published_at(article: Dict[str, Any]) -> str:
        """Return the article's ``providerPublishTime`` as an ISO string.

        A missing, ``None`` or unusable timestamp falls back to epoch 0, the
        same value used when the key is absent, so that one malformed
        article cannot fail the whole news request.
        """
        try:
            return datetime.fromtimestamp(
                article.get("providerPublishTime") or 0
            ).isoformat()
        except (TypeError, ValueError, OverflowError, OSError):
            return datetime.fromtimestamp(0).isoformat()

    def fetch_news(self, ticker: str, limit: int = 10) -> List[Dict[str, Any]]:
        """Fetch news from Yahoo Finance.

        Args:
            ticker: Symbol whose ``Ticker.news`` entries are read.
            limit: Maximum number of raw entries considered (list slice).

        Returns:
            A list of dicts with ``title``, ``source``, ``url``,
            ``published_at`` and ``summary`` keys; empty when no news is
            returned or the payload is not a list. Non-dict entries are
            skipped with a warning.

        Raises:
            Exception: Failures are re-raised as the result of
                ``wrap_provider_error``.
        """
        # NOTE(review): newer yfinance releases may nest article fields under
        # a "content" key; confirm against the installed version.
        try:
            raw_news = yf.Ticker(ticker).news

            # Handle case where news might be None or empty
            if not raw_news:
                return []

            # Ensure raw_news is a list
            if not isinstance(raw_news, list):
                self._log.warning(
                    f"Unexpected news data type for {ticker}: {type(raw_news)}"
                )
                return []

            formatted_news = []
            # Limit to requested number of articles
            for article in raw_news[:limit]:
                # Skip if article is not a dictionary
                if not isinstance(article, dict):
                    self._log.warning(
                        f"Skipping non-dict article for {ticker}: {type(article)}"
                    )
                    continue

                title = article.get("title", "")
                formatted_news.append(
                    {
                        "title": title,
                        "source": article.get("publisher", "Yahoo Finance"),
                        "url": article.get("link", ""),
                        "published_at": self._published_at(article),
                        # Yahoo doesn't provide full summary
                        "summary": title,
                    }
                )
            return formatted_news
        except Exception as e:
            raise wrap_provider_error("Yahoo Finance", ticker, "fetch_news", e) from e

    def is_available(self) -> bool:
        """Check if Yahoo Finance is reachable.

        Returns:
            True when a one-day download of a known ticker yields rows;
            False when it is empty or raises.
        """
        try:
            # Try to fetch a known ticker
            test_df = yf.download("AAPL", period="1d", progress=False)
            return not test_df.empty
        except Exception:
            # Deliberately not a bare ``except`` so KeyboardInterrupt and
            # SystemExit still propagate.
            return False