"""Abstract base class for data providers.""" from abc import ABC, abstractmethod from datetime import datetime from typing import Any, Dict, List, Literal, Optional import pandas as pd class ProviderException(Exception): """Custom exception for provider errors.""" pass AssetType = Literal["stock", "crypto", "commodity", "index", "forex", "unknown"] class DataProvider(ABC): """Abstract base class for market data providers.""" @abstractmethod def fetch_ohlc( self, ticker: str, timeframe: str, start_date: str, end_date: str ) -> pd.DataFrame: """ Fetch OHLC price data. Args: ticker: Asset ticker symbol (e.g., "AAPL", "BTC-USD") timeframe: Candlestick timeframe ("1m", "5m", "15m", "30m", "1h", "4h", "1d") start_date: Start date in YYYY-MM-DD format end_date: End date in YYYY-MM-DD format Returns: DataFrame with columns: timestamp, open, high, low, close, volume Raises: ProviderException: If data fetch fails """ pass @abstractmethod def fetch_fundamentals(self, ticker: str) -> Dict[str, Any]: """ Fetch fundamental data (financials, earnings). Args: ticker: Stock ticker symbol Returns: Dictionary with fundamental data (market_cap, pe_ratio, revenue, etc.) Raises: ProviderException: If data fetch fails NotImplementedError: If provider doesn't support fundamentals """ pass @abstractmethod def fetch_news(self, ticker: str, limit: int = 10) -> List[Dict[str, Any]]: """ Fetch recent news articles. Args: ticker: Asset ticker symbol limit: Maximum number of articles to return Returns: List of news articles with title, source, published_at, summary Raises: ProviderException: If data fetch fails """ pass @abstractmethod def is_available(self) -> bool: """ Check if provider is reachable. Returns: True if provider is available, False otherwise """ pass def _validate_ohlc(self, df: pd.DataFrame) -> pd.DataFrame: """ Validate OHLC data integrity. Args: df: DataFrame with OHLC data Returns: Validated DataFrame Raises: ProviderException: If validation fails """ if df.empty: raise ProviderException("No data returned from provider") required_columns = ["open", "high", "low", "close", "volume"] missing_columns = [col for col in required_columns if col not in df.columns] if missing_columns: raise ProviderException(f"Missing required columns: {missing_columns}") # Drop rows with NaN values in critical columns df = df.dropna(subset=["open", "high", "low", "close"]) if df.empty: raise ProviderException("No valid data after removing NaN values") # Validate OHLC relationships invalid_rows = ( (df["low"] > df["open"]) | (df["low"] > df["close"]) | (df["high"] < df["open"]) | (df["high"] < df["close"]) | (df["high"] < df["low"]) ) if invalid_rows.any(): raise ProviderException( f"Invalid OHLC relationships in {invalid_rows.sum()} rows" ) return df @staticmethod def detect_asset_type(ticker: str) -> AssetType: """ Detect asset type based on ticker format. Args: ticker: Asset ticker symbol Returns: Asset type: "stock", "crypto", "commodity", "index", "forex", or "unknown" Examples: - "AAPL" -> "stock" - "BTC-USD" -> "crypto" - "ETH-USD" -> "crypto" - "GC=F" -> "commodity" (Gold futures) - "CL=F" -> "commodity" (Crude oil futures) - "^GSPC" -> "index" (S&P 500) - "^DJI" -> "index" (Dow Jones) - "EURUSD=X" -> "forex" """ ticker_upper = ticker.upper() # Index detection (starts with ^) if ticker_upper.startswith("^"): return "index" # Commodity futures detection (ends with =F) if ticker_upper.endswith("=F"): return "commodity" # Forex detection (ends with =X or common forex pairs) if ticker_upper.endswith("=X"): return "forex" # Crypto detection (contains -USD, -USDT, -USDC, etc.) crypto_suffixes = ["-USD", "-USDT", "-USDC", "-BUSD", "-EUR", "-GBP"] if any(ticker_upper.endswith(suffix) for suffix in crypto_suffixes): return "crypto" # Common crypto prefixes crypto_prefixes = ["BTC", "ETH", "BNB", "ADA", "SOL", "XRP", "DOGE", "MATIC"] ticker_prefix = ticker_upper.split("-")[0] if ticker_prefix in crypto_prefixes: return "crypto" # Default to stock for standard ticker symbols # Most stocks are 1-5 uppercase letters without special characters if ticker_upper.replace(".", "").isalpha() and len(ticker_upper) <= 5: return "stock" # Unknown for anything else return "unknown" @staticmethod def get_asset_characteristics(asset_type: AssetType) -> Dict[str, Any]: """ Get characteristics and constraints for different asset types. Args: asset_type: Type of asset Returns: Dictionary with asset characteristics including: - market_hours: Trading hours (24/7 vs market hours) - has_fundamentals: Whether fundamental analysis applies - volatility: Typical volatility level - analysis_focus: Primary analysis considerations """ characteristics = { "stock": { "market_hours": "9:30 AM - 4:00 PM ET (Mon-Fri)", "has_fundamentals": True, "volatility": "moderate", "analysis_focus": [ "Company financials", "Earnings reports", "Technical patterns", "Sector trends", ], "fundamental_factors": [ "revenue_growth", "earnings_per_share", "pe_ratio", "debt_to_equity", ], }, "crypto": { "market_hours": "24/7 (365 days)", "has_fundamentals": False, "volatility": "high", "analysis_focus": [ "Market sentiment", "Technical patterns", "Trading volume", "News and social media", ], "fundamental_factors": [ "market_sentiment", "adoption_rate", "regulatory_news", "whale_activity", ], }, "commodity": { "market_hours": "Various (depends on commodity)", "has_fundamentals": False, "volatility": "moderate-high", "analysis_focus": [ "Supply and demand", "Geopolitical events", "Seasonal patterns", "Technical levels", ], "fundamental_factors": [ "supply_demand_balance", "inventory_levels", "geopolitical_risk", "weather_patterns", ], }, "index": { "market_hours": "9:30 AM - 4:00 PM ET (Mon-Fri)", "has_fundamentals": False, "volatility": "moderate", "analysis_focus": [ "Sector rotation", "Macro sentiment", "Technical patterns", "Breadth indicators", ], "fundamental_factors": [ "sector_performance", "market_breadth", "economic_indicators", "constituent_earnings", ], }, "forex": { "market_hours": "24/5 (Sun 5 PM - Fri 5 PM ET)", "has_fundamentals": False, "volatility": "moderate", "analysis_focus": [ "Interest rate differentials", "Economic data", "Central bank policy", "Technical levels", ], "fundamental_factors": [ "interest_rates", "gdp_growth", "inflation", "central_bank_policy", ], }, "unknown": { "market_hours": "unknown", "has_fundamentals": False, "volatility": "unknown", "analysis_focus": ["Technical patterns", "Price action"], "fundamental_factors": [], }, } return characteristics.get(asset_type, characteristics["unknown"])