Spaces:
Running
Running
| """ | |
| Multi-layer ticker validation: sanitise β format β local DB β live yfinance API. | |
| All error responses carry a structured error code in the ``code`` field. | |
| Implements a graceful fallback chain when external services are degraded. | |
| """ | |
| import logging | |
| import re | |
| from dataclasses import dataclass, field | |
| from functools import lru_cache | |
| import yfinance as yf | |
| from ticker_db import find_similar_tickers, get_company_name, is_known_ticker | |
| logger = logging.getLogger(__name__) | |
| # --------------------------------------------------------------------------- | |
| # Structured error codes | |
| # --------------------------------------------------------------------------- | |
| class ErrorCode: | |
| EMPTY_INPUT = "EMPTY_INPUT" | |
| INVALID_FORMAT = "INVALID_FORMAT" | |
| RESERVED_WORD = "RESERVED_WORD" | |
| TICKER_NOT_FOUND = "TICKER_NOT_FOUND" | |
| TICKER_DELISTED = "TICKER_DELISTED" | |
| API_TIMEOUT = "API_TIMEOUT" | |
| API_ERROR = "API_ERROR" | |
| RATE_LIMITED = "RATE_LIMITED" | |
| DATA_FETCH_FAILED = "DATA_FETCH_FAILED" | |
| INTERNAL_ERROR = "INTERNAL_ERROR" | |
| # --------------------------------------------------------------------------- | |
| # Known non-stock inputs | |
| # --------------------------------------------------------------------------- | |
| _RESERVED_WORDS = {"HELP", "TEST", "NULL", "NONE", "NA", "N/A"} | |
| _CRYPTO_TICKERS = { | |
| "BTC", "ETH", "XRP", "LTC", "BNB", "SOL", "ADA", "DOT", | |
| "AVAX", "DOGE", "MATIC", "SHIB", "TRX", "LINK", "ATOM", "USDT", "USDC", | |
| } | |
| _CRYPTO_MESSAGE = ( | |
| "IRIS-AI analyzes stocks and ETFs. " | |
| "For cryptocurrency analysis, please use a crypto-specific platform." | |
| ) | |
| # OTC exchange identifiers returned by yfinance | |
| _OTC_EXCHANGES = {"PNK", "OTC", "OTCQB", "OTCQX", "PINK", "GREY", "EXPERT"} | |
| _MAX_RAW_LENGTH = 20 # chars before any processing | |
| # --------------------------------------------------------------------------- | |
| # Result dataclass | |
| # --------------------------------------------------------------------------- | |
| class TickerValidationResult: | |
| valid: bool | |
| ticker: str | |
| company_name: str = "" | |
| error: str = "" | |
| code: str = "" # structured error code (empty on success) | |
| warning: str = "" # non-fatal advisory (e.g. OTC market) | |
| suggestions: list[str] = field(default_factory=list) | |
| source: str = "" # "cache" | "local_db" | "api" | "special_symbol" | "" | |
| # --------------------------------------------------------------------------- | |
| # Layer 0 β input sanitisation | |
| # --------------------------------------------------------------------------- | |
| def sanitize_ticker_input(raw: str) -> str: | |
| """Return a cleaned, uppercase ticker string from arbitrary user input. | |
| Steps applied in order: | |
| 1. Enforce a 20-character hard cap before any further processing. | |
| 2. Remove leading ``$`` or ``#`` characters. | |
| 3. Remove ``ticker:`` prefix (case-insensitive). | |
| 4. Remove common trailing words: ``stock``, ``etf``, ``shares``. | |
| 5. Collapse all internal whitespace so "A A P L" becomes "AAPL". | |
| 6. Uppercase. | |
| """ | |
| s = str(raw or "").strip() | |
| if len(s) > _MAX_RAW_LENGTH: | |
| s = s[:_MAX_RAW_LENGTH] | |
| s = re.sub(r"^[\$#]+", "", s) | |
| s = re.sub(r"^ticker:", "", s, flags=re.IGNORECASE) | |
| s = re.sub(r"\s+(stock|etf|shares)$", "", s, flags=re.IGNORECASE) | |
| s = re.sub(r"\s+", "", s) | |
| return s.upper() | |
| # --------------------------------------------------------------------------- | |
| # Layer 1 β format validation | |
| # --------------------------------------------------------------------------- | |
| # Standard US tickers: 1-5 letters, optionally ONE dot + 1-2 letters. | |
| # Covers BRK.B, BRK.A, class shares, etc. | |
| _STANDARD_TICKER_RE = re.compile(r"^[A-Z]{1,5}(\.[A-Z]{1,2})?$") | |
| # Preferred share tickers: base symbol + hyphen + series code. | |
| # Covers T-PA, BAC-PB, WFC-PL, JPM-PC, etc. | |
| _PREFERRED_TICKER_RE = re.compile(r"^[A-Z]{1,5}-[A-Z0-9]{1,3}$") | |
| # Yahoo special symbols: | |
| # - Indices: ^GSPC, ^IXIC, ^DJI | |
| # - Futures: CL=F, GC=F, SI=F, HG=F | |
| # - Composite symbols: DX-Y.NYB | |
| _INDEX_TICKER_RE = re.compile(r"^\^[A-Z0-9.\-]{1,14}$") | |
| _FUTURES_TICKER_RE = re.compile(r"^[A-Z0-9]{1,8}=F$") | |
| _COMPOSITE_TICKER_RE = re.compile(r"^[A-Z0-9]{1,8}-[A-Z0-9]{1,8}\.[A-Z]{1,6}$") | |
| def _is_special_market_symbol(ticker: str) -> bool: | |
| return bool( | |
| _INDEX_TICKER_RE.fullmatch(ticker) | |
| or _FUTURES_TICKER_RE.fullmatch(ticker) | |
| or _COMPOSITE_TICKER_RE.fullmatch(ticker) | |
| ) | |
| def validate_ticker_format(ticker: str) -> TickerValidationResult: | |
| """Check that *ticker* has a valid format (sanitises input first).""" | |
| normalized = sanitize_ticker_input(ticker) | |
| if not normalized: | |
| return TickerValidationResult( | |
| valid=False, ticker=normalized, code=ErrorCode.EMPTY_INPUT, | |
| error="Please enter a stock ticker symbol.", | |
| ) | |
| if normalized in _CRYPTO_TICKERS: | |
| return TickerValidationResult( | |
| valid=False, ticker=normalized, code=ErrorCode.RESERVED_WORD, | |
| error=_CRYPTO_MESSAGE, | |
| ) | |
| is_standard = bool(_STANDARD_TICKER_RE.fullmatch(normalized)) | |
| is_preferred = bool(_PREFERRED_TICKER_RE.fullmatch(normalized)) | |
| is_special = _is_special_market_symbol(normalized) | |
| if not (is_standard or is_preferred or is_special): | |
| return TickerValidationResult( | |
| valid=False, ticker=normalized, code=ErrorCode.INVALID_FORMAT, | |
| error=( | |
| f'"{normalized}" is not a valid ticker format. ' | |
| "Use stock format (e.g., AAPL, BRK.B), preferred shares (e.g., T-PA, BAC-PB), " | |
| "or special market symbols (e.g., ^GSPC, CL=F)." | |
| ), | |
| ) | |
| if normalized in _RESERVED_WORDS: | |
| return TickerValidationResult( | |
| valid=False, ticker=normalized, code=ErrorCode.RESERVED_WORD, | |
| error=f'"{normalized}" is a reserved word, not a stock ticker.', | |
| ) | |
| return TickerValidationResult(valid=True, ticker=normalized) | |
| # --------------------------------------------------------------------------- | |
| # Layers 2 + 3 β local DB then live yfinance API (cached) | |
| # --------------------------------------------------------------------------- | |
| def _cached_api_lookup(ticker: str) -> TickerValidationResult: | |
| """Hit yfinance for *ticker* (already normalised uppercase). | |
| lru_cache stores only successful returns; raised exceptions are never cached, | |
| so transient network failures do not permanently poison the cache. | |
| """ | |
| in_local_db = is_known_ticker(ticker) | |
| # info call β may raise on network error; let it propagate so the caller | |
| # can apply the graceful-degradation fallback chain. | |
| info = yf.Ticker(ticker).info | |
| company_name = info.get("shortName") or info.get("longName") or "" | |
| is_special_symbol = _is_special_market_symbol(ticker) | |
| if not company_name and not in_local_db and not is_special_symbol: | |
| return TickerValidationResult( | |
| valid=False, ticker=ticker, code=ErrorCode.TICKER_NOT_FOUND, | |
| error=f'Ticker "{ticker}" was not found. Please check the symbol and try again.', | |
| suggestions=find_similar_tickers(ticker), | |
| ) | |
| # history call β a network failure here doesn't mean the ticker is bad | |
| history_empty = False | |
| try: | |
| hist = yf.Ticker(ticker).history(period="5d") | |
| history_empty = hist is None or (hasattr(hist, "empty") and hist.empty) | |
| except Exception: | |
| history_empty = False # service hiccup; don't penalise the ticker | |
| if history_empty and company_name: | |
| return TickerValidationResult( | |
| valid=False, ticker=ticker, company_name=company_name, | |
| code=ErrorCode.TICKER_DELISTED, | |
| error=( | |
| f'"{ticker}" ({company_name}) appears to be delisted ' | |
| "or has no recent trading data." | |
| ), | |
| ) | |
| # OTC / pink-sheet advisory | |
| exchange = (info.get("exchange") or info.get("market") or "").upper() | |
| warning = "" | |
| if exchange in _OTC_EXCHANGES: | |
| warning = f"Note: {ticker} trades on the OTC market. Data may be limited." | |
| source = "special_symbol" if is_special_symbol and not in_local_db else ("local_db" if in_local_db else "api") | |
| return TickerValidationResult( | |
| valid=True, ticker=ticker, | |
| company_name=company_name or ("(special market symbol)" if is_special_symbol else "(verified offline)"), | |
| warning=warning, | |
| source=source, | |
| ) | |
| def validate_ticker_exists(ticker: str) -> TickerValidationResult: | |
| """Full existence check with graceful-degradation fallback chain. | |
| Fallback behaviour when services are degraded: | |
| - API down + ticker in local DB β valid with warning | |
| - API down + ticker NOT in DB β rejection with API_TIMEOUT / API_ERROR | |
| - DB corrupted/missing + API up β rely on API only | |
| - Both services down β rejection explaining both are unavailable | |
| """ | |
| fmt = validate_ticker_format(ticker) | |
| if not fmt.valid: | |
| return fmt | |
| normalized = fmt.ticker | |
| is_special_symbol = _is_special_market_symbol(normalized) | |
| # Probe local DB (may fail if DB file is corrupted or missing) | |
| in_local_db = False | |
| local_db_available = True | |
| try: | |
| in_local_db = is_known_ticker(normalized) | |
| except Exception: | |
| local_db_available = False | |
| logger.warning("Local ticker DB unavailable when checking %s", normalized) | |
| try: | |
| if in_local_db and not is_special_symbol: | |
| return TickerValidationResult( | |
| valid=True, | |
| ticker=normalized, | |
| company_name=get_company_name(normalized) or "(verified offline)", | |
| source="local_db", | |
| ) | |
| return _cached_api_lookup(normalized) | |
| except Exception as exc: | |
| exc_str = str(exc).lower() | |
| is_timeout = ( | |
| "timeout" in exc_str | |
| or "timed out" in exc_str | |
| or isinstance(exc, TimeoutError) | |
| ) | |
| api_code = ErrorCode.API_TIMEOUT if is_timeout else ErrorCode.API_ERROR | |
| logger.warning("yfinance lookup failed for %s: %s", normalized, exc) | |
| # Both services unavailable | |
| if not local_db_available: | |
| return TickerValidationResult( | |
| valid=False, ticker=normalized, code=ErrorCode.API_ERROR, | |
| error=( | |
| "Validation services are temporarily unavailable. " | |
| "Please try again shortly." | |
| ), | |
| ) | |
| # API down but ticker confirmed in local DB β degrade gracefully | |
| if in_local_db: | |
| return TickerValidationResult( | |
| valid=True, ticker=normalized, | |
| company_name="(verified offline)", | |
| source="local_db", | |
| warning=( | |
| "Ticker verified from local database. " | |
| "Real-time data verification is temporarily unavailable." | |
| ), | |
| ) | |
| # API down for special symbols: allow format-validated pass-through. | |
| if is_special_symbol: | |
| return TickerValidationResult( | |
| valid=True, | |
| ticker=normalized, | |
| company_name="(special market symbol)", | |
| source="special_symbol", | |
| warning="Special market symbol accepted while live validation is temporarily unavailable.", | |
| ) | |
| # API down, ticker not in local DB - cannot verify | |
| return TickerValidationResult( | |
| valid=False, ticker=normalized, code=api_code, | |
| error=( | |
| "Cannot verify this ticker right now. " | |
| "Please try again in a few minutes or use a well-known ticker." | |
| ), | |
| ) | |
| # --------------------------------------------------------------------------- | |
| # Public entry point | |
| # --------------------------------------------------------------------------- | |
| def validate_ticker(ticker: str) -> TickerValidationResult: | |
| """Orchestrate the full validation flow and return a single result.""" | |
| return validate_ticker_exists(ticker) | |