IRIS-AI_DEMO / ticker_validator.py
Brajmovech's picture
Sync IRIS-AI model and almanac updates
4adb2a8
"""
Multi-layer ticker validation: sanitise → format → local DB → live yfinance API.
All error responses carry a structured error code in the ``code`` field.
Implements a graceful fallback chain when external services are degraded.
"""
import logging
import re
from dataclasses import dataclass, field
from functools import lru_cache
import yfinance as yf
from ticker_db import find_similar_tickers, get_company_name, is_known_ticker
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Structured error codes
# ---------------------------------------------------------------------------
class ErrorCode:
    """Namespace of machine-readable codes carried in a result's ``code`` field.

    Every constant's value is identical to its own name.
    """

    # Input rejected during sanitising / format checking.
    EMPTY_INPUT = "EMPTY_INPUT"
    INVALID_FORMAT = "INVALID_FORMAT"
    RESERVED_WORD = "RESERVED_WORD"

    # Symbol was looked up but cannot be used.
    TICKER_NOT_FOUND = "TICKER_NOT_FOUND"
    TICKER_DELISTED = "TICKER_DELISTED"

    # Live lookup could not be completed.
    API_TIMEOUT = "API_TIMEOUT"
    API_ERROR = "API_ERROR"

    # Not referenced by the validation code visible in this module;
    # presumably consumed by other parts of the project.
    RATE_LIMITED = "RATE_LIMITED"
    DATA_FETCH_FAILED = "DATA_FETCH_FAILED"
    INTERNAL_ERROR = "INTERNAL_ERROR"
# ---------------------------------------------------------------------------
# Known non-stock inputs
# ---------------------------------------------------------------------------
# Placeholder words that are rejected as RESERVED_WORD rather than looked up.
_RESERVED_WORDS = {
    "HELP",
    "TEST",
    "NULL",
    "NONE",
    "NA",
    "N/A",
}

# Cryptocurrency symbols: rejected up front with a dedicated message.
_CRYPTO_TICKERS = {
    "BTC",
    "ETH",
    "XRP",
    "LTC",
    "BNB",
    "SOL",
    "ADA",
    "DOT",
    "AVAX",
    "DOGE",
    "MATIC",
    "SHIB",
    "TRX",
    "LINK",
    "ATOM",
    "USDT",
    "USDC",
}

# User-facing explanation returned for any symbol in _CRYPTO_TICKERS.
_CRYPTO_MESSAGE = (
    "IRIS-AI analyzes stocks and ETFs. "
    "For cryptocurrency analysis, please use a crypto-specific platform."
)

# OTC exchange identifiers returned by yfinance; a match only adds a warning.
_OTC_EXCHANGES = {
    "PNK",
    "OTC",
    "OTCQB",
    "OTCQX",
    "PINK",
    "GREY",
    "EXPERT",
}

# Hard cap (in characters) applied to raw input before any other processing.
_MAX_RAW_LENGTH = 20
# ---------------------------------------------------------------------------
# Result dataclass
# ---------------------------------------------------------------------------
@dataclass
class TickerValidationResult:
    """Outcome of a single validation attempt.

    Only ``valid`` and ``ticker`` are required; every other field defaults
    to an empty value.
    """

    # True when the ticker passed the check that produced this result.
    valid: bool
    # The sanitised (uppercase) ticker the result refers to.
    ticker: str
    # Display name; the code in this module may also store a placeholder such
    # as "(verified offline)" or "(special market symbol)".
    company_name: str = ""
    # Human-readable failure message (empty on success).
    error: str = ""
    # Structured error code (empty on success).
    code: str = ""
    # Non-fatal advisory (e.g. OTC market).
    warning: str = ""
    # Alternative tickers offered when the requested one was not found.
    suggestions: list[str] = field(default_factory=list)
    # Origin of the verdict: "cache" | "local_db" | "api" | "special_symbol" | "".
    source: str = ""
# ---------------------------------------------------------------------------
# Layer 0 – input sanitisation
# ---------------------------------------------------------------------------
def sanitize_ticker_input(raw: str) -> str:
    """Return a cleaned, uppercase ticker string from arbitrary user input.

    ``None`` and other falsy values are treated as an empty string.

    Steps applied in order:

    1. Strip surrounding whitespace and enforce the ``_MAX_RAW_LENGTH``
       hard cap before any further processing.
    2. Remove leading ``$`` or ``#`` characters.
    3. Remove a ``ticker:`` prefix (case-insensitive).
    4. Remove a ``$``/``#`` sigil that follows the prefix, so that
       ``"ticker: $AAPL"`` cleans up the same way as ``"$AAPL"``.
    5. Remove common trailing words: ``stock``, ``etf``, ``shares``.
    6. Collapse all internal whitespace so "A A P L" becomes "AAPL".
    7. Uppercase.
    """
    s = str(raw or "").strip()
    if len(s) > _MAX_RAW_LENGTH:
        s = s[:_MAX_RAW_LENGTH]

    s = re.sub(r"^[\$#]+", "", s)
    s = re.sub(r"^ticker:", "", s, flags=re.IGNORECASE)
    # A sigil may sit behind the prefix ("ticker: $AAPL"); without this pass
    # it survives into the result and the ticker fails format validation.
    # The pattern requires at least one sigil, so other input is untouched.
    s = re.sub(r"^\s*[\$#]+", "", s)
    # ``\s*`` before the anchor tolerates whitespace left by the truncation.
    s = re.sub(r"\s+(stock|etf|shares)\s*$", "", s, flags=re.IGNORECASE)
    s = re.sub(r"\s+", "", s)
    return s.upper()
# ---------------------------------------------------------------------------
# Layer 1 – format validation
# ---------------------------------------------------------------------------
# Common stock: one to five letters, with an optional class suffix made of a
# single dot followed by one or two letters (BRK.A, BRK.B, ...).
_STANDARD_TICKER_RE = re.compile(r"^[A-Z]{1,5}(\.[A-Z]{1,2})?$")

# Preferred shares: base symbol, a hyphen, then a one-to-three character
# series code (T-PA, BAC-PB, WFC-PL, JPM-PC, ...).
_PREFERRED_TICKER_RE = re.compile(r"^[A-Z]{1,5}-[A-Z0-9]{1,3}$")

# Yahoo special symbols, one pattern per family:
#   indices     start with a caret        ^GSPC, ^IXIC, ^DJI
#   futures     end in "=F"               CL=F, GC=F, SI=F, HG=F
#   composites  "<base>-<part>.<venue>"   DX-Y.NYB
_INDEX_TICKER_RE = re.compile(r"^\^[A-Z0-9.\-]{1,14}$")
_FUTURES_TICKER_RE = re.compile(r"^[A-Z0-9]{1,8}=F$")
_COMPOSITE_TICKER_RE = re.compile(r"^[A-Z0-9]{1,8}-[A-Z0-9]{1,8}\.[A-Z]{1,6}$")
def _is_special_market_symbol(ticker: str) -> bool:
    """Return True when *ticker* fully matches the index, futures or composite pattern."""
    special_patterns = (
        _INDEX_TICKER_RE,
        _FUTURES_TICKER_RE,
        _COMPOSITE_TICKER_RE,
    )
    return any(pattern.fullmatch(ticker) for pattern in special_patterns)
def validate_ticker_format(ticker: str) -> TickerValidationResult:
    """Check that *ticker* has a valid format (sanitises input first).

    The input is passed through ``sanitize_ticker_input`` and then tested in
    this order; the first failing check determines the result:

    1. empty after sanitising          -> ``EMPTY_INPUT``
    2. known cryptocurrency symbol     -> ``RESERVED_WORD`` (crypto message)
    3. reserved placeholder word       -> ``RESERVED_WORD``
    4. matches no accepted ticker form -> ``INVALID_FORMAT``

    Returns:
        A result whose ``ticker`` is always the sanitised string; ``valid`` is
        True only when every check passes.
    """
    normalized = sanitize_ticker_input(ticker)

    if not normalized:
        return TickerValidationResult(
            valid=False, ticker=normalized, code=ErrorCode.EMPTY_INPUT,
            error="Please enter a stock ticker symbol.",
        )

    if normalized in _CRYPTO_TICKERS:
        return TickerValidationResult(
            valid=False, ticker=normalized, code=ErrorCode.RESERVED_WORD,
            error=_CRYPTO_MESSAGE,
        )

    # Reserved words are screened BEFORE the shape check: an entry such as
    # "N/A" contains a slash, fails every ticker pattern, and would otherwise
    # be reported as INVALID_FORMAT without ever reaching this test.
    if normalized in _RESERVED_WORDS:
        return TickerValidationResult(
            valid=False, ticker=normalized, code=ErrorCode.RESERVED_WORD,
            error=f'"{normalized}" is a reserved word, not a stock ticker.',
        )

    has_known_shape = bool(
        _STANDARD_TICKER_RE.fullmatch(normalized)
        or _PREFERRED_TICKER_RE.fullmatch(normalized)
        or _is_special_market_symbol(normalized)
    )
    if not has_known_shape:
        return TickerValidationResult(
            valid=False, ticker=normalized, code=ErrorCode.INVALID_FORMAT,
            error=(
                f'"{normalized}" is not a valid ticker format. '
                "Use stock format (e.g., AAPL, BRK.B), preferred shares (e.g., T-PA, BAC-PB), "
                "or special market symbols (e.g., ^GSPC, CL=F)."
            ),
        )

    return TickerValidationResult(valid=True, ticker=normalized)
# ---------------------------------------------------------------------------
# Layers 2 + 3 – local DB then live yfinance API (cached)
# ---------------------------------------------------------------------------
@lru_cache(maxsize=512)
def _cached_api_lookup(ticker: str) -> TickerValidationResult:
    """Validate *ticker* (already normalised, uppercase) against yfinance.

    Caching: ``lru_cache`` memoises every value this function *returns*,
    including "not found" and "delisted" verdicts. Raised exceptions are
    never cached, so transient network failures do not permanently poison the
    cache. The same result object is handed to every caller for a given
    ticker, so callers should treat it as read-only.

    Raises:
        Whatever the yfinance ``info`` lookup raises; the caller applies the
        graceful-degradation fallback chain. Failures of the local DB and of
        the history call are handled here and do not propagate.
    """
    # The local DB is only a supporting signal here. If it is unreadable the
    # lookup must still proceed on API data alone; letting the error escape
    # would make the caller report the API as down when it is not.
    try:
        in_local_db = is_known_ticker(ticker)
    except Exception:
        in_local_db = False
        logger.warning("Local ticker DB unavailable during API lookup for %s", ticker)

    # info call — may raise on network error; let it propagate so the caller
    # can apply the graceful-degradation fallback chain.
    info = yf.Ticker(ticker).info
    company_name = info.get("shortName") or info.get("longName") or ""
    is_special_symbol = _is_special_market_symbol(ticker)

    if not company_name and not in_local_db and not is_special_symbol:
        # Suggestions are a convenience; presumably they come from the same
        # local DB, so a failure there must not mask a definitive "not found".
        try:
            suggestions = find_similar_tickers(ticker)
        except Exception:
            suggestions = []
            logger.warning("Could not compute ticker suggestions for %s", ticker)
        return TickerValidationResult(
            valid=False, ticker=ticker, code=ErrorCode.TICKER_NOT_FOUND,
            error=f'Ticker "{ticker}" was not found. Please check the symbol and try again.',
            suggestions=suggestions,
        )

    # history call — a network failure here doesn't mean the ticker is bad
    history_empty = False
    try:
        hist = yf.Ticker(ticker).history(period="5d")
        history_empty = hist is None or (hasattr(hist, "empty") and hist.empty)
    except Exception:
        history_empty = False  # service hiccup; don't penalise the ticker
        logger.debug(
            "History fetch failed for %s; treating as inconclusive",
            ticker, exc_info=True,
        )

    # Only a symbol the API actually named can be declared delisted.
    if history_empty and company_name:
        return TickerValidationResult(
            valid=False, ticker=ticker, company_name=company_name,
            code=ErrorCode.TICKER_DELISTED,
            error=(
                f'"{ticker}" ({company_name}) appears to be delisted '
                "or has no recent trading data."
            ),
        )

    # OTC / pink-sheet advisory
    exchange = (info.get("exchange") or info.get("market") or "").upper()
    warning = ""
    if exchange in _OTC_EXCHANGES:
        warning = f"Note: {ticker} trades on the OTC market. Data may be limited."

    if is_special_symbol and not in_local_db:
        source = "special_symbol"
    elif in_local_db:
        source = "local_db"
    else:
        source = "api"

    placeholder_name = (
        "(special market symbol)" if is_special_symbol else "(verified offline)"
    )
    return TickerValidationResult(
        valid=True, ticker=ticker,
        company_name=company_name or placeholder_name,
        warning=warning,
        source=source,
    )
def validate_ticker_exists(ticker: str) -> TickerValidationResult:
    """Check that *ticker* refers to a real instrument, degrading gracefully.

    The input first goes through ``validate_ticker_format``; a format failure
    is returned unchanged. Otherwise:

    - known to the local DB and not a special market symbol → accepted
      directly from the local DB, without an API call
    - anything else → delegated to ``_cached_api_lookup``

    If either of those steps raises, the fallback chain is, in order:

    1. the local DB probe had failed too → rejection with ``API_ERROR``
       ("validation services unavailable")
    2. ticker is in the local DB → valid, with an advisory warning
    3. special market symbol → valid, with an advisory warning
    4. otherwise → rejection with ``API_TIMEOUT`` or ``API_ERROR``
    """
    checked = validate_ticker_format(ticker)
    if not checked.valid:
        return checked

    symbol = checked.ticker
    special = _is_special_market_symbol(symbol)

    # Probe the local DB; a raising probe means the DB itself is unusable
    # (e.g. the file is corrupted or missing), not that the ticker is unknown.
    try:
        known_locally = is_known_ticker(symbol)
    except Exception:
        known_locally = False
        db_ok = False
        logger.warning("Local ticker DB unavailable when checking %s", symbol)
    else:
        db_ok = True

    try:
        if special or not known_locally:
            return _cached_api_lookup(symbol)
        return TickerValidationResult(
            valid=True,
            ticker=symbol,
            company_name=get_company_name(symbol) or "(verified offline)",
            source="local_db",
        )
    except Exception as exc:
        message = str(exc).lower()
        timed_out = (
            isinstance(exc, TimeoutError)
            or "timeout" in message
            or "timed out" in message
        )
        logger.warning("yfinance lookup failed for %s: %s", symbol, exc)

        # Neither the local DB nor the live lookup could be used.
        if not db_ok:
            return TickerValidationResult(
                valid=False, ticker=symbol, code=ErrorCode.API_ERROR,
                error=(
                    "Validation services are temporarily unavailable. "
                    "Please try again shortly."
                ),
            )

        # Lookup failed but the ticker is confirmed locally — degrade gracefully.
        if known_locally:
            return TickerValidationResult(
                valid=True, ticker=symbol,
                company_name="(verified offline)",
                source="local_db",
                warning=(
                    "Ticker verified from local database. "
                    "Real-time data verification is temporarily unavailable."
                ),
            )

        # Special symbols already passed the format check; let them through.
        if special:
            return TickerValidationResult(
                valid=True,
                ticker=symbol,
                company_name="(special market symbol)",
                source="special_symbol",
                warning="Special market symbol accepted while live validation is temporarily unavailable.",
            )

        # Nothing left that could vouch for the ticker.
        failure_code = ErrorCode.API_TIMEOUT if timed_out else ErrorCode.API_ERROR
        return TickerValidationResult(
            valid=False, ticker=symbol, code=failure_code,
            error=(
                "Cannot verify this ticker right now. "
                "Please try again in a few minutes or use a well-known ticker."
            ),
        )
# ---------------------------------------------------------------------------
# Public entry point
# ---------------------------------------------------------------------------
def validate_ticker(ticker: str) -> TickerValidationResult:
    """Run the complete validation pipeline for *ticker* and return its single result.

    Currently a direct delegation to ``validate_ticker_exists``.
    """
    outcome = validate_ticker_exists(ticker)
    return outcome