Spaces:
Runtime error
Runtime error
| """News headline fetching — Alpaca News API (historical) with yfinance fallback.""" | |
| from __future__ import annotations | |
| import logging | |
| from datetime import datetime, timedelta, timezone | |
| import pandas as pd | |
| logger = logging.getLogger(__name__) | |
| # ── Alpaca News API (historical, date-aware) ───────────────────────────────── | |
def _extract_news_items(response):
    """Return the iterable of article objects carried by an Alpaca news response."""
    if not response:
        return []
    items = getattr(response, "news", None)
    if items is None:
        # NOTE(review): alpaca-py's NewsSet appears to keep its articles under
        # ``response.data["news"]`` rather than a ``news`` attribute -- confirm
        # against the installed alpaca-py version. Without this lookup the
        # response object itself would be iterated and every entry skipped
        # for lacking a headline.
        data = getattr(response, "data", None)
        if isinstance(data, dict):
            items = data.get("news")
    # Last resort: treat the response itself as the article iterable.
    return response if items is None else items


def _to_unix_timestamp(created, fallback: float) -> float:
    """Convert an article creation value to Unix seconds, or *fallback* if it is falsy."""
    if not created:
        return fallback
    if isinstance(created, (int, float)):
        # Numeric values are taken as already being Unix seconds.
        return float(created)
    # Strings and datetime-like objects are both parsed by pandas.
    return float(pd.Timestamp(created).timestamp())


def fetch_headlines_alpaca(
    api_key: str,
    api_secret: str,
    symbol: str,
    start: datetime | None = None,
    end: datetime | None = None,
    max_articles: int = 50,
) -> list[tuple[str, float]]:
    """Fetch headlines via the Alpaca News API with an optional date range.

    Supports historical backtesting by specifying ``start``/``end``.

    Args:
        api_key: Alpaca API key. If empty, no request is made.
        api_secret: Alpaca API secret. If empty, no request is made.
        symbol: Value passed as ``symbols`` to the news request.
        start: Start of the window; defaults to seven days before ``end``.
        end: End of the window; defaults to the current UTC time.
        max_articles: Requested article count; the request limit is capped
            at 100.

    Returns:
        A list of ``(headline, unix_timestamp)`` tuples. Articles without a
        headline/title are skipped. Articles without a creation or update
        time are stamped with the current time. An empty list is returned
        when credentials are missing or when anything in the fetch fails
        (the failure is logged as a warning, not raised).
    """
    if not api_key or not api_secret:
        return []

    try:
        # Imported lazily so the module can be imported without alpaca-py.
        from alpaca.data.historical.news import NewsClient
        from alpaca.data.requests import NewsRequest

        client = NewsClient(api_key=api_key, secret_key=api_secret)

        now = datetime.now(tz=timezone.utc)
        if end is None:
            end = now
        if start is None:
            start = end - timedelta(days=7)

        request = NewsRequest(
            symbols=symbol,
            start=start,
            end=end,
            # NOTE(review): the original comment cites 100 as Alpaca's
            # per-page maximum -- confirm against the current API docs.
            limit=min(max_articles, 100),
        )
        response = client.get_news(request)

        now_ts = now.timestamp()
        headlines: list[tuple[str, float]] = []
        for item in _extract_news_items(response):
            title = getattr(item, "headline", "") or getattr(item, "title", "")
            if not title:
                continue
            created = getattr(item, "created_at", None) or getattr(item, "updated_at", None)
            headlines.append((title, _to_unix_timestamp(created, now_ts)))

        logger.debug("Alpaca News: got %d headlines for %s (%s to %s)",
                     len(headlines), symbol, start, end)
        return headlines
    except Exception as exc:
        # Best-effort fetch: callers fall back to another source on [].
        logger.warning("Alpaca News fetch failed for %s: %s", symbol, exc)
        return []
def fetch_headlines_yfinance(symbol: str, max_articles: int = 20) -> list[str]:
    """Return up to ``max_articles`` headline strings from yfinance's news feed.

    Each news entry's title is read from its top-level ``"title"`` key, or
    failing that from ``"title"`` inside its ``"content"`` mapping. Entries
    with no title are dropped. Any failure is logged as a warning and yields
    an empty list.
    """
    try:
        # Imported lazily so the module can be imported without yfinance.
        import yfinance as yf

        articles = yf.Ticker(symbol).news or []
        candidate_titles = (
            article.get("title")
            or (article.get("content", {}) or {}).get("title", "")
            for article in articles[:max_articles]
        )
        titles = [text for text in candidate_titles if text]
        logger.debug("yfinance news: got %d headlines for %s", len(titles), symbol)
        return titles
    except Exception as exc:
        logger.warning("yfinance news failed for %s: %s", symbol, exc)
        return []
| # ── Unified fetcher ────────────────────────────────────────────────────────── | |
def fetch_headlines(
    symbol: str,
    max_articles: int = 20,
) -> list[str]:
    """Return plain headline strings for ``symbol``.

    Delegates to the yfinance fetcher, because the Alpaca fetcher returns
    ``(headline, timestamp)`` tuples rather than plain strings.
    """
    plain_headlines = fetch_headlines_yfinance(symbol, max_articles)
    return plain_headlines
def fetch_headlines_with_timestamps(
    symbol: str,
    days_ago: int = 0,
    alpaca_key: str = "",
    alpaca_secret: str = "",
    max_articles: int = 50,
) -> list[tuple[str, float]]:
    """Return ``(headline, unix_timestamp)`` tuples for temporal weighting.

    Source priority is Alpaca, then yfinance.

    When both Alpaca credentials are given, Alpaca is queried for the UTC
    calendar day that lies ``days_ago`` days before now (00:00:00 through
    23:59:59). A non-empty result is returned as-is. Pass ``days_ago > 0``
    to target a historical day for backtesting.

    Otherwise the yfinance headlines are used. That fallback ignores
    ``days_ago``, and its timestamps are synthetic: the i-th headline is
    stamped ``i`` hours before the current time.
    """
    current = datetime.now(tz=timezone.utc)
    target_day = current - timedelta(days=days_ago)

    have_credentials = bool(alpaca_key and alpaca_secret)
    if have_credentials:
        # Window covering the target UTC day.
        window_start = target_day.replace(hour=0, minute=0, second=0, microsecond=0)
        window_end = window_start.replace(hour=23, minute=59, second=59)
        stamped = fetch_headlines_alpaca(
            alpaca_key,
            alpaca_secret,
            symbol,
            start=window_start,
            end=window_end,
            max_articles=max_articles,
        )
        if stamped:
            return stamped

    # yfinance fallback: no real timestamps, so space entries one hour apart.
    plain = fetch_headlines_yfinance(symbol, max_articles)
    reference_ts = current.timestamp()
    approximated = []
    for position, text in enumerate(plain):
        approximated.append((text, reference_ts - (position * 3600)))
    return approximated