File size: 5,047 Bytes
d5b7ee9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
"""News headline fetching β€” Alpaca News API (historical) with yfinance fallback."""

from __future__ import annotations

import logging
from datetime import datetime, timedelta, timezone

import pandas as pd

logger = logging.getLogger(__name__)


# ── Alpaca News API (historical, date-aware) ───────────────────────────────────

def fetch_headlines_alpaca(
    api_key: str,
    api_secret: str,
    symbol: str,
    start: datetime | None = None,
    end: datetime | None = None,
    max_articles: int = 50,
) -> list[tuple[str, float]]:
    """Fetch headlines via Alpaca News API with optional date range.

    Returns list of (headline: str, unix_timestamp: float) tuples.
    Supports historical backtesting by specifying start/end dates.
    """
    if not api_key or not api_secret:
        return []
    try:
        from alpaca.data.historical.news import NewsClient
        from alpaca.data.requests import NewsRequest

        client = NewsClient(api_key=api_key, secret_key=api_secret)

        now = datetime.now(tz=timezone.utc)
        if end is None:
            end = now
        if start is None:
            start = end - timedelta(days=7)

        request = NewsRequest(
            symbols=symbol,
            start=start,
            end=end,
            limit=min(max_articles, 100),  # Alpaca max is 100 per page
        )
        response = client.get_news(request)
        items = getattr(response, "news", response) if response else []

        headlines: list[tuple[str, float]] = []
        for item in items:
            title = getattr(item, "headline", "") or getattr(item, "title", "")
            if not title:
                continue
            created = getattr(item, "created_at", None) or getattr(item, "updated_at", None)
            if created:
                if isinstance(created, str):
                    ts = pd.Timestamp(created).timestamp()
                elif isinstance(created, (int, float)):
                    ts = float(created)
                else:
                    ts = pd.Timestamp(created).timestamp()
            else:
                ts = now.timestamp()
            headlines.append((title, float(ts)))

        logger.debug("Alpaca News: got %d headlines for %s (%s to %s)",
                      len(headlines), symbol, start, end)
        return headlines
    except Exception as exc:
        logger.warning("Alpaca News fetch failed for %s: %s", symbol, exc)
        return []


def fetch_headlines_yfinance(symbol: str, max_articles: int = 20) -> list[str]:
    """Return up to *max_articles* headline strings from yfinance's news feed.

    Only the first ``max_articles`` feed entries are inspected. Each entry's
    title is read from its top-level ``"title"`` key, falling back to the
    ``"title"`` nested under ``"content"``. Entries with no title are
    dropped. Any failure (including yfinance being unavailable) is logged as
    a warning and yields ``[]``.
    """
    try:
        import yfinance as yf

        articles = yf.Ticker(symbol).news or []
        titles = []
        for article in articles[:max_articles]:
            headline = article.get("title")
            if not headline:
                # Fall back to the nested payload; tolerate a None "content".
                nested = article.get("content", {}) or {}
                headline = nested.get("title", "")
            if headline:
                titles.append(headline)
        logger.debug("yfinance news: got %d headlines for %s", len(titles), symbol)
        return titles
    except Exception as err:
        logger.warning("yfinance news failed for %s: %s", symbol, err)
        return []


# ── Unified fetcher ───────────────────────────────────────────────────────────

def fetch_headlines(
    symbol: str,
    max_articles: int = 20,
) -> list[str]:
    """Return plain headline strings for *symbol*.

    Delegates to the yfinance fetcher because the Alpaca fetcher yields
    ``(headline, timestamp)`` tuples rather than plain strings.
    """
    plain_titles = fetch_headlines_yfinance(symbol, max_articles)
    return plain_titles


def fetch_headlines_with_timestamps(
    symbol: str,
    days_ago: int = 0,
    alpaca_key: str = "",
    alpaca_secret: str = "",
    max_articles: int = 50,
) -> list[tuple[str, float]]:
    """Return ``(headline, unix_timestamp)`` pairs for temporal weighting.

    For backtesting, pass ``days_ago > 0`` to target the UTC calendar day
    that many days before now.

    Source priority:
      1. Alpaca, queried only when both credentials are given, over the
         window 00:00:00 to 23:59:59 of the target day. A non-empty result
         is returned as-is.
      2. yfinance otherwise. It provides no timestamps here, so they are
         synthesized: the i-th headline is stamped ``i`` hours before the
         current time.
    """
    current = datetime.now(tz=timezone.utc)
    target_day = current - timedelta(days=days_ago)

    have_alpaca_credentials = bool(alpaca_key and alpaca_secret)
    if have_alpaca_credentials:
        # Bracket the whole target day, midnight through the last full second.
        window_open = target_day.replace(hour=0, minute=0, second=0, microsecond=0)
        window_close = window_open.replace(hour=23, minute=59, second=59)
        stamped = fetch_headlines_alpaca(
            alpaca_key,
            alpaca_secret,
            symbol,
            start=window_open,
            end=window_close,
            max_articles=max_articles,
        )
        if stamped:
            return stamped

    # Fallback: approximate timestamps, spaced one hour apart going backwards.
    titles = fetch_headlines_yfinance(symbol, max_articles)
    base_ts = current.timestamp()
    approximated = []
    for position, title in enumerate(titles):
        approximated.append((title, base_ts - (position * 3600)))
    return approximated