File size: 10,645 Bytes
f1de52c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
#!/usr/bin/env python3
# ===========================================
#  Alpha Sentiment Engine β€” PROTOTYPE (Day 2)
# ===========================================
#  GOAL: Pull stock price + headlines, then
#        score each headline with FinBERT AI.
#
#  Before running, install dependencies:
#    pip install requests pandas transformers torch
#
#  Then just run:
#    python prototype.py
# ===========================================

import os
from datetime import datetime

import requests
import pandas as pd

# These two lines load the FinBERT AI tools:
# - AutoTokenizer:  converts text → numbers the AI can read
# - AutoModel...:   the actual AI brain that does the scoring
from transformers import AutoTokenizer, AutoModelForSequenceClassification  # type: ignore
import torch

# ───────────────────────────────────────────
#  🔑  API KEYS
# ───────────────────────────────────────────
#  1. Alpha Vantage (free): https://www.alphavantage.co/support/#api-key
#  2. NewsAPI       (free): https://newsapi.org/register
#
#  SECURITY NOTE: keys are read from the environment first so they don't
#  have to live in source control; the literals below are fallbacks only.
#  Prefer `export ALPHA_VANTAGE_KEY=...` over editing this file.
# ───────────────────────────────────────────
ALPHA_VANTAGE_KEY: str = os.environ.get("ALPHA_VANTAGE_KEY", "4UNJIJF65URP0KDY")
NEWS_API_KEY: str = os.environ.get("NEWS_API_KEY", "5f08b14eae02463f86d53dfd190fe74f")

# The stock ticker you want to look up (override with env var TICKER).
TICKER: str = os.environ.get("TICKER", "AAPL")

# ───────────────────────────────────────────
#  Ticker → Company Name mapping
# ───────────────────────────────────────────
#  NewsAPI can't search "AAPL" — no article says that.
#  We need the real company name so it finds actual headlines.
#  Add more tickers here as you need them!
# ───────────────────────────────────────────
COMPANY_NAMES: dict[str, str] = dict(
    AAPL="Apple",
    TSLA="Tesla",
    MSFT="Microsoft",
    GOOGL="Google",
    AMZN="Amazon",
    NVDA="Nvidia",
    META="Meta",
)


# ──────────────────────────────────────────────────────────
#  PART 1 β€” Get the current stock price from Alpha Vantage
# ──────────────────────────────────────────────────────────
def get_stock_price(ticker: str) -> dict:
    """
    Fetch the latest stock price for a given ticker symbol.

    Args:
        ticker: stock symbol, e.g. "AAPL".

    Returns a dict like:
      {"price": 189.45, "volume": 52341234, "updated": "2026-02-21 12:30:00"}

    On any failure (network error, bad key, rate-limit message) the zeroed
    fallback dict is returned instead of raising, so main() can keep going.
    """

    url: str = "https://www.alphavantage.co/query"
    params: dict = {
        "function": "GLOBAL_QUOTE",   # gives us the latest price
        "symbol": ticker,
        "apikey": ALPHA_VANTAGE_KEY,
    }
    fallback: dict = {"price": 0.0, "volume": 0, "updated": "N/A"}

    print(f"📡 Fetching stock price for {ticker}...")
    try:
        # timeout= so a hung connection can't stall the whole script;
        # raise_for_status() surfaces HTTP 4xx/5xx instead of parsing junk.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data: dict = response.json()
    except (requests.RequestException, ValueError) as err:
        print(f"   ⚠️  Request failed: {err}")
        return fallback

    # Alpha Vantage wraps everything under "Global Quote"
    quote: dict = data.get("Global Quote", {})

    if not quote:
        print("   ⚠️  No data returned. Check your API key and ticker.")
        return fallback

    try:
        return {
            "price": float(quote.get("05. price", 0)),
            "volume": int(quote.get("06. volume", 0)),
            "updated": quote.get("07. latest trading day", "N/A"),
        }
    except (TypeError, ValueError):
        # Alpha Vantage sometimes returns text (e.g. rate-limit notes)
        # where numbers are expected — don't crash on that.
        print("   ⚠️  Unexpected response format from Alpha Vantage.")
        return fallback


# ──────────────────────────────────────────────────────────
#  PART 2 β€” Get the top 5 news headlines from NewsAPI
# ──────────────────────────────────────────────────────────
def get_news_headlines(query: str, count: int = 5) -> list[dict]:
    """
    Fetch the latest news headlines for a search query.

    Args:
        query: free-text search term, e.g. "Apple stock".
        count: maximum number of articles to return (default 5).

    Returns a list of dicts like:
      [{"title": "Apple beats...", "source": "Reuters", "url": "https://..."}]

    Returns [] on any failure (network error, bad key) so main() can keep going.
    """

    url: str = "https://newsapi.org/v2/everything"
    params: dict = {
        "q": query,
        "sortBy": "publishedAt",   # newest first
        "pageSize": count,
        "language": "en",
        "apiKey": NEWS_API_KEY,
    }

    print(f"📰 Fetching top {count} headlines for '{query}'...")
    try:
        # timeout= so a hung connection can't stall the whole script;
        # raise_for_status() surfaces HTTP 4xx/5xx instead of parsing junk.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data: dict = response.json()
    except (requests.RequestException, ValueError) as err:
        print(f"   ⚠️  Request failed: {err}")
        return []

    articles: list = data.get("articles", [])

    if not articles:
        print("   ⚠️  No articles found. Check your API key.")
        return []

    # Keep only the fields the rest of the script needs.
    # NOTE: "source" can be null in the payload — the `or {}` guards
    # against calling .get() on None.
    return [
        {
            "title": article.get("title", "No title"),
            "source": (article.get("source") or {}).get("name", "Unknown"),
            "published": article.get("publishedAt", "N/A"),
            "url": article.get("url", ""),
        }
        for article in articles
    ]


# ──────────────────────────────────────────────────────────
#  PART 2.5 β€” Score a headline with FinBERT AI
# ──────────────────────────────────────────────────────────
#  This is the NEW part! We load the AI model once,
#  then use it to score every headline.
# ──────────────────────────────────────────────────────────

# Load the model ONCE when the script starts (not inside the function).
# This takes a few seconds the first time (downloads ~440 MB).
# After that, it's cached on your Mac.
print("πŸ€– Loading FinBERT AI model (first time may take a minute)...")
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
model.eval()   # tell PyTorch "we're just scoring, not training"
print("βœ… FinBERT loaded and ready!\n")


def score_sentiment(headline: str) -> float:
    """
    Score a single headline using FinBERT.

    Pipeline: tokenize the text, run the model, softmax the logits into
    class probabilities, then collapse them into one number by taking
    positive minus negative (neutral drops out of the difference).

    Args:
        headline: the news headline text

    Returns:
        A float from -1.0 (very negative) to +1.0 (very positive)
    """

    # Text → token tensors; truncation keeps very long inputs legal.
    encoded = tokenizer(
        headline,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )

    # Inference only — no_grad() skips gradient bookkeeping.
    with torch.no_grad():
        logits = model(**encoded).logits

    # Softmax over the 3 classes; row 0 is our single headline.
    # Class order assumed here: index 0 = positive, index 1 = negative.
    class_probs = torch.softmax(logits, dim=1)[0]
    pos_prob: float = class_probs[0].item()
    neg_prob: float = class_probs[1].item()

    # Net sentiment in [-1, +1], rounded for tidy display.
    return round(pos_prob - neg_prob, 4)


def sentiment_emoji(score: float) -> str:
    """Return a colored emoji based on the sentiment score.

    Thresholds: above +0.15 is bullish, below -0.15 is bearish,
    anything in between counts as neutral.
    """
    if score < -0.15:
        return "🔴"
    return "🟢" if score > 0.15 else "🟡"


# ──────────────────────────────────────────────────────────
#  PART 3 β€” Print everything cleanly
# ──────────────────────────────────────────────────────────
def main() -> None:
    """The main function — ties everything together."""

    banner: str = "=" * 60
    divider: str = "  ─────────────────────────"

    print()
    print(banner)
    print("  🚀  Alpha Sentiment Engine — Prototype")
    print(f"  📅  {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(banner)
    print()

    # ---- Stock Price ----
    price_data: dict = get_stock_price(TICKER)

    print()
    print(f"  💰  {TICKER} Stock Price")
    print(divider)
    print(f"  Price:      ${price_data['price']:.2f}")
    print(f"  Volume:     {price_data['volume']:,}")
    print(f"  Last Trade: {price_data['updated']}")
    print()

    # ---- News Headlines ----
    # Articles mention the company name, never the ticker, so search for
    # e.g. "Apple stock" rather than "AAPL".
    company: str = COMPANY_NAMES.get(TICKER, TICKER)
    headlines: list[dict] = get_news_headlines(f"{company} stock")

    print(f"  📰  Top {len(headlines)} Headlines for {TICKER} (with AI Sentiment)")
    print(divider)
    for idx, article in enumerate(headlines, start=1):
        # Score each headline with FinBERT and stash the score on the
        # article dict so the DataFrame below can display it.
        article["sentiment"] = score_sentiment(article["title"])
        marker: str = sentiment_emoji(article["sentiment"])

        print(f"  {idx}. {article['title']}")
        print(f"     Source: {article['source']}  |  {article['published'][:10]}")
        print(f"     {marker} Sentiment: {article['sentiment']:+.4f}")
        print()

    # ---- Show as a pandas DataFrame (bonus!) ----
    if headlines:
        print("  📊  Headlines + Sentiment Table")
        print(divider)
        df = pd.DataFrame(headlines)
        print(df[["title", "source", "sentiment"]].to_string(index=False))
        print()

    print(banner)
    print("  ✅  Prototype complete! Price + News + AI Sentiment")
    print("  👉  Next step: build the async pipeline with Redis + Celery.")
    print(banner)
    print()


# Run the pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()