Spaces:
Sleeping
Sleeping
File size: 10,645 Bytes
f1de52c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 | #!/usr/bin/env python3
# ===========================================
# Alpha Sentiment Engine β PROTOTYPE (Day 2)
# ===========================================
# GOAL: Pull stock price + headlines, then
# score each headline with FinBERT AI.
#
# Before running, install dependencies:
# pip install requests pandas transformers torch
#
# Then just run:
# python prototype.py
# ===========================================
import requests
import pandas as pd
from datetime import datetime
# These two lines load the FinBERT AI tools:
# - AutoTokenizer: converts text β numbers the AI can read
# - AutoModel...: the actual AI brain that does the scoring
from transformers import AutoTokenizer, AutoModelForSequenceClassification # type: ignore
import torch
# ───────────────────────────────────────────
# 🔑 API KEYS
# ───────────────────────────────────────────
# 1. Alpha Vantage (free): https://www.alphavantage.co/support/#api-key
# 2. NewsAPI (free): https://newsapi.org/register
#
# SECURITY NOTE(review): these keys were previously committed in plain
# text. They are now read from the environment first; the hard-coded
# values remain only as a fallback so the script keeps working
# unchanged. Rotate the exposed keys and prefer the env vars.
# ───────────────────────────────────────────
import os

ALPHA_VANTAGE_KEY: str = os.environ.get("ALPHA_VANTAGE_KEY", "4UNJIJF65URP0KDY")
NEWS_API_KEY: str = os.environ.get("NEWS_API_KEY", "5f08b14eae02463f86d53dfd190fe74f")

# The stock ticker you want to look up
TICKER: str = "AAPL"

# ───────────────────────────────────────────
# Ticker → Company Name mapping
# ───────────────────────────────────────────
# NewsAPI can't usefully search "AAPL" — articles rarely contain the
# raw ticker, so we search by the real company name instead.
# Add more tickers here as you need them!
# ───────────────────────────────────────────
COMPANY_NAMES: dict[str, str] = {
    "AAPL": "Apple",
    "TSLA": "Tesla",
    "MSFT": "Microsoft",
    "GOOGL": "Google",
    "AMZN": "Amazon",
    "NVDA": "Nvidia",
    "META": "Meta",
}
# ──────────────────────────────────────────────────────────
# PART 1 — Get the current stock price from Alpha Vantage
# ──────────────────────────────────────────────────────────
def get_stock_price(ticker: str) -> dict:
    """
    Fetch the latest stock quote for a given ticker symbol.

    Args:
        ticker: stock symbol, e.g. "AAPL".

    Returns:
        A dict like
        {"price": 189.45, "volume": 52341234, "updated": "2026-02-21"}.
        On any failure (network error, bad key, malformed data) a zeroed
        placeholder {"price": 0.0, "volume": 0, "updated": "N/A"} is
        returned so the caller never crashes.
    """
    url: str = "https://www.alphavantage.co/query"
    params: dict = {
        "function": "GLOBAL_QUOTE",  # gives us the latest price
        "symbol": ticker,
        "apikey": ALPHA_VANTAGE_KEY,
    }
    fallback: dict = {"price": 0.0, "volume": 0, "updated": "N/A"}

    print(f"📡 Fetching stock price for {ticker}...")
    try:
        # Timeout so a dead connection can't hang the script forever.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data: dict = response.json()
    except (requests.RequestException, ValueError) as err:
        print(f"  ⚠️ Request failed: {err}")
        return fallback

    # Alpha Vantage wraps everything under "Global Quote"
    quote: dict = data.get("Global Quote", {})
    if not quote:
        print("  ⚠️ No data returned. Check your API key and ticker.")
        return fallback
    try:
        return {
            "price": float(quote.get("05. price", 0)),
            "volume": int(float(quote.get("06. volume", 0))),
            "updated": quote.get("07. latest trading day", "N/A"),
        }
    except (TypeError, ValueError):
        # The API occasionally returns non-numeric strings here.
        print("  ⚠️ Malformed quote data returned by Alpha Vantage.")
        return fallback
# ──────────────────────────────────────────────────────────
# PART 2 — Get the top news headlines from NewsAPI
# ──────────────────────────────────────────────────────────
def get_news_headlines(query: str, count: int = 5) -> list[dict]:
    """
    Fetch the latest news headlines for a search query.

    Args:
        query: search string, e.g. "Apple stock".
        count: maximum number of headlines to fetch (default 5).

    Returns:
        A list of dicts like
        [{"title": "...", "source": "Reuters", "published": "...", "url": "..."}].
        An empty list is returned on any failure or when nothing matches.
    """
    url: str = "https://newsapi.org/v2/everything"
    params: dict = {
        "q": query,
        "sortBy": "publishedAt",  # newest first
        "pageSize": count,
        "language": "en",
        "apiKey": NEWS_API_KEY,
    }
    print(f"📰 Fetching top {count} headlines for '{query}'...")
    try:
        # Timeout so a dead connection can't hang the script forever.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data: dict = response.json()
    except (requests.RequestException, ValueError) as err:
        print(f"  ⚠️ Request failed: {err}")
        return []

    articles: list = data.get("articles", [])
    if not articles:
        print("  ⚠️ No articles found. Check your API key.")
        return []

    # Keep only the fields the rest of the script needs.
    # NOTE: NewsAPI sends explicit nulls for missing titles/sources, so
    # `or`-fallbacks are used — dict.get defaults don't cover None values.
    return [
        {
            "title": article.get("title") or "No title",
            "source": (article.get("source") or {}).get("name", "Unknown"),
            "published": article.get("publishedAt", "N/A"),
            "url": article.get("url", ""),
        }
        for article in articles
    ]
# ──────────────────────────────────────────────────────────
# PART 2.5 — Load FinBERT once, at module load
# ──────────────────────────────────────────────────────────
# The model is loaded ONCE when the script starts (not inside the
# scoring function). The first run downloads ~440 MB; after that the
# weights are cached locally, so startup takes only a few seconds.
# ──────────────────────────────────────────────────────────
print("🤖 Loading FinBERT AI model (first time may take a minute)...")
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
model.eval()  # inference mode: we're just scoring, not training
print("✅ FinBERT loaded and ready!\n")
def score_sentiment(headline: str) -> float:
    """
    Return FinBERT's sentiment for *headline* as a single float.

    The model emits logits for [positive, negative, neutral]; after a
    softmax, the score is P(positive) - P(negative), giving a value in
    [-1.0, +1.0] where negative numbers mean bearish news.

    Args:
        headline: the news headline text.

    Returns:
        Sentiment score rounded to 4 decimal places.
    """
    # Tokenize: text → model-ready tensors, truncated to FinBERT's
    # 512-token input limit.
    encoded = tokenizer(
        headline,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )

    # Inference only — disabling gradient tracking saves memory.
    with torch.no_grad():
        logits = model(**encoded).logits

    # Softmax the [1, 3] logits into probabilities that sum to 1.0.
    probabilities = torch.softmax(logits, dim=1)[0]
    pos_prob: float = probabilities[0].item()
    neg_prob: float = probabilities[1].item()
    # The neutral probability (index 2) is intentionally ignored.

    return round(pos_prob - neg_prob, 4)
def sentiment_emoji(score: float) -> str:
    """
    Map a sentiment score to a traffic-light emoji.

    Args:
        score: FinBERT sentiment in [-1.0, +1.0].

    Returns:
        "🟢" for clearly positive (> 0.15), "🔴" for clearly negative
        (< -0.15), otherwise "🟡" for roughly neutral.
    """
    if score > 0.15:
        return "🟢"
    if score < -0.15:
        return "🔴"
    return "🟡"
# ──────────────────────────────────────────────────────────
# PART 3 — Print everything cleanly
# ──────────────────────────────────────────────────────────
def main() -> None:
    """Tie everything together: fetch price + headlines, score, print."""
    print()
    print("=" * 60)
    print("  🚀 Alpha Sentiment Engine — Prototype")
    print(f"  📅 {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 60)
    print()

    # ---- Stock Price ----
    price_data: dict = get_stock_price(TICKER)
    print()
    print(f"  💰 {TICKER} Stock Price")
    print("  " + "─" * 25)
    print(f"     Price:       ${price_data['price']:.2f}")
    print(f"     Volume:      {price_data['volume']:,}")
    print(f"     Last Trade:  {price_data['updated']}")
    print()

    # ---- News Headlines ----
    # Search by company name, not ticker — "Apple stock" finds articles,
    # "AAPL" mostly does not, because articles use the company name.
    company: str = COMPANY_NAMES.get(TICKER, TICKER)
    search_term: str = f"{company} stock"
    headlines: list[dict] = get_news_headlines(search_term)

    print(f"  📰 Top {len(headlines)} Headlines for {TICKER} (with AI Sentiment)")
    print("  " + "─" * 25)
    for i, article in enumerate(headlines, start=1):
        # Score each headline with FinBERT.
        score: float = score_sentiment(article["title"])
        emoji: str = sentiment_emoji(score)
        print(f"  {i}. {article['title']}")
        print(f"     Source: {article['source']} | {article['published'][:10]}")
        print(f"     {emoji} Sentiment: {score:+.4f}")
        print()
        # Save the score in the article dict (for the table below).
        article["sentiment"] = score

    # ---- Show as a pandas DataFrame (bonus!) ----
    if headlines:
        print("  📊 Headlines + Sentiment Table")
        print("  " + "─" * 25)
        df = pd.DataFrame(headlines)
        print(df[["title", "source", "sentiment"]].to_string(index=False))
        print()

    print("=" * 60)
    print("  ✅ Prototype complete! Price + News + AI Sentiment")
    print("  👉 Next step: build the async pipeline with Redis + Celery.")
    print("=" * 60)
    print()


if __name__ == "__main__":
    main()
|