# alpha-sentiment-engine / prototype.py
# malikparth05's picture
# Cloud Data JSON Sync
# f1de52c
#!/usr/bin/env python3
# ===========================================
# Alpha Sentiment Engine — PROTOTYPE (Day 2)
# ===========================================
# GOAL: Pull stock price + headlines, then
# score each headline with FinBERT AI.
#
# Before running, install dependencies:
# pip install requests pandas transformers torch
#
# Then just run:
# python prototype.py
# ===========================================
import os
from datetime import datetime

import pandas as pd
import requests
# These two lines load the FinBERT AI tools:
# - AutoTokenizer: converts text → numbers the AI can read
# - AutoModel...: the actual AI brain that does the scoring
from transformers import AutoTokenizer, AutoModelForSequenceClassification # type: ignore
import torch
# ───────────────────────────────────────────
# πŸ”‘ PASTE YOUR API KEYS HERE
# ───────────────────────────────────────────
# 1. Alpha Vantage (free): https://www.alphavantage.co/support/#api-key
# 2. NewsAPI (free): https://newsapi.org/register
# ───────────────────────────────────────────
# NOTE(security): API keys committed to source control should be treated as
# exposed and rotated. Prefer exporting ALPHA_VANTAGE_KEY / NEWS_API_KEY in
# your environment; the literals below are only fallbacks so the script keeps
# running unchanged when those variables are not set.
ALPHA_VANTAGE_KEY: str = os.environ.get("ALPHA_VANTAGE_KEY", "4UNJIJF65URP0KDY")
NEWS_API_KEY: str = os.environ.get(
    "NEWS_API_KEY", "5f08b14eae02463f86d53dfd190fe74f"
)
# The stock ticker you want to look up
TICKER: str = "AAPL"
# ───────────────────────────────────────────
# Ticker β†’ Company Name mapping
# ───────────────────────────────────────────
# NewsAPI can't search "AAPL" β€” no article says that.
# We need the real company name so it finds actual headlines.
# Add more tickers here as you need them!
# ───────────────────────────────────────────
# Maps a ticker symbol to the company name used when searching for news.
# Tickers missing from this table are looked up with a caller-side default.
COMPANY_NAMES: dict[str, str] = dict(
    (
        ("AAPL", "Apple"),
        ("TSLA", "Tesla"),
        ("MSFT", "Microsoft"),
        ("GOOGL", "Google"),
        ("AMZN", "Amazon"),
        ("NVDA", "Nvidia"),
        ("META", "Meta"),
    )
)
# ──────────────────────────────────────────────────────────
# PART 1 β€” Get the current stock price from Alpha Vantage
# ──────────────────────────────────────────────────────────
def get_stock_price(ticker: str, timeout: float = 10.0) -> dict:
    """
    Fetch the latest stock quote for a ticker symbol from Alpha Vantage.

    Args:
        ticker: the symbol sent as the "symbol" query parameter.
        timeout: seconds to wait for the HTTP request before giving up.

    Returns:
        A dict like:
        {"price": 189.45, "volume": 52341234, "updated": "2026-02-21"}
        When the request fails, the response is not JSON, the
        "Global Quote" section is missing/empty, or a numeric field cannot
        be parsed, a placeholder is returned instead:
        {"price": 0.0, "volume": 0, "updated": "N/A"}
    """
    fallback: dict = {"price": 0.0, "volume": 0, "updated": "N/A"}
    url: str = "https://www.alphavantage.co/query"
    params: dict = {
        "function": "GLOBAL_QUOTE",  # gives us the latest price
        "symbol": ticker,
        "apikey": ALPHA_VANTAGE_KEY,
    }
    print(f"πŸ“‘ Fetching stock price for {ticker}...")
    try:
        # Without a timeout, requests can block forever on a stalled socket.
        response = requests.get(url, params=params, timeout=timeout)
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Only the exception type is printed: the full message may contain
        # the request URL, which carries the API key in its query string.
        print(f" ⚠️ Request failed ({type(exc).__name__}). Check your network connection.")
        return fallback

    # Alpha Vantage wraps everything under "Global Quote"
    quote = data.get("Global Quote") if isinstance(data, dict) else None
    if not quote:
        print(" ⚠️ No data returned. Check your API key and ticker.")
        return fallback

    try:
        return {
            "price": float(quote.get("05. price", 0)),
            "volume": int(quote.get("06. volume", 0)),
            "updated": quote.get("07. latest trading day", "N/A"),
        }
    except (TypeError, ValueError):
        # A field was present but not numeric; fall back rather than crash.
        print(" ⚠️ No data returned. Check your API key and ticker.")
        return fallback
# ──────────────────────────────────────────────────────────
# PART 2 β€” Get the top 5 news headlines from NewsAPI
# ──────────────────────────────────────────────────────────
def get_news_headlines(
    query: str, count: int = 5, timeout: float = 10.0
) -> list[dict]:
    """
    Fetch the latest news headlines for a search query from NewsAPI.

    Args:
        query: search term sent as the "q" query parameter.
        count: number of articles requested (sent as "pageSize").
        timeout: seconds to wait for the HTTP request before giving up.

    Returns:
        A list of dicts like:
        [{"title": "Apple beats...", "source": "Reuters",
          "published": "2026-02-21T12:30:00Z", "url": "https://..."}]
        Fields that are missing or null in the response are replaced by
        placeholders ("No title", "Unknown", "N/A", ""), so every value is
        a string. An empty list is returned when the request fails, the
        response is not JSON, or it contains no articles.
    """
    url: str = "https://newsapi.org/v2/everything"
    params: dict = {
        "q": query,
        "sortBy": "publishedAt",  # newest first
        "pageSize": count,
        "language": "en",
        "apiKey": NEWS_API_KEY,
    }
    print(f"πŸ“° Fetching top {count} headlines for '{query}'...")
    try:
        # Without a timeout, requests can block forever on a stalled socket.
        response = requests.get(url, params=params, timeout=timeout)
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Only the exception type is printed: the full message may contain
        # the request URL, which carries the API key in its query string.
        print(f" ⚠️ Request failed ({type(exc).__name__}). Check your network connection.")
        return []

    articles = data.get("articles") if isinstance(data, dict) else None
    if not articles:
        print(" ⚠️ No articles found. Check your API key.")
        return []

    # Pull out just what we need from each article. `x.get(k) or default` is
    # used instead of `x.get(k, default)` because a key that is present with
    # a JSON null value would otherwise come back as None.
    headlines: list[dict] = []
    for article in articles:
        source = article.get("source") or {}
        headlines.append({
            "title": article.get("title") or "No title",
            "source": source.get("name") or "Unknown",
            "published": article.get("publishedAt") or "N/A",
            "url": article.get("url") or "",
        })
    return headlines
# ──────────────────────────────────────────────────────────
# PART 2.5 β€” Score a headline with FinBERT AI
# ──────────────────────────────────────────────────────────
# This is the NEW part! We load the AI model once,
# then use it to score every headline.
# ──────────────────────────────────────────────────────────
# Load the model ONCE when the script starts (not inside the function).
# This takes a few seconds the first time (downloads ~440 MB).
# After that, it's cached on your Mac.
# The tokenizer and model are created once at import time and shared by
# score_sentiment(), so the checkpoint is not reloaded for every headline.
_FINBERT_CHECKPOINT = "ProsusAI/finbert"
print("πŸ€– Loading FinBERT AI model (first time may take a minute)...")
tokenizer = AutoTokenizer.from_pretrained(_FINBERT_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_FINBERT_CHECKPOINT)
model.eval()  # evaluation mode: we only score headlines, we never train
print("βœ… FinBERT loaded and ready!\n")
def score_sentiment(headline: str) -> float:
    """
    Turn one headline into a single sentiment number using FinBERT.

    The headline is tokenized, passed through the module-level model, and
    the resulting logits are converted to probabilities with softmax. The
    score is the probability at index 0 (treated as "positive") minus the
    probability at index 1 (treated as "negative"); index 2 ("neutral") is
    not used.

    Args:
        headline: the news headline text

    Returns:
        The difference rounded to 4 decimals, ranging from -1.0 (very
        negative) to +1.0 (very positive).
    """
    # Text -> token tensors, truncated to at most 512 tokens.
    encoded = tokenizer(
        headline,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )

    # No gradients are needed because we are only scoring, not training.
    with torch.no_grad():
        logits = model(**encoded).logits

    # Softmax across the class dimension; take the row for our one headline.
    class_probs = torch.softmax(logits, dim=1)[0]
    p_positive: float = class_probs[0].item()
    p_negative: float = class_probs[1].item()

    return round(p_positive - p_negative, 4)
def sentiment_emoji(score: float) -> str:
    """
    Pick the marker string shown next to a sentiment score.

    Scores strictly above 0.15 get the "positive" marker, scores strictly
    below -0.15 get the "negative" marker, and everything in between
    (including exactly +/-0.15) gets the "neutral" marker.
    """
    neutral_band = 0.15
    if score < -neutral_band:
        return "πŸ”΄"
    if score > neutral_band:
        return "🟒"
    return "🟑"
# ──────────────────────────────────────────────────────────
# PART 3 β€” Print everything cleanly
# ──────────────────────────────────────────────────────────
def main() -> None:
    """
    Run the whole prototype once and print a report to stdout.

    Steps, in order: print a banner with the current local time, fetch and
    print the quote for TICKER, fetch headlines for "<company> stock",
    score each headline (storing the score under the "sentiment" key of
    its dict), and finally print the headlines as a pandas table.
    """
    banner = "=" * 60
    rule = " ─────────────────────────"

    # ---- Banner ----
    print()
    print(banner)
    print(" πŸš€ Alpha Sentiment Engine β€” Prototype")
    print(f" πŸ“… {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(banner)
    print()

    # ---- Stock Price ----
    quote: dict = get_stock_price(TICKER)
    print()
    print(f" πŸ’° {TICKER} Stock Price")
    print(rule)
    print(f" Price: ${quote['price']:.2f}")
    print(f" Volume: {quote['volume']:,}")
    print(f" Last Trade: {quote['updated']}")
    print()

    # ---- News Headlines ----
    # Search by company name rather than ticker; unknown tickers fall back
    # to the ticker itself.
    search_term: str = f"{COMPANY_NAMES.get(TICKER, TICKER)} stock"
    headlines: list[dict] = get_news_headlines(search_term)
    print(f" πŸ“° Top {len(headlines)} Headlines for {TICKER} (with AI Sentiment)")
    print(rule)
    for rank, item in enumerate(headlines, start=1):
        value: float = score_sentiment(item["title"])
        marker: str = sentiment_emoji(value)
        print(f" {rank}. {item['title']}")
        # Only the first 10 characters of the timestamp are shown.
        print(f" Source: {item['source']} | {item['published'][:10]}")
        print(f" {marker} Sentiment: {value:+.4f}")
        print()
        # Keep the score on the dict so the table below can show it.
        item["sentiment"] = value

    # ---- Summary table (skipped when there are no headlines) ----
    if headlines:
        print(" πŸ“Š Headlines + Sentiment Table")
        print(rule)
        table = pd.DataFrame(headlines)[["title", "source", "sentiment"]]
        print(table.to_string(index=False))
        print()

    # ---- Footer ----
    print(banner)
    print(" βœ… Prototype complete! Price + News + AI Sentiment")
    print(" πŸ‘‰ Next step: build the async pipeline with Redis + Celery.")
    print(banner)
    print()


if __name__ == "__main__":
    main()