Spaces:
Sleeping
Sleeping
#!/usr/bin/env python3
# ===========================================
# Alpha Sentiment Engine — PROTOTYPE (Day 2)
# ===========================================
# GOAL: Pull stock price + headlines, then
# score each headline with FinBERT AI.
#
# Before running, install dependencies:
#     pip install requests pandas transformers torch
#
# Then just run:
#     python prototype.py
# ===========================================
import os
from datetime import datetime

import requests
import pandas as pd
import torch
# These two lines load the FinBERT AI tools:
# - AutoTokenizer: converts text -> numbers the AI can read
# - AutoModel...: the actual AI brain that does the scoring
from transformers import AutoTokenizer, AutoModelForSequenceClassification  # type: ignore
# ───────────────────────────────────────────
# API keys
# ───────────────────────────────────────────
# 1. Alpha Vantage (free): https://www.alphavantage.co/support/#api-key
# 2. NewsAPI (free):       https://newsapi.org/register
# ───────────────────────────────────────────
# SECURITY: the fallback literals below are real keys committed to source
# control. Prefer setting ALPHA_VANTAGE_KEY / NEWS_API_KEY in the
# environment; the leaked keys should be rotated.
ALPHA_VANTAGE_KEY: str = os.getenv("ALPHA_VANTAGE_KEY", "4UNJIJF65URP0KDY")
NEWS_API_KEY: str = os.getenv("NEWS_API_KEY", "5f08b14eae02463f86d53dfd190fe74f")

# The stock ticker you want to look up.
TICKER: str = "AAPL"

# ───────────────────────────────────────────
# Ticker → company name mapping
# ───────────────────────────────────────────
# NewsAPI can't usefully search "AAPL" — articles use the company name,
# so we translate the ticker before querying. Add more tickers as needed.
COMPANY_NAMES: dict[str, str] = {
    "AAPL": "Apple",
    "TSLA": "Tesla",
    "MSFT": "Microsoft",
    "GOOGL": "Google",
    "AMZN": "Amazon",
    "NVDA": "Nvidia",
    "META": "Meta",
}
# ──────────────────────────────────────────────────────────
# PART 1 — Get the current stock price from Alpha Vantage
# ──────────────────────────────────────────────────────────
def get_stock_price(ticker: str) -> dict:
    """
    Fetch the latest stock price for a given ticker symbol.

    Args:
        ticker: stock symbol, e.g. "AAPL".

    Returns:
        A dict like:
            {"price": 189.45, "volume": 52341234, "updated": "2026-02-21"}
        Falls back to {"price": 0.0, "volume": 0, "updated": "N/A"} when
        the API returns no quote (bad key, unknown ticker, or rate limit).
    """
    url: str = "https://www.alphavantage.co/query"
    params: dict = {
        "function": "GLOBAL_QUOTE",  # gives us the latest price
        "symbol": ticker,
        "apikey": ALPHA_VANTAGE_KEY,
    }
    print(f"π‘ Fetching stock price for {ticker}...")
    # timeout so a stalled connection can't hang the script forever
    response = requests.get(url, params=params, timeout=10)
    data: dict = response.json()
    # Alpha Vantage wraps everything under "Global Quote"
    quote: dict = data.get("Global Quote", {})
    if not quote:
        print(" β οΈ No data returned. Check your API key and ticker.")
        return {"price": 0.0, "volume": 0, "updated": "N/A"}
    return {
        "price": float(quote.get("05. price", 0)),
        "volume": int(quote.get("06. volume", 0)),
        "updated": quote.get("07. latest trading day", "N/A"),
    }
# ──────────────────────────────────────────────────────────
# PART 2 — Get the top 5 news headlines from NewsAPI
# ──────────────────────────────────────────────────────────
def get_news_headlines(query: str, count: int = 5) -> list[dict]:
    """
    Fetch the latest news headlines for a search query.

    Args:
        query: free-text search term, e.g. "Apple stock".
        count: maximum number of headlines to return (default 5).

    Returns:
        A list of dicts like:
            [{"title": "Apple beats...", "source": "Reuters",
              "published": "2026-02-21T12:00:00Z", "url": "https://..."}]
        Empty list when nothing comes back (bad key or no matches).
    """
    url: str = "https://newsapi.org/v2/everything"
    params: dict = {
        "q": query,
        "sortBy": "publishedAt",  # newest first
        "pageSize": count,
        "language": "en",
        "apiKey": NEWS_API_KEY,
    }
    print(f"π° Fetching top {count} headlines for '{query}'...")
    # timeout so a stalled connection can't hang the script forever
    response = requests.get(url, params=params, timeout=10)
    data: dict = response.json()
    articles: list = data.get("articles", [])
    if not articles:
        print(" β οΈ No articles found. Check your API key.")
        return []
    # Keep only the fields the report needs from each article.
    return [
        {
            "title": article.get("title", "No title"),
            "source": article.get("source", {}).get("name", "Unknown"),
            "published": article.get("publishedAt", "N/A"),
            "url": article.get("url", ""),
        }
        for article in articles
    ]
# ──────────────────────────────────────────────────────────
# PART 2.5 — Score a headline with FinBERT AI
# ──────────────────────────────────────────────────────────
# Load the tokenizer + model ONCE at import time so score_sentiment()
# can reuse the module-level globals on every call. The first run
# downloads the weights (~440 MB per the original author's note); after
# that the transformers library serves them from its local cache.
# NOTE(review): module-level loading means merely importing this file is
# slow and hits the network — fine for a script, consider lazy loading
# if this ever becomes an importable library module.
print("π€ Loading FinBERT AI model (first time may take a minute)...")
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
model.eval()  # inference mode — we're scoring, not training
print("β FinBERT loaded and ready!\n")
def score_sentiment(headline: str) -> float:
    """
    Score a single headline with FinBERT.

    Pipeline: tokenize the text, run the model with gradients disabled,
    softmax the logits into [positive, negative, neutral] probabilities,
    then collapse them into one number.

    Args:
        headline: the news headline text.

    Returns:
        positive_prob - negative_prob, rounded to 4 decimal places —
        a float from -1.0 (very negative) to +1.0 (very positive).
    """
    # Text → token tensors the model understands.
    encoded = tokenizer(
        headline,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    # Inference only — skip gradient bookkeeping to save memory.
    with torch.no_grad():
        logits = model(**encoded).logits
    # logits shape [1, 3] → probabilities for [positive, negative, neutral]
    probabilities = torch.softmax(logits, dim=1)[0]
    pos_prob: float = probabilities[0].item()
    neg_prob: float = probabilities[1].item()
    # Neutral probability is intentionally ignored.
    return round(pos_prob - neg_prob, 4)
def sentiment_emoji(score: float) -> str:
    """Map a sentiment score to a traffic-light emoji.

    Above +0.15 is positive, below -0.15 is negative, and the band
    in between (inclusive of both thresholds) is neutral.
    """
    return "π’" if score > 0.15 else ("π΄" if score < -0.15 else "π‘")
# ──────────────────────────────────────────────────────────
# PART 3 — Print everything cleanly
# ──────────────────────────────────────────────────────────
def main() -> None:
    """Tie everything together: fetch the price and headlines for TICKER,
    score each headline with FinBERT, and print a formatted report
    (plus a pandas summary table when any headlines were found)."""
    print()
    print("=" * 60)
    print(" π Alpha Sentiment Engine β Prototype")
    print(f" π {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 60)
    print()

    # ---- Stock Price ----
    price_data: dict = get_stock_price(TICKER)
    print()
    print(f" π° {TICKER} Stock Price")
    # F541 fix: these divider strings have no placeholders, so no f-prefix.
    print(" βββββββββββββββββββββββββ")
    print(f" Price: ${price_data['price']:.2f}")
    print(f" Volume: {price_data['volume']:,}")
    print(f" Last Trade: {price_data['updated']}")
    print()

    # ---- News Headlines ----
    # Search by company name, not ticker — articles say "Apple", not "AAPL".
    company: str = COMPANY_NAMES.get(TICKER, TICKER)
    search_term: str = f"{company} stock"
    headlines: list[dict] = get_news_headlines(search_term)
    print(f" π° Top {len(headlines)} Headlines for {TICKER} (with AI Sentiment)")
    print(" βββββββββββββββββββββββββ")
    for i, article in enumerate(headlines, start=1):
        # Score each headline with FinBERT.
        score: float = score_sentiment(article["title"])
        emoji: str = sentiment_emoji(score)
        print(f" {i}. {article['title']}")
        print(f" Source: {article['source']} | {article['published'][:10]}")
        print(f" {emoji} Sentiment: {score:+.4f}")
        print()
        # Keep the score on the article dict for the table below.
        article["sentiment"] = score

    # ---- Show as a pandas DataFrame (bonus!) ----
    if headlines:
        print(" π Headlines + Sentiment Table")
        print(" βββββββββββββββββββββββββ")
        df = pd.DataFrame(headlines)
        print(df[["title", "source", "sentiment"]].to_string(index=False))

    print()
    print("=" * 60)
    print(" β Prototype complete! Price + News + AI Sentiment")
    print(" π Next step: build the async pipeline with Redis + Celery.")
    print("=" * 60)
    print()


if __name__ == "__main__":
    main()