Spaces:
Sleeping
Sleeping
| from datetime import datetime, timedelta, timezone | |
| from pathlib import Path | |
| import sys | |
| import pandas as pd | |
| sys.path.insert(0, str(Path(__file__).resolve().parents[1])) | |
| from data.scraper import NewsScraper | |
| from engine.analytics import AnalyticsEngine | |
| def _headline_frame(rows): | |
| return pd.DataFrame(rows) | |
def test_scraper_init():
    """NewsScraper exposes the ``limit`` it was constructed with."""
    requested_limit = 10
    scraper = NewsScraper(limit=requested_limit)
    assert scraper.limit == requested_limit
def test_scraper_query_diversity():
    """A scraper with limit=600 builds at least 50 queries for "TSLA"."""
    minimum_queries = 50
    built = NewsScraper(limit=600)._build_queries("TSLA")
    assert len(built) >= minimum_queries
def test_summary_does_not_eager_load_models():
    """get_summary on a pre-scored row leaves the model attributes as None.

    The three model attributes are checked right after construction and
    again after ``get_summary`` has run on a single strongly positive row.
    """
    model_attrs = ("finbert", "distilroberta", "ranker")

    engine = AnalyticsEngine()
    for attr in model_attrs:
        assert getattr(engine, attr) is None

    scored_row = dict(
        title="TSLA beats estimates and raises guidance",
        timestamp=datetime.now(timezone.utc).isoformat(),
        ensemble_pol=0.82,
        finbert_pol=0.9,
        roberta_pol=0.75,
        finbert_score=0.95,
        roberta_score=0.88,
        agreement=1.0,
        conviction=0.8,
        significance=0.9,
    )
    summary = engine.get_summary(_headline_frame([scored_row]))

    assert summary["direction_call"] in {"UP", "MIXED"}
    assert summary["direction_score"] >= 50
    assert summary["event_support"] > 0.5

    # Summarising must not have populated any of the model attributes.
    for attr in model_attrs:
        assert getattr(engine, attr) is None
def test_positive_direction_summary_is_bullish():
    """Three recent, strongly positive headlines produce an "UP" summary."""
    columns = (
        "title",
        "timestamp",
        "ensemble_pol",
        "finbert_pol",
        "roberta_pol",
        "finbert_score",
        "roberta_score",
        "agreement",
        "conviction",
        "significance",
    )
    now = datetime.now(timezone.utc)
    # One tuple per headline, in the same order as ``columns``.
    records = [
        (
            "TSLA beats estimates and raises guidance after record deliveries",
            now.isoformat(),
            0.86, 0.91, 0.79, 0.97, 0.9, 1.0, 0.82, 0.95,
        ),
        (
            "Analyst upgrades TSLA and raises price target",
            (now - timedelta(hours=3)).isoformat(),
            0.72, 0.8, 0.63, 0.92, 0.84, 1.0, 0.71, 0.88,
        ),
        (
            "TSLA wins major battery contract in growth push",
            (now - timedelta(hours=8)).isoformat(),
            0.61, 0.68, 0.53, 0.88, 0.8, 1.0, 0.61, 0.82,
        ),
    ]
    frame = _headline_frame([dict(zip(columns, record)) for record in records])

    engine = AnalyticsEngine()
    summary = engine.get_summary(frame)

    assert summary["direction_call"] == "UP"
    assert summary["direction_score"] >= 60
    assert "Bullish" in summary["state_title"]
    assert summary["bullish_pressure"] > summary["bearish_pressure"]
    # Both narrative fields must be non-empty.
    assert summary["state_explanation"]
    assert summary["bullish_drivers"]
def test_negative_direction_summary_is_bearish():
    """Three recent, strongly negative headlines produce a "DOWN" summary."""
    columns = (
        "title",
        "timestamp",
        "ensemble_pol",
        "finbert_pol",
        "roberta_pol",
        "finbert_score",
        "roberta_score",
        "agreement",
        "conviction",
        "significance",
    )
    now = datetime.now(timezone.utc)
    # One tuple per headline, in the same order as ``columns``.
    records = [
        (
            "TSLA cuts guidance as revenue falls below estimates",
            now.isoformat(),
            -0.88, -0.93, -0.81, 0.97, 0.9, 1.0, 0.86, 0.96,
        ),
        (
            "SEC investigation and lawsuit deepen pressure on TSLA stock",
            (now - timedelta(hours=2)).isoformat(),
            -0.78, -0.85, -0.68, 0.94, 0.86, 1.0, 0.76, 0.91,
        ),
        (
            "Analyst downgrade sends TSLA lower on demand fears",
            (now - timedelta(hours=5)).isoformat(),
            -0.67, -0.72, -0.59, 0.89, 0.82, 1.0, 0.64, 0.84,
        ),
    ]
    frame = _headline_frame([dict(zip(columns, record)) for record in records])

    engine = AnalyticsEngine()
    summary = engine.get_summary(frame)

    assert summary["direction_call"] == "DOWN"
    assert summary["direction_score"] <= 40
    assert "Bearish" in summary["state_title"]
    assert summary["bearish_pressure"] > summary["bullish_pressure"]
    # The risk list must be non-empty.
    assert summary["bearish_risks"]
def test_mixed_flow_lowers_confidence():
    """Weak, conflicting headlines yield a "MIXED" call with sub-70 confidence."""
    now = datetime.now(timezone.utc)

    def row(title, age, ensemble, finbert, roberta, fin_score, rob_score, conviction, significance):
        # Every row in this scenario has full model agreement (1.0).
        return {
            "title": title,
            "timestamp": (now - age).isoformat() if age is not None else now.isoformat(),
            "ensemble_pol": ensemble,
            "finbert_pol": finbert,
            "roberta_pol": roberta,
            "finbert_score": fin_score,
            "roberta_score": rob_score,
            "agreement": 1.0,
            "conviction": conviction,
            "significance": significance,
        }

    headlines = [
        row(
            "TSLA beats estimates but warns on margin headwinds",
            None, 0.18, 0.24, 0.1, 0.81, 0.76, 0.2, 0.72,
        ),
        row(
            "Analyst downgrade offsets recent TSLA rally",
            timedelta(hours=4), -0.22, -0.28, -0.14, 0.82, 0.74, 0.22, 0.75,
        ),
        row(
            "Investors await TSLA delivery update as outlook remains uncertain",
            timedelta(hours=9), 0.02, 0.04, 0.0, 0.7, 0.66, 0.04, 0.63,
        ),
    ]

    engine = AnalyticsEngine()
    summary = engine.get_summary(_headline_frame(headlines))

    assert summary["direction_confidence"] < 70
    assert summary["direction_call"] == "MIXED"
    score = summary["direction_score"]
    assert 35 <= score <= 65
def test_single_generic_headline_is_not_overcalled():
    """One mildly positive headline stays "MIXED" with a near-midpoint score."""
    lone_headline = dict(
        title="TSLA launches new product for mass market buyers",
        timestamp=datetime.now(timezone.utc).isoformat(),
        ensemble_pol=0.35,
        finbert_pol=0.38,
        roberta_pol=0.3,
        finbert_score=0.82,
        roberta_score=0.79,
        agreement=1.0,
        conviction=0.3,
        significance=0.68,
    )

    engine = AnalyticsEngine()
    summary = engine.get_summary(_headline_frame([lone_headline]))

    assert summary["direction_call"] == "MIXED"
    score = summary["direction_score"]
    assert 45 <= score <= 58
    # With a single row the summary should report it as (almost) the whole signal.
    assert summary["headline_concentration"] >= 0.95
    assert summary["effective_articles"] <= 1.1
def test_major_singleton_event_can_escape_midpoint_bias():
    """A single high-significance positive headline is still called "UP"."""
    major_event = dict(
        title="AAPL beats estimates and raises guidance for next quarter",
        timestamp=datetime.now(timezone.utc).isoformat(),
        ensemble_pol=0.8,
        finbert_pol=0.86,
        roberta_pol=0.72,
        finbert_score=0.95,
        roberta_score=0.87,
        agreement=1.0,
        conviction=0.77,
        significance=0.93,
    )

    engine = AnalyticsEngine()
    summary = engine.get_summary(_headline_frame([major_event]))

    assert summary["direction_call"] == "UP"
    # Lower bounds the summary must clear for this one-row input.
    assert summary["direction_score"] >= 57
    assert summary["vibe"] >= 7
    assert summary["event_support"] >= 0.72
def test_stale_signal_needs_fresh_confirmation():
    """A five-day-old positive row plus a fresh neutral row is called "MIXED"."""
    columns = (
        "title",
        "timestamp",
        "ensemble_pol",
        "finbert_pol",
        "roberta_pol",
        "finbert_score",
        "roberta_score",
        "agreement",
        "conviction",
        "significance",
    )
    now = datetime.now(timezone.utc)
    five_days_ago = now - timedelta(days=5)

    stale_positive = (
        "TSLA beats estimates and raises guidance",
        five_days_ago.isoformat(),
        0.82, 0.9, 0.74, 0.95, 0.87, 1.0, 0.79, 0.92,
    )
    fresh_neutral = (
        "Investors await TSLA update as outlook remains uncertain",
        now.isoformat(),
        0.01, 0.03, 0.0, 0.71, 0.67, 1.0, 0.02, 0.6,
    )
    # Row order matches the original fixture: stale row first, fresh row second.
    frame = _headline_frame(
        [dict(zip(columns, record)) for record in (stale_positive, fresh_neutral)]
    )

    engine = AnalyticsEngine()
    summary = engine.get_summary(frame)

    assert summary["direction_call"] == "MIXED"
    assert summary["direction_confidence"] < 55
    assert summary["recency_support"] < 0.7
def test_vibe_scale_moves_off_center_for_mild_directional_lean():
    """Mildly bullish rows give vibe >= 6; mildly bearish rows give vibe <= 4."""
    columns = (
        "title",
        "timestamp",
        "ensemble_pol",
        "finbert_pol",
        "roberta_pol",
        "finbert_score",
        "roberta_score",
        "agreement",
        "conviction",
        "significance",
    )
    now = datetime.now(timezone.utc)

    def frame_of(records):
        # Pair each value tuple with ``columns`` to rebuild the row dicts.
        return _headline_frame([dict(zip(columns, record)) for record in records])

    bullish = frame_of(
        [
            (
                "Oracle partnership expands enterprise demand pipeline",
                now.isoformat(),
                0.39, 0.44, 0.31, 0.85, 0.79, 1.0, 0.35, 0.73,
            ),
            (
                "Analyst note turns constructive on Oracle cloud growth",
                (now - timedelta(hours=5)).isoformat(),
                0.28, 0.33, 0.2, 0.81, 0.74, 1.0, 0.25, 0.7,
            ),
        ]
    )
    bearish = frame_of(
        [
            (
                "Intel downgrade reflects weaker PC demand expectations",
                now.isoformat(),
                -0.41, -0.46, -0.34, 0.86, 0.8, 1.0, 0.37, 0.75,
            ),
            (
                "Intel delay raises execution concerns for next launch",
                (now - timedelta(hours=4)).isoformat(),
                -0.29, -0.34, -0.22, 0.82, 0.76, 1.0, 0.26, 0.71,
            ),
        ]
    )

    engine = AnalyticsEngine()
    # Same engine instance summarises both frames, bullish first.
    bullish_summary = engine.get_summary(bullish)
    bearish_summary = engine.get_summary(bearish)

    assert bullish_summary["vibe"] >= 6
    assert bearish_summary["vibe"] <= 4
def test_estimate_time():
    """estimate_time(600) lies in [20, 200] and exceeds estimate_time(50)."""
    engine = AnalyticsEngine()

    large_eta = engine.estimate_time(600)
    assert large_eta >= 20
    assert large_eta <= 200

    small_eta = engine.estimate_time(50)
    assert large_eta > small_eta
def test_self_calibration():
    """Recording small timings lowers the estimate for 600 articles."""
    engine = AnalyticsEngine()
    baseline_eta = engine.estimate_time(600)

    # (timing key, recorded value, count) -- applied in this exact order.
    recorded_timings = (
        ("finbert_per_batch", 0.1, 1),
        ("roberta_per_batch", 0.05, 1),
        ("ranker_per_batch", 0.15, 1),
        ("scrape_per_article", 0.005, 1),
    )
    for key, value, count in recorded_timings:
        engine.record_timing(key, value, count)

    assert engine.estimate_time(600) < baseline_eta
def test_cleanup():
    """A dummy CSV written to the working directory is gone after cleanup().

    Writes ``test_dummy.csv`` (content ``"test"``) relative to the current
    working directory, calls ``NewsScraper.cleanup()``, and asserts the file
    no longer exists. The check is wrapped in ``try``/``finally`` so the
    fixture file is not left behind when ``cleanup()`` raises or the
    assertion fails.
    """
    dummy = Path("test_dummy.csv")
    dummy.write_text("test", encoding="utf-8")
    try:
        NewsScraper.cleanup()
        assert not dummy.exists()
    finally:
        # Remove the fixture ourselves if cleanup() did not, so a failing run
        # does not pollute the working directory for later tests.
        if dummy.exists():
            dummy.unlink()