Spaces:

Jitendra12421
/

finbert_anaylzer

Sleeping

File size: 11,057 Bytes

from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime, timedelta, timezone

import pandas as pd

from .analytics import AnalyticsEngine


@dataclass(frozen=True)
class BacktestScenario:
    """A named benchmark case pairing scored headline rows with a realised return.

    Instances are frozen, so fields cannot be reassigned; the ``rows`` list
    itself is an ordinary list and is not protected from in-place mutation.
    """

    # Identifier reported under the "scenario" key in run_backtest results.
    name: str
    # Per-headline records; run_backtest loads them into a pandas DataFrame.
    rows: list[dict]
    # Ground-truth next-day return (a percentage, going by the field name).
    next_day_return_pct: float
    # Ticker symbol echoed into the results; "TEST" is a placeholder default.
    ticker: str = "TEST"


@dataclass(frozen=True)
class BacktestSuite:
    """Container that splits benchmark scenarios into tuning and holdout groups."""

    # Scenarios whose names carry the "tuning_" prefix in build_benchmark_suite.
    tuning: list[BacktestScenario]
    # Scenarios whose names carry the "holdout_" prefix in build_benchmark_suite.
    holdout: list[BacktestScenario]


def _row(
    title: str,
    timestamp: datetime,
    ensemble_pol: float,
    finbert_pol: float,
    roberta_pol: float,
    finbert_score: float,
    roberta_score: float,
    conviction: float,
    significance: float,
) -> dict:
    return {
        "title": title,
        "timestamp": timestamp.isoformat(),
        "ensemble_pol": ensemble_pol,
        "finbert_pol": finbert_pol,
        "roberta_pol": roberta_pol,
        "finbert_score": finbert_score,
        "roberta_score": roberta_score,
        "agreement": 1.0,
        "conviction": conviction,
        "significance": significance,
    }


def build_benchmark_suite(reference_time: datetime | None = None) -> BacktestSuite:
    """Return the hard-coded benchmark scenarios, split into tuning and holdout.

    Args:
        reference_time: Anchor for every row timestamp. When omitted (or
            otherwise falsy) the current UTC time is used, so rows are dated
            relative to the moment of the call.

    Returns:
        A ``BacktestSuite`` with 4 tuning scenarios (all on TSLA) and
        9 holdout scenarios (one ticker each).

    The positional numbers passed to ``_row`` after the timestamp are, in
    order: ensemble_pol, finbert_pol, roberta_pol, finbert_score,
    roberta_score, conviction, significance.
    """
    now = reference_time or datetime.now(timezone.utc)

    # Tuning set: every scenario uses the TSLA ticker.
    tuning = [
        BacktestScenario(
            name="tuning_fresh_bullish_consensus",
            ticker="TSLA",
            next_day_return_pct=1.2,
            rows=[
                _row("TSLA beats estimates and raises guidance after record deliveries", now, 0.86, 0.91, 0.79, 0.97, 0.90, 0.82, 0.95),
                _row("Analyst upgrades TSLA and raises price target", now - timedelta(hours=3), 0.72, 0.80, 0.63, 0.92, 0.84, 0.71, 0.88),
                _row("TSLA wins major battery contract in growth push", now - timedelta(hours=8), 0.61, 0.68, 0.53, 0.88, 0.80, 0.61, 0.82),
            ],
        ),
        BacktestScenario(
            name="tuning_fresh_bearish_consensus",
            ticker="TSLA",
            next_day_return_pct=-1.4,
            rows=[
                _row("TSLA cuts guidance as revenue falls below estimates", now, -0.88, -0.93, -0.81, 0.97, 0.90, 0.86, 0.96),
                _row("SEC investigation and lawsuit deepen pressure on TSLA stock", now - timedelta(hours=2), -0.78, -0.85, -0.68, 0.94, 0.86, 0.76, 0.91),
                _row("Analyst downgrade sends TSLA lower on demand fears", now - timedelta(hours=5), -0.67, -0.72, -0.59, 0.89, 0.82, 0.64, 0.84),
            ],
        ),
        BacktestScenario(
            name="tuning_conflicted_flow",
            ticker="TSLA",
            next_day_return_pct=0.1,
            rows=[
                _row("TSLA beats estimates but warns on margin headwinds", now, 0.18, 0.24, 0.10, 0.81, 0.76, 0.20, 0.72),
                _row("Analyst downgrade offsets recent TSLA rally", now - timedelta(hours=4), -0.22, -0.28, -0.14, 0.82, 0.74, 0.22, 0.75),
                _row("Investors await TSLA delivery update as outlook remains uncertain", now - timedelta(hours=9), 0.02, 0.04, 0.00, 0.70, 0.66, 0.04, 0.63),
            ],
        ),
        BacktestScenario(
            name="tuning_stale_positive_signal",
            ticker="TSLA",
            next_day_return_pct=0.1,
            rows=[
                # The strongly positive headline is dated five days before `now`.
                _row("TSLA beats estimates and raises guidance", now - timedelta(days=5), 0.82, 0.90, 0.74, 0.95, 0.87, 0.79, 0.92),
                _row("Investors await TSLA update as outlook remains uncertain", now, 0.01, 0.03, 0.00, 0.71, 0.67, 0.02, 0.60),
            ],
        ),
    ]

    # Holdout set: each scenario uses a different ticker.
    holdout = [
        BacktestScenario(
            name="holdout_broad_bullish_repricing",
            ticker="NVDA",
            next_day_return_pct=1.1,
            rows=[
                _row("NVIDIA tops estimates and raises outlook on AI demand", now, 0.84, 0.89, 0.77, 0.96, 0.89, 0.80, 0.94),
                _row("Brokerage upgrade lifts NVIDIA price target after strong guidance", now - timedelta(hours=2), 0.69, 0.74, 0.61, 0.91, 0.84, 0.66, 0.87),
                _row("NVIDIA secures major cloud partnership expansion", now - timedelta(hours=6), 0.56, 0.61, 0.48, 0.87, 0.80, 0.54, 0.80),
            ],
        ),
        BacktestScenario(
            name="holdout_major_singleton_earnings",
            ticker="AAPL",
            next_day_return_pct=0.8,
            rows=[
                _row("Apple beats estimates and raises guidance for next quarter", now, 0.80, 0.86, 0.72, 0.95, 0.87, 0.77, 0.93),
            ],
        ),
        BacktestScenario(
            name="holdout_broad_bearish_repricing",
            ticker="NFLX",
            next_day_return_pct=-1.1,
            rows=[
                _row("Netflix misses estimates and cuts outlook as subscriber growth slows", now, -0.84, -0.89, -0.77, 0.96, 0.90, 0.81, 0.95),
                _row("Analyst downgrade hits Netflix after weak guidance", now - timedelta(hours=3), -0.66, -0.72, -0.58, 0.90, 0.83, 0.63, 0.86),
                _row("Probe and lawsuit add pressure to Netflix shares", now - timedelta(hours=7), -0.58, -0.64, -0.49, 0.88, 0.81, 0.55, 0.82),
            ],
        ),
        BacktestScenario(
            name="holdout_mixed_crosscurrents",
            ticker="AMZN",
            next_day_return_pct=0.05,
            rows=[
                _row("Amazon wins cloud contract but warns on margin pressure", now, 0.17, 0.22, 0.10, 0.82, 0.76, 0.19, 0.74),
                _row("Analyst downgrade trims Amazon target after recent rally", now - timedelta(hours=4), -0.19, -0.25, -0.11, 0.81, 0.73, 0.20, 0.73),
                _row("Investors stay cautious ahead of Amazon operating update", now - timedelta(hours=9), 0.00, 0.02, 0.00, 0.70, 0.66, 0.01, 0.61),
            ],
        ),
        BacktestScenario(
            name="holdout_thin_generic_positive",
            ticker="META",
            next_day_return_pct=0.18,
            rows=[
                _row("Meta launches new consumer feature across more markets", now, 0.33, 0.37, 0.27, 0.82, 0.78, 0.29, 0.67),
            ],
        ),
        BacktestScenario(
            name="holdout_thin_generic_negative",
            ticker="DIS",
            next_day_return_pct=-0.14,
            rows=[
                _row("Disney faces production delay at key studio release", now, -0.31, -0.35, -0.26, 0.84, 0.77, 0.27, 0.66),
            ],
        ),
        BacktestScenario(
            name="holdout_stale_positive_without_followthrough",
            ticker="CRM",
            next_day_return_pct=0.12,
            rows=[
                # The positive headline is dated four days before `now`.
                _row("Salesforce announces partnership expansion and upbeat commentary", now - timedelta(days=4), 0.61, 0.67, 0.52, 0.89, 0.81, 0.58, 0.83),
                _row("Traders await Salesforce update as visibility remains mixed", now, 0.01, 0.03, 0.00, 0.71, 0.67, 0.02, 0.60),
            ],
        ),
        BacktestScenario(
            name="holdout_mild_positive_lean",
            ticker="ORCL",
            next_day_return_pct=0.42,
            rows=[
                _row("Oracle partnership expands enterprise demand pipeline", now, 0.39, 0.44, 0.31, 0.85, 0.79, 0.35, 0.73),
                _row("Analyst note turns constructive on Oracle cloud growth", now - timedelta(hours=5), 0.28, 0.33, 0.20, 0.81, 0.74, 0.25, 0.70),
            ],
        ),
        BacktestScenario(
            name="holdout_mild_negative_lean",
            ticker="INTC",
            next_day_return_pct=-0.46,
            rows=[
                _row("Intel downgrade reflects weaker PC demand expectations", now, -0.41, -0.46, -0.34, 0.86, 0.80, 0.37, 0.75),
                _row("Intel delay raises execution concerns for next launch", now - timedelta(hours=4), -0.29, -0.34, -0.22, 0.82, 0.76, 0.26, 0.71),
            ],
        ),
    ]

    return BacktestSuite(tuning=tuning, holdout=holdout)


def expected_direction(next_day_return_pct: float, neutral_band_pct: float = 0.35) -> str:
    """Classify a next-day return as ``"UP"``, ``"DOWN"`` or ``"MIXED"``.

    A return at or above ``neutral_band_pct`` is ``"UP"``; one at or below
    its negation is ``"DOWN"``; anything else is ``"MIXED"``. The upward
    check is evaluated first.
    """
    if next_day_return_pct >= neutral_band_pct:
        label = "UP"
    elif next_day_return_pct <= -neutral_band_pct:
        label = "DOWN"
    else:
        label = "MIXED"
    return label


def _target_score(next_day_return_pct: float) -> int:
    normalized = max(-1.0, min(1.0, next_day_return_pct / 2.0))
    return int(round((normalized + 1.0) * 50.0))


def run_backtest(
    scenarios: list[BacktestScenario],
    engine: AnalyticsEngine | None = None,
    neutral_band_pct: float = 0.35,
) -> dict:
    """Score each scenario with the analytics engine and aggregate the outcome.

    Args:
        scenarios: Scenarios to evaluate. An empty list is allowed and yields
            an empty result list with all metrics set to zero.
        engine: Object exposing ``get_summary(DataFrame)``; the returned
            summary must provide ``direction_call``, ``direction_score`` and
            ``direction_confidence``. A default ``AnalyticsEngine`` is built
            only when this is ``None``.
        neutral_band_pct: Band passed to ``expected_direction`` when turning
            the realised return into the actual call.

    Returns:
        A dict with two keys:

        * ``"results"``: one record per scenario (calls, scores, error flags).
        * ``"metrics"``: ``scenario_count`` plus these rates, each 0.0 when
          its denominator is empty:

          - ``overall_accuracy``: share of scenarios called correctly.
          - ``decisive_precision``: accuracy over rows predicted UP/DOWN.
          - ``decisive_recall``: accuracy over rows that actually were UP/DOWN.
          - ``mixed_accuracy``: accuracy over rows that actually were MIXED.
          - ``coverage``: share of rows predicted UP/DOWN.
          - ``overcall_rate`` / ``undercall_rate``: share of rows flagged so.
          - ``mean_score_error``: mean absolute score-vs-target gap.
    """
    # Explicit None check so a caller-supplied engine is never swapped out
    # just because it happens to evaluate as falsy.
    sentiment_engine = engine if engine is not None else AnalyticsEngine()

    result_columns = [
        "scenario",
        "ticker",
        "next_day_return_pct",
        "actual_call",
        "predicted_call",
        "direction_score",
        "direction_confidence",
        "score_error",
        "is_correct",
        "overcalled",
        "undercalled",
    ]
    decisive_calls = {"UP", "DOWN"}
    results = []

    for scenario in scenarios:
        summary = sentiment_engine.get_summary(pd.DataFrame(scenario.rows))
        actual_call = expected_direction(scenario.next_day_return_pct, neutral_band_pct=neutral_band_pct)
        predicted_call = summary["direction_call"]
        direction_score = int(summary["direction_score"])
        results.append(
            {
                "scenario": scenario.name,
                "ticker": scenario.ticker,
                "next_day_return_pct": float(scenario.next_day_return_pct),
                "actual_call": actual_call,
                "predicted_call": predicted_call,
                "direction_score": direction_score,
                "direction_confidence": int(summary["direction_confidence"]),
                "score_error": abs(direction_score - _target_score(scenario.next_day_return_pct)),
                "is_correct": predicted_call == actual_call,
                "overcalled": actual_call == "MIXED" and predicted_call in decisive_calls,
                "undercalled": actual_call in decisive_calls and predicted_call == "MIXED",
            }
        )

    # Naming the columns up front keeps every column lookup below valid even
    # when there are no scenarios (a bare DataFrame([]) has no columns at all).
    result_frame = pd.DataFrame(results, columns=result_columns)

    def _mean_or_zero(series: pd.Series) -> float:
        # The mean of an empty selection is undefined; report it as 0.0.
        return float(series.mean()) if len(series) else 0.0

    decisive_mask = result_frame["predicted_call"].isin(["UP", "DOWN"])
    actual_decisive_mask = result_frame["actual_call"].isin(["UP", "DOWN"])
    mixed_actual_mask = result_frame["actual_call"].eq("MIXED")

    # "is_correct" already records predicted == actual, so each conditional
    # metric is simply its mean over the relevant subset of rows.
    return {
        "results": result_frame.to_dict(orient="records"),
        "metrics": {
            "scenario_count": int(len(result_frame)),
            "overall_accuracy": _mean_or_zero(result_frame["is_correct"]),
            "decisive_precision": _mean_or_zero(result_frame.loc[decisive_mask, "is_correct"]),
            "decisive_recall": _mean_or_zero(result_frame.loc[actual_decisive_mask, "is_correct"]),
            "mixed_accuracy": _mean_or_zero(result_frame.loc[mixed_actual_mask, "is_correct"]),
            "coverage": _mean_or_zero(decisive_mask),
            "overcall_rate": _mean_or_zero(result_frame["overcalled"]),
            "undercall_rate": _mean_or_zero(result_frame["undercalled"]),
            "mean_score_error": _mean_or_zero(result_frame["score_error"]),
        },
    }