Spaces:

Jitendra12421
/

finbert_anaylzer

Sleeping

App Files Files Community

finbert_anaylzer / engine /backtest.py

Jitendra12421

Upload 12 files

16ae9d0 verified about 2 months ago

raw

history blame contribute delete

11.1 kB

	from __future__ import annotations

	from dataclasses import dataclass
	from datetime import datetime, timedelta, timezone

	import pandas as pd

	from .analytics import AnalyticsEngine


	@dataclass(frozen=True)
	class BacktestScenario:
	    """A single labelled benchmark case consumed by ``run_backtest``.

	    The instance is frozen, so its attributes cannot be reassigned after
	    construction (the ``rows`` list object itself is still a regular list).
	    """

	    # Identifier copied into the "scenario" field of each backtest result.
	    name: str
	    # Headline records; turned into a DataFrame and handed to the engine.
	    rows: list[dict]
	    # Realised next-day move, in percent, used to derive the expected call.
	    next_day_return_pct: float
	    # Symbol reported alongside the result; placeholder when not supplied.
	    ticker: str = "TEST"


	@dataclass(frozen=True)
	class BacktestSuite:
	    """Pair of scenario collections returned by ``build_benchmark_suite``."""

	    # Scenarios grouped under the "tuning" label.
	    tuning: list[BacktestScenario]
	    # Scenarios grouped under the "holdout" label.
	    holdout: list[BacktestScenario]


	def _row(
	title: str,
	timestamp: datetime,
	ensemble_pol: float,
	finbert_pol: float,
	roberta_pol: float,
	finbert_score: float,
	roberta_score: float,
	conviction: float,
	significance: float,
	) -> dict:
	return {
	"title": title,
	"timestamp": timestamp.isoformat(),
	"ensemble_pol": ensemble_pol,
	"finbert_pol": finbert_pol,
	"roberta_pol": roberta_pol,
	"finbert_score": finbert_score,
	"roberta_score": roberta_score,
	"agreement": 1.0,
	"conviction": conviction,
	"significance": significance,
	}


	def build_benchmark_suite(reference_time: datetime | None = None) -> BacktestSuite:
	    """Assemble the hand-written benchmark scenarios.

	    Every headline timestamp is expressed relative to a single anchor so the
	    suite stays "fresh" whenever it is built.

	    Args:
	        reference_time: Anchor for the headline timestamps. When omitted, the
	            current UTC time is used.

	    Returns:
	        A ``BacktestSuite`` holding four tuning scenarios and nine holdout
	        scenarios.

	    Note:
	        The positional numbers passed to ``_row`` after the timestamp are, in
	        order: ensemble_pol, finbert_pol, roberta_pol, finbert_score,
	        roberta_score, conviction, significance.
	    """
	    now = reference_time if reference_time is not None else datetime.now(timezone.utc)

	    tuning = [
	        BacktestScenario(
	            name="tuning_fresh_bullish_consensus",
	            ticker="TSLA",
	            next_day_return_pct=1.2,
	            rows=[
	                _row("TSLA beats estimates and raises guidance after record deliveries", now, 0.86, 0.91, 0.79, 0.97, 0.90, 0.82, 0.95),
	                _row("Analyst upgrades TSLA and raises price target", now - timedelta(hours=3), 0.72, 0.80, 0.63, 0.92, 0.84, 0.71, 0.88),
	                _row("TSLA wins major battery contract in growth push", now - timedelta(hours=8), 0.61, 0.68, 0.53, 0.88, 0.80, 0.61, 0.82),
	            ],
	        ),
	        BacktestScenario(
	            name="tuning_fresh_bearish_consensus",
	            ticker="TSLA",
	            next_day_return_pct=-1.4,
	            rows=[
	                _row("TSLA cuts guidance as revenue falls below estimates", now, -0.88, -0.93, -0.81, 0.97, 0.90, 0.86, 0.96),
	                _row("SEC investigation and lawsuit deepen pressure on TSLA stock", now - timedelta(hours=2), -0.78, -0.85, -0.68, 0.94, 0.86, 0.76, 0.91),
	                _row("Analyst downgrade sends TSLA lower on demand fears", now - timedelta(hours=5), -0.67, -0.72, -0.59, 0.89, 0.82, 0.64, 0.84),
	            ],
	        ),
	        BacktestScenario(
	            name="tuning_conflicted_flow",
	            ticker="TSLA",
	            next_day_return_pct=0.1,
	            rows=[
	                _row("TSLA beats estimates but warns on margin headwinds", now, 0.18, 0.24, 0.10, 0.81, 0.76, 0.20, 0.72),
	                _row("Analyst downgrade offsets recent TSLA rally", now - timedelta(hours=4), -0.22, -0.28, -0.14, 0.82, 0.74, 0.22, 0.75),
	                _row("Investors await TSLA delivery update as outlook remains uncertain", now - timedelta(hours=9), 0.02, 0.04, 0.00, 0.70, 0.66, 0.04, 0.63),
	            ],
	        ),
	        BacktestScenario(
	            name="tuning_stale_positive_signal",
	            ticker="TSLA",
	            next_day_return_pct=0.1,
	            rows=[
	                # The strong headline is five days old; only a neutral one is current.
	                _row("TSLA beats estimates and raises guidance", now - timedelta(days=5), 0.82, 0.90, 0.74, 0.95, 0.87, 0.79, 0.92),
	                _row("Investors await TSLA update as outlook remains uncertain", now, 0.01, 0.03, 0.00, 0.71, 0.67, 0.02, 0.60),
	            ],
	        ),
	    ]

	    holdout = [
	        BacktestScenario(
	            name="holdout_broad_bullish_repricing",
	            ticker="NVDA",
	            next_day_return_pct=1.1,
	            rows=[
	                _row("NVIDIA tops estimates and raises outlook on AI demand", now, 0.84, 0.89, 0.77, 0.96, 0.89, 0.80, 0.94),
	                _row("Brokerage upgrade lifts NVIDIA price target after strong guidance", now - timedelta(hours=2), 0.69, 0.74, 0.61, 0.91, 0.84, 0.66, 0.87),
	                _row("NVIDIA secures major cloud partnership expansion", now - timedelta(hours=6), 0.56, 0.61, 0.48, 0.87, 0.80, 0.54, 0.80),
	            ],
	        ),
	        BacktestScenario(
	            name="holdout_major_singleton_earnings",
	            ticker="AAPL",
	            next_day_return_pct=0.8,
	            rows=[
	                _row("Apple beats estimates and raises guidance for next quarter", now, 0.80, 0.86, 0.72, 0.95, 0.87, 0.77, 0.93),
	            ],
	        ),
	        BacktestScenario(
	            name="holdout_broad_bearish_repricing",
	            ticker="NFLX",
	            next_day_return_pct=-1.1,
	            rows=[
	                _row("Netflix misses estimates and cuts outlook as subscriber growth slows", now, -0.84, -0.89, -0.77, 0.96, 0.90, 0.81, 0.95),
	                _row("Analyst downgrade hits Netflix after weak guidance", now - timedelta(hours=3), -0.66, -0.72, -0.58, 0.90, 0.83, 0.63, 0.86),
	                _row("Probe and lawsuit add pressure to Netflix shares", now - timedelta(hours=7), -0.58, -0.64, -0.49, 0.88, 0.81, 0.55, 0.82),
	            ],
	        ),
	        BacktestScenario(
	            name="holdout_mixed_crosscurrents",
	            ticker="AMZN",
	            next_day_return_pct=0.05,
	            rows=[
	                _row("Amazon wins cloud contract but warns on margin pressure", now, 0.17, 0.22, 0.10, 0.82, 0.76, 0.19, 0.74),
	                _row("Analyst downgrade trims Amazon target after recent rally", now - timedelta(hours=4), -0.19, -0.25, -0.11, 0.81, 0.73, 0.20, 0.73),
	                _row("Investors stay cautious ahead of Amazon operating update", now - timedelta(hours=9), 0.00, 0.02, 0.00, 0.70, 0.66, 0.01, 0.61),
	            ],
	        ),
	        BacktestScenario(
	            name="holdout_thin_generic_positive",
	            ticker="META",
	            next_day_return_pct=0.18,
	            rows=[
	                _row("Meta launches new consumer feature across more markets", now, 0.33, 0.37, 0.27, 0.82, 0.78, 0.29, 0.67),
	            ],
	        ),
	        BacktestScenario(
	            name="holdout_thin_generic_negative",
	            ticker="DIS",
	            next_day_return_pct=-0.14,
	            rows=[
	                _row("Disney faces production delay at key studio release", now, -0.31, -0.35, -0.26, 0.84, 0.77, 0.27, 0.66),
	            ],
	        ),
	        BacktestScenario(
	            name="holdout_stale_positive_without_followthrough",
	            ticker="CRM",
	            next_day_return_pct=0.12,
	            rows=[
	                # The upbeat headline is four days old; only a neutral one is current.
	                _row("Salesforce announces partnership expansion and upbeat commentary", now - timedelta(days=4), 0.61, 0.67, 0.52, 0.89, 0.81, 0.58, 0.83),
	                _row("Traders await Salesforce update as visibility remains mixed", now, 0.01, 0.03, 0.00, 0.71, 0.67, 0.02, 0.60),
	            ],
	        ),
	        BacktestScenario(
	            name="holdout_mild_positive_lean",
	            ticker="ORCL",
	            next_day_return_pct=0.42,
	            rows=[
	                _row("Oracle partnership expands enterprise demand pipeline", now, 0.39, 0.44, 0.31, 0.85, 0.79, 0.35, 0.73),
	                _row("Analyst note turns constructive on Oracle cloud growth", now - timedelta(hours=5), 0.28, 0.33, 0.20, 0.81, 0.74, 0.25, 0.70),
	            ],
	        ),
	        BacktestScenario(
	            name="holdout_mild_negative_lean",
	            ticker="INTC",
	            next_day_return_pct=-0.46,
	            rows=[
	                _row("Intel downgrade reflects weaker PC demand expectations", now, -0.41, -0.46, -0.34, 0.86, 0.80, 0.37, 0.75),
	                _row("Intel delay raises execution concerns for next launch", now - timedelta(hours=4), -0.29, -0.34, -0.22, 0.82, 0.76, 0.26, 0.71),
	            ],
	        ),
	    ]

	    return BacktestSuite(tuning=tuning, holdout=holdout)


	def expected_direction(next_day_return_pct: float, neutral_band_pct: float = 0.35) -> str:
	    """Classify a realised next-day return as ``"UP"``, ``"DOWN"`` or ``"MIXED"``.

	    Args:
	        next_day_return_pct: Realised return, in percent.
	        neutral_band_pct: Half-width of the neutral zone. Returns at or beyond
	            ``+band`` are ``"UP"``, at or beyond ``-band`` are ``"DOWN"``.

	    Returns:
	        The direction label; anything strictly inside the band is ``"MIXED"``.
	    """
	    upper_bound = neutral_band_pct
	    lower_bound = -neutral_band_pct

	    # The upward check is evaluated first, so it wins if both would match.
	    if next_day_return_pct >= upper_bound:
	        label = "UP"
	    elif next_day_return_pct <= lower_bound:
	        label = "DOWN"
	    else:
	        label = "MIXED"
	    return label


	def _target_score(next_day_return_pct: float) -> int:
	normalized = max(-1.0, min(1.0, next_day_return_pct / 2.0))
	return int(round((normalized + 1.0) * 50.0))


	def _mean_or_zero(values: pd.Series) -> float:
	    """Return the mean of *values* as a float, or 0.0 when *values* is empty."""
	    return float(values.mean()) if len(values) else 0.0


	def run_backtest(
	    scenarios: list[BacktestScenario],
	    engine: AnalyticsEngine | None = None,
	    neutral_band_pct: float = 0.35,
	) -> dict:
	    """Score the engine's direction calls against each scenario's realised move.

	    Args:
	        scenarios: Cases to evaluate. An empty list is allowed and yields an
	            empty result list with all rates reported as 0.0.
	        engine: Object exposing ``get_summary(DataFrame)``; a fresh
	            ``AnalyticsEngine`` is created when omitted. The summary must
	            provide ``"direction_call"``, ``"direction_score"`` and
	            ``"direction_confidence"``.
	        neutral_band_pct: Half-width of the band inside which the realised
	            return counts as ``"MIXED"``.

	    Returns:
	        A dict with two keys:

	        * ``"results"``: one record per scenario.
	        * ``"metrics"``: aggregate figures.
	            - ``overall_accuracy``: share of scenarios where the predicted
	              call equals the actual call.
	            - ``decisive_precision``: accuracy over scenarios the engine
	              called UP or DOWN.
	            - ``decisive_recall``: accuracy over scenarios whose actual call
	              is UP or DOWN.
	            - ``mixed_accuracy``: accuracy over scenarios whose actual call
	              is MIXED.
	            - ``coverage``: share of scenarios the engine called UP or DOWN.
	            - ``overcall_rate`` / ``undercall_rate``: share of scenarios
	              flagged as overcalled / undercalled.
	            - ``mean_score_error``: mean absolute gap between the engine's
	              direction score and the target score.
	          Any rate computed over an empty selection is 0.0.
	    """
	    sentiment_engine = engine if engine is not None else AnalyticsEngine()
	    decisive_calls = ("UP", "DOWN")
	    result_columns = [
	        "scenario",
	        "ticker",
	        "next_day_return_pct",
	        "actual_call",
	        "predicted_call",
	        "direction_score",
	        "direction_confidence",
	        "score_error",
	        "is_correct",
	        "overcalled",
	        "undercalled",
	    ]
	    results = []

	    for scenario in scenarios:
	        summary = sentiment_engine.get_summary(pd.DataFrame(scenario.rows))
	        actual_call = expected_direction(scenario.next_day_return_pct, neutral_band_pct=neutral_band_pct)
	        predicted_call = summary["direction_call"]
	        direction_score = int(summary["direction_score"])
	        results.append(
	            {
	                "scenario": scenario.name,
	                "ticker": scenario.ticker,
	                "next_day_return_pct": float(scenario.next_day_return_pct),
	                "actual_call": actual_call,
	                "predicted_call": predicted_call,
	                "direction_score": direction_score,
	                "direction_confidence": int(summary["direction_confidence"]),
	                "score_error": abs(direction_score - _target_score(scenario.next_day_return_pct)),
	                "is_correct": predicted_call == actual_call,
	                # Overcall: the engine took a side when the move was inside the band.
	                "overcalled": actual_call == "MIXED" and predicted_call in decisive_calls,
	                # Undercall: the engine sat out a move that left the band.
	                "undercalled": actual_call in decisive_calls and predicted_call == "MIXED",
	            }
	        )

	    # Explicit columns keep the lookups below valid when there are no results;
	    # a frame built from an empty list would otherwise have no columns at all.
	    result_frame = pd.DataFrame(results, columns=result_columns)
	    decisive_mask = result_frame["predicted_call"].isin(decisive_calls)
	    actual_decisive_mask = result_frame["actual_call"].isin(decisive_calls)
	    mixed_actual_mask = result_frame["actual_call"].eq("MIXED")

	    # "is_correct" already encodes predicted == actual, so each conditional
	    # metric is simply its mean over the relevant subset of scenarios.
	    is_correct = result_frame["is_correct"]

	    return {
	        "results": result_frame.to_dict(orient="records"),
	        "metrics": {
	            "scenario_count": len(result_frame),
	            "overall_accuracy": _mean_or_zero(is_correct),
	            "decisive_precision": _mean_or_zero(is_correct[decisive_mask]),
	            "decisive_recall": _mean_or_zero(is_correct[actual_decisive_mask]),
	            "mixed_accuracy": _mean_or_zero(is_correct[mixed_actual_mask]),
	            "coverage": _mean_or_zero(decisive_mask),
	            "overcall_rate": _mean_or_zero(result_frame["overcalled"]),
	            "undercall_rate": _mean_or_zero(result_frame["undercalled"]),
	            "mean_score_error": _mean_or_zero(result_frame["score_error"]),
	        },
	    }