# NOTE(review): removed stray export artifacts ("Spaces:" / "Runtime error"
# lines) that were prepended by a copy/export tool and are not valid Python.
| """ | |
| Backtesting System | |
| Test model accuracy on historical data to validate predictions. | |
| Features: | |
| - Walk-forward validation | |
| - Multiple time periods | |
| - Profit/loss simulation | |
| - Accuracy by league, team, outcome | |
| """ | |
import json
import logging
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional

import numpy as np
import pandas as pd
# Module-level logger; handlers and level are configured by the application.
logger = logging.getLogger(__name__)

# Project root — assumes this file sits three directories below it
# (e.g. <root>/src/<pkg>/<module>.py); TODO confirm against the repo layout.
BASE_DIR = Path(__file__).parent.parent.parent
DATA_DIR = BASE_DIR / "data"
RESULTS_DIR = DATA_DIR / "backtest_results"
# Side effect at import time: make sure the results directory exists.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
class Backtester:
    """Backtest simple Elo-based match predictions against historical data.

    Workflow: load historical match results, seed Elo ratings on the earliest
    70% of the requested period, then walk forward through the remaining 30%,
    scoring a prediction for each match and updating the ratings once the
    true result is known.
    """

    # Elo K-factor used for both the seeding pass and the walk-forward pass.
    K = 32

    def __init__(self):
        # Cached historical results DataFrame; populated lazily by load_data().
        self.data = None
        # Kept for interface compatibility; not populated by this class.
        self.results = []

    def load_data(self) -> pd.DataFrame:
        """Load historical match data, preferring the local CSV cache.

        Returns:
            A DataFrame with at least 'date', 'home_team', 'away_team',
            'home_score' and 'away_score' columns, or an empty DataFrame
            when neither the cache nor the remote dataset is reachable
            (callers must handle the empty case).
        """
        if self.data is not None:
            return self.data

        # Try the local cache first.
        cache_file = DATA_DIR / "training_data.csv"
        if cache_file.exists():
            self.data = pd.read_csv(cache_file)
            self.data['date'] = pd.to_datetime(self.data['date'])
            return self.data

        # Fall back to downloading the public dataset.
        try:
            url = 'https://raw.githubusercontent.com/martj42/international_results/master/results.csv'
            self.data = pd.read_csv(url)
            self.data['date'] = pd.to_datetime(self.data['date'])
            return self.data
        except Exception as exc:
            # Was a bare `except:` (which also swallowed KeyboardInterrupt /
            # SystemExit). Keep the best-effort empty-DataFrame contract,
            # but narrow the catch and log the failure.
            logger.warning("Could not download historical data: %s", exc)
            return pd.DataFrame()

    @staticmethod
    def _actual_outcome(home_score, away_score) -> str:
        """Map a final score to 'H' (home win), 'A' (away win) or 'D' (draw)."""
        if home_score > away_score:
            return 'H'
        if home_score < away_score:
            return 'A'
        return 'D'

    def _update_elo(self, elo: Dict[str, float], home: str, away: str,
                    outcome: str) -> None:
        """Apply one match result ('H'/'A'/'D') to the ratings dict in place.

        Unseen teams start at 1500. The home/away score shares always sum
        to 1, so total rating is conserved.
        """
        h_elo = elo.get(home, 1500)
        a_elo = elo.get(away, 1500)
        exp_h = 1 / (1 + 10 ** ((a_elo - h_elo) / 400))
        s_h = {'H': 1.0, 'A': 0.0, 'D': 0.5}[outcome]
        elo[home] = h_elo + self.K * (s_h - exp_h)
        elo[away] = a_elo + self.K * ((1 - s_h) - (1 - exp_h))

    def calculate_elo(self, df: pd.DataFrame) -> Dict[str, float]:
        """Calculate Elo ratings by replaying matches in chronological order.

        Returns:
            Mapping of team name -> rating after the last match in df.
        """
        elo: Dict[str, float] = {}
        for _, row in df.sort_values('date').iterrows():
            outcome = self._actual_outcome(row['home_score'], row['away_score'])
            self._update_elo(elo, row['home_team'], row['away_team'], outcome)
        return elo

    def predict_match(self, home_elo: float, away_elo: float,
                      home_advantage: float = 100) -> Dict:
        """Convert two Elo ratings into 3-way (H/D/A) outcome probabilities.

        Args:
            home_elo: Home team's rating.
            away_elo: Away team's rating.
            home_advantage: Elo points credited to the home side before the
                expected-score calculation.

        Returns:
            Dict with 'home_prob', 'draw_prob', 'away_prob', 'prediction'
            ('H'/'D'/'A') and 'confidence' (the largest probability).
        """
        h_elo = home_elo + home_advantage
        # Standard Elo expected score for the (advantaged) home side.
        exp_h = 1 / (1 + 10 ** ((away_elo - h_elo) / 400))
        # Rough 3-way split: draws are more likely when the sides are even.
        draw_prob = 0.25 - 0.1 * abs(exp_h - 0.5)
        home_prob = exp_h * (1 - draw_prob)
        away_prob = (1 - exp_h) * (1 - draw_prob)
        # Normalize so the three probabilities sum to exactly 1.
        total = home_prob + draw_prob + away_prob
        home_prob /= total
        draw_prob /= total
        away_prob /= total
        if home_prob > draw_prob and home_prob > away_prob:
            pred = 'H'
        elif away_prob > draw_prob:
            pred = 'A'
        else:
            pred = 'D'
        return {
            'home_prob': home_prob,
            'draw_prob': draw_prob,
            'away_prob': away_prob,
            'prediction': pred,
            'confidence': max(home_prob, draw_prob, away_prob)
        }

    @staticmethod
    def _record_prediction(results: Dict, row, pred: Dict, actual: str) -> None:
        """Score one prediction and accumulate it into the results dict."""
        correct = pred['prediction'] == actual
        results['total'] += 1
        if correct:
            results['correct'] += 1
            results['profit_loss'] += 9   # win: $9 profit on a $10 stake at 1.9 odds
        else:
            results['profit_loss'] -= 10  # lose the $10 stake
        results['by_outcome'][pred['prediction']]['total'] += 1
        if correct:
            results['by_outcome'][pred['prediction']]['correct'] += 1
        # Confidence bucket.
        if pred['confidence'] > 0.6:
            bucket = 'high'
        elif pred['confidence'] > 0.5:
            bucket = 'medium'
        else:
            bucket = 'low'
        results['by_confidence'][bucket]['total'] += 1
        if correct:
            results['by_confidence'][bucket]['correct'] += 1
        results['predictions'].append({
            'date': str(row['date'].date()),
            'match': f"{row['home_team']} vs {row['away_team']}",
            'predicted': pred['prediction'],
            'actual': actual,
            'correct': correct,
            'confidence': round(pred['confidence'], 3)
        })

    def run_backtest(self,
                     start_year: int = 2020,
                     end_year: int = 2024,
                     min_confidence: float = 0.5) -> Dict:
        """Run a walk-forward backtest over [start_year, end_year].

        The first 70% of matches seed the Elo ratings; the remaining 30% are
        predicted one at a time, with ratings updated after each result.
        Predictions below min_confidence are not scored (but their results
        still feed the Elo updates). Results are saved to RESULTS_DIR.

        Returns:
            Results dict with accuracy/ROI breakdowns, or {'error': ...}
            when there is no (or not enough) data.
        """
        df = self.load_data()
        if df.empty:
            return {'error': 'No data available'}

        # Restrict to the requested period, in chronological order.
        df = df[(df['date'].dt.year >= start_year) & (df['date'].dt.year <= end_year)].copy()
        df = df.sort_values('date')
        if len(df) < 100:
            return {'error': 'Not enough data for backtest'}

        # Split: first 70% seeds Elo, last 30% is walked forward.
        split_idx = int(len(df) * 0.7)
        train_df = df.iloc[:split_idx]
        test_df = df.iloc[split_idx:]
        elo = self.calculate_elo(train_df)

        results = {
            'total': 0,
            'correct': 0,
            'by_outcome': {'H': {'total': 0, 'correct': 0},
                           'D': {'total': 0, 'correct': 0},
                           'A': {'total': 0, 'correct': 0}},
            'by_confidence': {
                'high': {'total': 0, 'correct': 0},    # > 0.6
                'medium': {'total': 0, 'correct': 0},  # 0.5-0.6
                'low': {'total': 0, 'correct': 0}      # <= 0.5
            },
            'profit_loss': 0,  # flat $10 bets at 1.9 odds
            'predictions': []
        }

        for _, row in test_df.iterrows():
            home, away = row['home_team'], row['away_team']
            pred = self.predict_match(elo.get(home, 1500), elo.get(away, 1500))
            actual = self._actual_outcome(row['home_score'], row['away_score'])

            # Only score predictions that clear the confidence filter.
            if pred['confidence'] >= min_confidence:
                self._record_prediction(results, row, pred, actual)

            # BUG FIX: ratings must be updated for EVERY walk-forward match.
            # The original code `continue`d before this update whenever the
            # prediction fell below min_confidence, so those results never
            # fed back into the ratings and later predictions ran on stale
            # Elo values.
            self._update_elo(elo, home, away, actual)

        # Summary statistics (guard against zero scored predictions).
        results['accuracy'] = results['correct'] / results['total'] if results['total'] > 0 else 0
        results['roi'] = results['profit_loss'] / (results['total'] * 10) if results['total'] > 0 else 0
        for outcome in results['by_outcome'].values():
            outcome['accuracy'] = outcome['correct'] / outcome['total'] if outcome['total'] > 0 else 0
        for conf in results['by_confidence'].values():
            conf['accuracy'] = conf['correct'] / conf['total'] if conf['total'] > 0 else 0
        results['period'] = f"{start_year}-{end_year}"
        results['test_matches'] = len(test_df)
        results['predictions'] = results['predictions'][-50:]  # keep last 50 only

        # Persist a timestamped copy of the results.
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        with open(RESULTS_DIR / f'backtest_{timestamp}.json', 'w') as f:
            json.dump(results, f, indent=2)
        return results

    def get_summary(self) -> Dict:
        """Summarize every saved backtest result file in RESULTS_DIR."""
        results = []
        for path in RESULTS_DIR.glob('backtest_*.json'):
            with open(path, 'r') as file:
                data = json.load(file)
            results.append({
                'file': path.name,
                'period': data.get('period'),
                'accuracy': data.get('accuracy'),
                'roi': data.get('roi'),
                'total_predictions': data.get('total')
            })
        return {'backtests': results}
| # Global instance | |
# Process-wide singleton, created on first call to get_backtester().
_backtester: Optional[Backtester] = None


def get_backtester() -> Backtester:
    """Return the shared Backtester instance, creating it lazily."""
    global _backtester
    instance = _backtester
    if instance is None:
        instance = Backtester()
        _backtester = instance
    return instance
def run_backtest(start_year: int = 2020, end_year: int = 2024, min_confidence: float = 0.5):
    """Module-level convenience wrapper around Backtester.run_backtest."""
    backtester = get_backtester()
    return backtester.run_backtest(start_year, end_year, min_confidence)
def get_backtest_summary():
    """Module-level convenience wrapper around Backtester.get_summary."""
    backtester = get_backtester()
    return backtester.get_summary()