| """ | |
| Evaluation Metrics Module | |
| Calculates prediction and betting performance metrics. | |
| Part of the complete blueprint implementation. | |
| """ | |
| import numpy as np | |
| import pandas as pd | |
| from typing import Dict, List, Optional | |
| import logging | |
| from sklearn.metrics import ( | |
| accuracy_score, precision_score, recall_score, f1_score, | |
| log_loss, brier_score_loss, roc_auc_score | |
| ) | |
| logger = logging.getLogger(__name__) | |

class EvaluationMetrics:
    """
    Calculates comprehensive prediction metrics.

    Categories:
        - Classification metrics
        - Probabilistic metrics
        - Betting metrics
    """

    def __init__(self):
        pass

    def classification_metrics(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
        labels: Optional[List[str]] = None
    ) -> Dict:
        """Calculate overall and per-class classification metrics."""
        labels = labels or ['H', 'D', 'A']

        results = {
            'accuracy': round(accuracy_score(y_true, y_pred), 4),
            'precision': round(precision_score(y_true, y_pred, average='weighted', zero_division=0), 4),
            'recall': round(recall_score(y_true, y_pred, average='weighted', zero_division=0), 4),
            'f1': round(f1_score(y_true, y_pred, average='weighted', zero_division=0), 4)
        }

        # Per-class metrics: handle both integer-encoded and string labels
        for i, label in enumerate(labels):
            y_true_binary = (y_true == i).astype(int) if isinstance(y_true[0], (int, np.integer)) else (y_true == label).astype(int)
            y_pred_binary = (y_pred == i).astype(int) if isinstance(y_pred[0], (int, np.integer)) else (y_pred == label).astype(int)
            results[f'{label}_precision'] = round(precision_score(y_true_binary, y_pred_binary, zero_division=0), 4)
            results[f'{label}_recall'] = round(recall_score(y_true_binary, y_pred_binary, zero_division=0), 4)

        return results

    def probabilistic_metrics(
        self,
        y_true: np.ndarray,
        y_proba: np.ndarray
    ) -> Dict:
        """Calculate probabilistic calibration metrics."""
        # Ensure a 2-D probability matrix so the one-hot comparison broadcasts correctly
        if y_proba.ndim == 1:
            y_proba = y_proba.reshape(-1, 1)
        n_classes = y_proba.shape[1]

        # One-hot encode integer-encoded true labels
        if y_true.ndim == 1:
            y_true_onehot = np.zeros((len(y_true), n_classes))
            for i, label in enumerate(y_true):
                if isinstance(label, (int, np.integer)) and label < n_classes:
                    y_true_onehot[i, label] = 1
        else:
            y_true_onehot = y_true

        # Brier score (lower is better): mean squared error over all class probabilities
        brier = np.mean((y_proba - y_true_onehot) ** 2)

        # Log loss
        try:
            ll = log_loss(y_true, y_proba)
        except Exception:
            ll = None

        # AUC (one-vs-rest)
        try:
            auc = roc_auc_score(y_true_onehot, y_proba, multi_class='ovr', average='weighted')
        except Exception:
            auc = None

        return {
            'brier_score': round(brier, 4),
            'log_loss': round(ll, 4) if ll is not None else None,
            'auc_roc': round(auc, 4) if auc is not None else None
        }

    def betting_metrics(
        self,
        predictions: List[Dict],
        outcomes: List[Dict],
        initial_bankroll: float = 1000
    ) -> Dict:
        """Calculate betting performance metrics."""
        if not predictions or not outcomes:
            return {}

        # Match predictions to outcomes and settle each bet
        results = []
        for pred, outcome in zip(predictions, outcomes):
            if pred.get('bet_placed', True):
                stake = pred.get('stake', pred.get('unit_stake', 10))
                odds = pred.get('odds', 2.0)
                won = pred.get('predicted_outcome') == outcome.get('actual_outcome')
                profit = stake * (odds - 1) if won else -stake
                results.append({
                    'stake': stake,
                    'odds': odds,
                    'won': won,
                    'profit': profit
                })

        if not results:
            return {}

        total_staked = sum(r['stake'] for r in results)
        total_profit = sum(r['profit'] for r in results)
        wins = sum(1 for r in results if r['won'])

        # Yield = profit / total staked
        yield_pct = total_profit / total_staked * 100 if total_staked > 0 else 0

        # Max drawdown on the cumulative profit curve
        cumulative = [0]
        for r in results:
            cumulative.append(cumulative[-1] + r['profit'])
        peak = cumulative[0]
        max_dd = 0
        for val in cumulative:
            if val > peak:
                peak = val
            dd = peak - val
            if dd > max_dd:
                max_dd = dd

        # Sharpe-like ratio on per-bet returns
        returns = [r['profit'] / r['stake'] for r in results]
        sharpe = np.mean(returns) / np.std(returns) * np.sqrt(len(results)) if np.std(returns) > 0 else 0

        return {
            'total_bets': len(results),
            'wins': wins,
            'losses': len(results) - wins,
            'win_rate': round(wins / len(results) * 100, 2),
            'total_staked': round(total_staked, 2),
            'total_profit': round(total_profit, 2),
            'yield': round(yield_pct, 2),
            'roi': round((total_profit / initial_bankroll) * 100, 2),
            'max_drawdown': round(max_dd, 2),
            'sharpe_ratio': round(sharpe, 4),
            'average_odds': round(np.mean([r['odds'] for r in results]), 2)
        }

    def calibration_analysis(
        self,
        y_true: np.ndarray,
        y_proba: np.ndarray,
        n_bins: int = 10
    ) -> Dict:
        """Analyze probability calibration per class."""
        # Ensure a 2-D probability matrix, then treat each class one-vs-rest
        if y_proba.ndim == 1:
            y_proba = y_proba.reshape(-1, 1)
        n_classes = y_proba.shape[1]

        calibration = []
        for c in range(n_classes):
            class_proba = y_proba[:, c]
            class_true = (y_true == c).astype(int)

            # Bin predictions; the last bin is closed on the right so p == 1.0 is included
            bins = np.linspace(0, 1, n_bins + 1)
            bin_means = []
            bin_true_frequencies = []
            for i in range(n_bins):
                upper = (class_proba <= bins[i + 1]) if i == n_bins - 1 else (class_proba < bins[i + 1])
                mask = (class_proba >= bins[i]) & upper
                if mask.sum() > 0:
                    bin_means.append(class_proba[mask].mean())
                    bin_true_frequencies.append(class_true[mask].mean())

            if bin_means:
                calibration_error = np.mean(np.abs(np.array(bin_means) - np.array(bin_true_frequencies)))
            else:
                calibration_error = 0

            calibration.append({
                'class': c,
                'calibration_error': round(calibration_error, 4),
                'bin_means': [round(x, 4) for x in bin_means],
                'bin_frequencies': [round(x, 4) for x in bin_true_frequencies]
            })

        return {
            'by_class': calibration,
            'average_calibration_error': round(np.mean([c['calibration_error'] for c in calibration]), 4)
        }

# Module-level singleton accessor
_metrics: Optional[EvaluationMetrics] = None


def get_metrics() -> EvaluationMetrics:
    global _metrics
    if _metrics is None:
        _metrics = EvaluationMetrics()
    return _metrics
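

# Illustrative usage sketch (not part of the original blueprint): exercises the metrics
# above on small synthetic arrays. Labels are assumed to be integer-encoded as
# 0 = 'H', 1 = 'D', 2 = 'A', matching the default label order in classification_metrics;
# adjust to your own encoding and real data.
if __name__ == "__main__":
    rng = np.random.default_rng(42)
    metrics = get_metrics()

    # Synthetic match outcomes and model outputs (rows of y_proba sum to 1)
    y_true = rng.integers(0, 3, size=200)
    y_proba = rng.dirichlet(alpha=[2, 1, 2], size=200)
    y_pred = y_proba.argmax(axis=1)

    print(metrics.classification_metrics(y_true, y_pred))
    print(metrics.probabilistic_metrics(y_true, y_proba))
    print(metrics.calibration_analysis(y_true, y_proba, n_bins=5))

    # Synthetic bet history: dict keys follow the fields betting_metrics expects
    predictions = [
        {'bet_placed': True, 'predicted_outcome': int(p), 'stake': 10, 'odds': 2.1}
        for p in y_pred[:50]
    ]
    outcomes = [{'actual_outcome': int(t)} for t in y_true[:50]]
    print(metrics.betting_metrics(predictions, outcomes, initial_bankroll=1000))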