# nananie143's picture
# feat: Complete blueprint implementation with 66+ modules
# 90bacf7 verified
"""
Team Features Module
Generates team-level features for predictions.
Part of the complete blueprint implementation.
"""
import pandas as pd
import numpy as np
from typing import Dict, List, Optional
import logging
logger = logging.getLogger(__name__)
class TeamFeatureGenerator:
    """
    Generates team-level features from historical match data.

    Features include:
    - Rolling averages (goals scored, goals conceded, goal difference)
    - Points per game and win/draw/loss rates
    - Clean-sheet, failed-to-score, BTTS and over/under rates
    - Home/away splits

    The match frame must provide ``home_team``, ``away_team``, ``home_goals``
    and ``away_goals`` columns. ``match_date`` is optional: when present it is
    used to order matches chronologically, otherwise the input row order is
    taken as the match order.
    """

    # Sizes (in matches) of the trailing windows used for rolling statistics.
    ROLLING_WINDOWS = [3, 5, 10, 20]

    def __init__(self, matches_df: Optional[pd.DataFrame] = None):
        """
        Create a generator, optionally primed with match data.

        Args:
            matches_df: Historical matches. When given, it is passed through
                ``set_matches`` so team statistics are available immediately.
        """
        self.matches = None
        self.team_stats = {}
        if matches_df is not None:
            # Route through set_matches so the frame is copied, ordered and
            # the per-team statistics are actually computed.
            self.set_matches(matches_df)

    def set_matches(self, matches_df: pd.DataFrame):
        """Set match data for feature generation and recompute team stats."""
        self.matches = matches_df.copy()
        if 'match_date' in self.matches.columns:
            # Stable sort keeps the input order of same-date matches.
            self.matches = self.matches.sort_values('match_date', kind='mergesort')
        self._compute_team_stats()

    def _compute_team_stats(self):
        """Compute rolling statistics for all teams."""
        # Start from a clean slate so teams missing from the new data do not
        # keep statistics from a previous set_matches() call.
        self.team_stats = {}
        if self.matches is None:
            return
        teams = set(self.matches['home_team'].unique()) | set(self.matches['away_team'].unique())
        for team in teams:
            self.team_stats[team] = self._compute_single_team_stats(team)

    @staticmethod
    def _window_stats(recent: pd.DataFrame, window: int) -> Dict:
        """Return the rolling statistics for one trailing window of matches.

        Args:
            recent: The team's last ``window`` matches, carrying the
                team-centric ``team_goals`` and ``opp_goals`` columns.
            window: Window size, used only to suffix the feature names.
        """
        scored = recent['team_goals']
        conceded = recent['opp_goals']
        wins = scored > conceded
        draws = scored == conceded
        losses = scored < conceded
        total_goals = scored + conceded
        return {
            f'goals_scored_avg_{window}': scored.mean(),
            f'goals_conceded_avg_{window}': conceded.mean(),
            f'goals_diff_avg_{window}': (scored - conceded).mean(),
            # Points: 3 for a win, 1 for a draw, 0 otherwise (vectorised).
            f'ppg_{window}': (3 * wins + draws).mean(),
            # Win/Draw/Loss rates
            f'win_rate_{window}': wins.mean(),
            f'draw_rate_{window}': draws.mean(),
            f'loss_rate_{window}': losses.mean(),
            # Clean sheets and BTTS
            f'clean_sheet_rate_{window}': (conceded == 0).mean(),
            f'failed_to_score_rate_{window}': (scored == 0).mean(),
            f'btts_rate_{window}': ((scored > 0) & (conceded > 0)).mean(),
            # Over/Under
            f'over_2.5_rate_{window}': (total_goals > 2.5).mean(),
            f'over_1.5_rate_{window}': (total_goals > 1.5).mean(),
        }

    def _compute_single_team_stats(self, team: str) -> Dict:
        """Compute statistics for a single team.

        Returns:
            A dict with match counts, rolling statistics for every window in
            ``ROLLING_WINDOWS`` that the team has enough matches for, and
            home/away splits. Empty when the team has no matches.
        """
        matches = self.matches
        home_matches = matches[matches['home_team'] == team]
        away_matches = matches[matches['away_team'] == team]
        if home_matches.empty and away_matches.empty:
            return {}

        # Re-express both sides from the team's point of view so home and
        # away matches can be combined into one timeline.
        home_view = home_matches.assign(
            is_home=True,
            team_goals=home_matches['home_goals'],
            opp_goals=home_matches['away_goals'],
        )
        away_view = away_matches.assign(
            is_home=False,
            team_goals=away_matches['away_goals'],
            opp_goals=away_matches['home_goals'],
        )
        all_matches = pd.concat(
            [frame for frame in (home_view, away_view) if not frame.empty]
        )
        # match_date is optional (see set_matches); without it the
        # concatenation order is used as-is.
        if 'match_date' in all_matches.columns:
            all_matches = all_matches.sort_values('match_date', kind='mergesort')

        stats = {
            'matches_played': len(all_matches),
            'home_matches': len(home_matches),
            'away_matches': len(away_matches),
        }

        # Rolling averages: only windows fully covered by the team's history.
        for window in self.ROLLING_WINDOWS:
            if len(all_matches) >= window:
                stats.update(self._window_stats(all_matches.tail(window), window))

        # Home/Away splits over the team's full history
        if not home_matches.empty:
            stats['home_goals_avg'] = home_matches['home_goals'].mean()
            stats['home_conceded_avg'] = home_matches['away_goals'].mean()
            stats['home_win_rate'] = (home_matches['home_goals'] > home_matches['away_goals']).mean()
        if not away_matches.empty:
            stats['away_goals_avg'] = away_matches['away_goals'].mean()
            stats['away_conceded_avg'] = away_matches['home_goals'].mean()
            stats['away_win_rate'] = (away_matches['away_goals'] > away_matches['home_goals']).mean()
        return stats

    def get_team_features(self, team: str) -> Dict:
        """Get features for a specific team ({} when the team is unknown)."""
        return self.team_stats.get(team, {})

    def get_match_features(
        self,
        home_team: str,
        away_team: str
    ) -> Dict:
        """Get combined features for a match.

        Every team statistic is copied with a ``home_`` / ``away_`` prefix.
        For each rolling window available for both teams, ``attack_diff``,
        ``defense_diff`` and ``ppg_diff`` features are added.
        """
        home_stats = self.get_team_features(home_team)
        away_stats = self.get_team_features(away_team)

        # Home team features first, then away team features, each prefixed.
        features = {f'home_{key}': value for key, value in home_stats.items()}
        features.update({f'away_{key}': value for key, value in away_stats.items()})

        # Difference features
        for window in self.ROLLING_WINDOWS:
            scored_key = f'goals_scored_avg_{window}'
            conceded_key = f'goals_conceded_avg_{window}'
            ppg_key = f'ppg_{window}'
            if scored_key in home_stats and scored_key in away_stats:
                # Home attack versus what the away side usually concedes.
                features[f'attack_diff_{window}'] = (
                    home_stats[scored_key] - away_stats[conceded_key]
                )
                # Away attack versus what the home side usually concedes.
                features[f'defense_diff_{window}'] = (
                    away_stats[scored_key] - home_stats[conceded_key]
                )
                features[f'ppg_diff_{window}'] = (
                    home_stats.get(ppg_key, 0) - away_stats.get(ppg_key, 0)
                )
        return features

    def generate_all_features(self) -> pd.DataFrame:
        """Generate features for all matches.

        NOTE: every row is built from the teams' current statistics (computed
        over the whole data set), not from statistics as of that match's
        date, so rows for earlier matches include information from later ones.

        Returns:
            One row per match with a ``match_id`` column (taken from the data
            when present, otherwise ``"<home_team>_<away_team>"``). An empty
            frame when no matches have been set.
        """
        if self.matches is None:
            return pd.DataFrame()
        features_list = []
        for _, row in self.matches.iterrows():
            match_features = self.get_match_features(row['home_team'], row['away_team'])
            match_features['match_id'] = row.get('match_id', f"{row['home_team']}_{row['away_team']}")
            features_list.append(match_features)
        return pd.DataFrame(features_list)
# Module-level singleton, created lazily by get_generator()
_generator: Optional[TeamFeatureGenerator] = None


def get_generator(matches_df: pd.DataFrame = None) -> TeamFeatureGenerator:
    """Return the shared team feature generator, creating it on first use.

    When ``matches_df`` is supplied, it is loaded into the shared generator
    (replacing its match data) before the generator is returned.
    """
    global _generator
    if _generator is None:
        _generator = TeamFeatureGenerator()
    if matches_df is None:
        return _generator
    _generator.set_matches(matches_df)
    return _generator
def generate_team_features(
    home_team: str,
    away_team: str,
    matches_df: pd.DataFrame
) -> Dict:
    """Convenience wrapper: load ``matches_df`` and build one match's features.

    The data is loaded into the shared generator obtained from
    ``get_generator``, then the combined home/away features for the given
    fixture are returned.
    """
    return get_generator(matches_df).get_match_features(home_team, away_team)