Spaces:
Runtime error
Runtime error
| """ | |
| Team Features Module | |
| Generates team-level features for predictions. | |
| Part of the complete blueprint implementation. | |
| """ | |
| import pandas as pd | |
| import numpy as np | |
| from typing import Dict, List, Optional | |
| import logging | |
| logger = logging.getLogger(__name__) | |
class TeamFeatureGenerator:
    """
    Generates team-level features from historical match data.

    Only goal-based features are produced. For every window in
    ``ROLLING_WINDOWS`` that a team has enough matches for:

    - Rolling averages (goals scored, conceded, goal difference)
    - Points per game and win/draw/loss rates
    - Clean-sheet, failed-to-score and both-teams-to-score rates
    - Over 1.5 / over 2.5 total-goals rates

    plus all-time home/away splits (goals, conceded, win rate).

    The match frame must provide ``home_team``, ``away_team``,
    ``home_goals`` and ``away_goals`` columns. ``match_date`` is optional:
    when present it defines chronological order, otherwise the row order of
    the frame is taken as chronological. ``match_id`` is optional too.
    """

    ROLLING_WINDOWS = [3, 5, 10, 20]

    def __init__(self, matches_df: Optional[pd.DataFrame] = None):
        """Create a generator, optionally loading ``matches_df`` right away.

        Args:
            matches_df: Historical matches. When given, it is loaded through
                ``set_matches`` so that team statistics are available
                immediately after construction.
        """
        self.matches = None
        self.team_stats = {}
        if matches_df is not None:
            self.set_matches(matches_df)

    def set_matches(self, matches_df: pd.DataFrame):
        """Set match data for feature generation and recompute all stats.

        The frame is copied, so the caller's object is never modified.
        """
        self.matches = matches_df.copy()
        if 'match_date' in self.matches.columns:
            # Stable sort: matches sharing a date keep their input order, so
            # the "most recent N" windows are deterministic.
            self.matches = self.matches.sort_values('match_date', kind='mergesort')
        self._compute_team_stats()

    def _compute_team_stats(self):
        """Rebuild ``self.team_stats`` from scratch for every team in the data."""
        fresh: Dict = {}
        if self.matches is not None and not self.matches.empty:
            teams = pd.concat(
                [self.matches['home_team'], self.matches['away_team']],
                ignore_index=True,
            ).dropna().unique()
            for team in teams:
                fresh[team] = self._compute_single_team_stats(team)
        # Replace rather than update: teams that are absent from the new data
        # must not keep statistics from a previously loaded frame.
        self.team_stats = fresh

    def _compute_single_team_stats(self, team: str) -> Dict:
        """Compute statistics for a single team.

        Returns:
            A dict of match counts, per-window rolling stats (only for
            windows the team has enough matches for) and home/away splits.
            Empty when the team has no matches.
        """
        has_dates = 'match_date' in self.matches.columns
        needed = ['home_team', 'away_team', 'home_goals', 'away_goals']
        if has_dates:
            needed.append('match_date')
        # A positional index lets the original row order be restored after
        # the home and away rows are concatenated.
        frame = self.matches[needed].reset_index(drop=True)

        home_matches = frame[frame['home_team'] == team]
        away_matches = frame[frame['away_team'] == team]

        # Re-express both sides from the team's own point of view.
        views = []
        if not home_matches.empty:
            views.append(home_matches.rename(
                columns={'home_goals': 'team_goals', 'away_goals': 'opp_goals'}
            ))
        if not away_matches.empty:
            views.append(away_matches.rename(
                columns={'away_goals': 'team_goals', 'home_goals': 'opp_goals'}
            ))
        if not views:
            return {}

        all_matches = pd.concat(views).sort_index(kind='mergesort')
        if has_dates:
            all_matches = all_matches.sort_values('match_date', kind='mergesort')

        stats = {
            'matches_played': len(all_matches),
            'home_matches': len(home_matches),
            'away_matches': len(away_matches),
        }

        # Rolling windows over the most recent matches.
        for window in self.ROLLING_WINDOWS:
            if len(all_matches) < window:
                continue
            stats.update(self._window_stats(all_matches.tail(window), window))

        # Home/Away splits over all matches (not windowed).
        if not home_matches.empty:
            stats['home_goals_avg'] = home_matches['home_goals'].mean()
            stats['home_conceded_avg'] = home_matches['away_goals'].mean()
            stats['home_win_rate'] = (
                home_matches['home_goals'] > home_matches['away_goals']
            ).mean()
        if not away_matches.empty:
            stats['away_goals_avg'] = away_matches['away_goals'].mean()
            stats['away_conceded_avg'] = away_matches['home_goals'].mean()
            stats['away_win_rate'] = (
                away_matches['away_goals'] > away_matches['home_goals']
            ).mean()
        return stats

    @staticmethod
    def _window_stats(recent: pd.DataFrame, window: int) -> Dict:
        """Summarise ``recent`` (team-perspective rows) under ``window``-suffixed keys."""
        scored = recent['team_goals']
        conceded = recent['opp_goals']
        won = scored > conceded
        drew = scored == conceded
        lost = scored < conceded
        total_goals = scored + conceded

        return {
            f'goals_scored_avg_{window}': scored.mean(),
            f'goals_conceded_avg_{window}': conceded.mean(),
            f'goals_diff_avg_{window}': (scored - conceded).mean(),
            # Points: 3 for a win, 1 for a draw, 0 otherwise.
            f'ppg_{window}': (3 * won + drew).mean(),
            # Win/Draw/Loss rates
            f'win_rate_{window}': won.mean(),
            f'draw_rate_{window}': drew.mean(),
            f'loss_rate_{window}': lost.mean(),
            # Clean sheets and BTTS
            f'clean_sheet_rate_{window}': (conceded == 0).mean(),
            f'failed_to_score_rate_{window}': (scored == 0).mean(),
            f'btts_rate_{window}': ((scored > 0) & (conceded > 0)).mean(),
            # Over/Under
            f'over_2.5_rate_{window}': (total_goals > 2.5).mean(),
            f'over_1.5_rate_{window}': (total_goals > 1.5).mean(),
        }

    def get_team_features(self, team: str) -> Dict:
        """Get features for a specific team (empty dict for unknown teams)."""
        return self.team_stats.get(team, {})

    def get_match_features(
        self,
        home_team: str,
        away_team: str
    ) -> Dict:
        """Get combined features for a match.

        The result holds the home side's stats prefixed with ``home_``, the
        away side's stats prefixed with ``away_`` and, for every window both
        sides have data for, three difference features:

        - ``attack_diff_N``: home goals scored avg minus away goals conceded avg
        - ``defense_diff_N``: away goals scored avg minus home goals conceded avg
        - ``ppg_diff_N``: home points per game minus away points per game
        """
        home_stats = self.get_team_features(home_team)
        away_stats = self.get_team_features(away_team)

        features = {f'home_{key}': value for key, value in home_stats.items()}
        features.update(
            {f'away_{key}': value for key, value in away_stats.items()}
        )

        for window in self.ROLLING_WINDOWS:
            scored_key = f'goals_scored_avg_{window}'
            conceded_key = f'goals_conceded_avg_{window}'
            # Window stats are written as a group, so the presence of the
            # scored average implies the conceded average exists as well.
            if scored_key not in home_stats or scored_key not in away_stats:
                continue
            features[f'attack_diff_{window}'] = (
                home_stats[scored_key] - away_stats[conceded_key]
            )
            features[f'defense_diff_{window}'] = (
                away_stats[scored_key] - home_stats[conceded_key]
            )
            features[f'ppg_diff_{window}'] = (
                home_stats.get(f'ppg_{window}', 0) -
                away_stats.get(f'ppg_{window}', 0)
            )
        return features

    def generate_all_features(self) -> pd.DataFrame:
        """Generate features for all matches.

        Each row gets a ``match_id`` taken from the ``match_id`` column when
        it exists, otherwise built as ``"<home_team>_<away_team>"``.

        NOTE: every row uses the teams' statistics as of the END of the
        loaded data (the same values ``get_match_features`` returns), not the
        statistics as they stood before that particular match was played.
        """
        if self.matches is None or self.matches.empty:
            return pd.DataFrame()

        home_teams = self.matches['home_team']
        away_teams = self.matches['away_team']
        if 'match_id' in self.matches.columns:
            match_ids = self.matches['match_id']
        else:
            match_ids = [f"{home}_{away}" for home, away in zip(home_teams, away_teams)]

        features_list = []
        for home, away, match_id in zip(home_teams, away_teams, match_ids):
            match_features = self.get_match_features(home, away)
            match_features['match_id'] = match_id
            features_list.append(match_features)
        return pd.DataFrame(features_list)
# Module-level singleton reused by every get_generator() call.
_generator: Optional[TeamFeatureGenerator] = None


def get_generator(matches_df: pd.DataFrame = None) -> TeamFeatureGenerator:
    """Return the shared TeamFeatureGenerator, creating it on first use.

    Args:
        matches_df: Optional match data. When provided, it is loaded into
            the shared generator via ``set_matches`` before returning.

    Returns:
        The single module-wide generator instance.
    """
    global _generator
    shared = _generator
    if shared is None:
        # First call: build the instance and remember it for later calls.
        shared = _generator = TeamFeatureGenerator()
    if matches_df is None:
        return shared
    shared.set_matches(matches_df)
    return shared
def generate_team_features(
    home_team: str,
    away_team: str,
    matches_df: pd.DataFrame
) -> Dict:
    """Convenience wrapper: build match features in a single call.

    Passes ``matches_df`` to ``get_generator`` and asks the generator it
    returns for the combined features of ``home_team`` vs ``away_team``.
    """
    return get_generator(matches_df).get_match_features(home_team, away_team)