Spaces:
Runtime error
Runtime error
| """ | |
| Player Features Module | |
| Aggregates player-level data into team features. | |
| Part of the complete blueprint implementation. | |
| """ | |
| import pandas as pd | |
| import numpy as np | |
| from typing import Dict, List, Optional | |
| import logging | |
| logger = logging.getLogger(__name__) | |
class PlayerFeatureGenerator:
    """
    Generates player-aggregated features for team predictions.

    Features include:
    - Squad quality ratings
    - Key player availability
    - Goal/assist contributions
    - Experience metrics

    The generator keeps a ``team -> DataFrame`` index (``team_squads``) that
    is rebuilt from scratch whenever the player data changes. Every
    aggregation is optional: a feature is only emitted when the column it is
    derived from exists in the player data.
    """

    # (source column, ((feature name, Series aggregation method), ...)).
    # Order matters: it defines the key order of the dict returned by
    # ``get_squad_strength``.
    _SQUAD_AGGREGATIONS = (
        ('rating', (
            ('avg_rating', 'mean'),
            ('max_rating', 'max'),
            ('min_rating', 'min'),
        )),
        ('goals', (
            ('total_goals', 'sum'),
            ('avg_goals', 'mean'),
            ('top_scorer_goals', 'max'),
        )),
        ('assists', (
            ('total_assists', 'sum'),
            ('avg_assists', 'mean'),
        )),
        ('appearances', (
            ('total_appearances', 'sum'),
            ('avg_appearances', 'mean'),
        )),
        ('age', (
            ('avg_age', 'mean'),
            ('youngest', 'min'),
            ('oldest', 'max'),
        )),
        ('market_value', (
            ('total_value', 'sum'),
            ('avg_value', 'mean'),
        )),
        ('xg', (
            ('total_xg', 'sum'),
            ('avg_xg', 'mean'),
        )),
        ('xa', (
            ('total_xa', 'sum'),
        )),
    )

    def __init__(self, player_data: Optional[pd.DataFrame] = None):
        """
        Create a generator, optionally seeded with player data.

        Args:
            player_data: One row per player. A ``team`` column is required
                for any squad to be indexed; all other columns are optional.
        """
        self.player_data = player_data
        self.team_squads: Dict = {}
        # Index the squads immediately; otherwise data passed to the
        # constructor would be stored but every team lookup would miss.
        self._build_team_squads()

    def set_player_data(self, player_data: pd.DataFrame):
        """Set player data for feature generation.

        A copy of ``player_data`` is stored and the team index is rebuilt,
        discarding any squads that came from previously set data.
        """
        self.player_data = player_data.copy()
        self._build_team_squads()

    def _build_team_squads(self):
        """Rebuild the ``team -> squad DataFrame`` mapping from scratch."""
        # Always start clean so teams from an earlier dataset never linger.
        self.team_squads = {}
        if self.player_data is None or 'team' not in self.player_data.columns:
            return
        # Single pass over the data instead of one boolean filter per team.
        # sort=False keeps first-appearance order; observed=True avoids
        # empty squads for unused categories of a categorical column.
        grouped = self.player_data.groupby('team', sort=False, observed=True)
        self.team_squads = {team: members for team, members in grouped}

    def get_squad_strength(self, team: str) -> Dict:
        """Calculate squad strength metrics.

        Args:
            team: Team identifier as it appears in the ``team`` column.

        Returns:
            Dict with ``squad_size`` plus one entry per aggregation in
            ``_SQUAD_AGGREGATIONS`` whose source column is present. For an
            unknown team the fixed dict from ``_empty_squad_features`` is
            returned instead.
        """
        if team not in self.team_squads:
            return self._empty_squad_features()
        squad = self.team_squads[team]
        features = {
            'squad_size': len(squad),
        }
        for column, aggregations in self._SQUAD_AGGREGATIONS:
            if column not in squad.columns:
                continue
            series = squad[column]
            for feature_name, method in aggregations:
                features[feature_name] = getattr(series, method)()
        return features

    def _empty_squad_features(self) -> Dict:
        """Return empty squad features."""
        # NOTE: deliberately a small fixed subset of the keys that
        # get_squad_strength can emit; callers relying on the other keys
        # must handle their absence.
        return {
            'squad_size': 0,
            'avg_rating': 0,
            'total_goals': 0,
            'total_assists': 0,
            'avg_age': 0,
        }

    def get_key_players(self, team: str, n: int = 5) -> List[Dict]:
        """Get top N key players for a team.

        Players are ranked by ``rating`` when that column exists, otherwise
        by ``goals * 1.5 + assists`` when both columns exist, otherwise all
        players score 0.

        Args:
            team: Team identifier.
            n: Maximum number of players to return.

        Returns:
            List of player records (dicts) including the computed
            ``importance`` value; empty list for an unknown team.
        """
        if team not in self.team_squads:
            return []
        # Work on a copy so the cached squad never gains an extra column.
        squad = self.team_squads[team].copy()
        # Score players by importance
        if 'rating' in squad.columns:
            squad['importance'] = squad['rating']
        elif 'goals' in squad.columns and 'assists' in squad.columns:
            squad['importance'] = squad['goals'] * 1.5 + squad['assists']
        else:
            squad['importance'] = 0
        top_players = squad.nlargest(n, 'importance')
        return top_players.to_dict('records')

    def get_missing_player_impact(
        self,
        team: str,
        missing_players: List[str]
    ) -> float:
        """Estimate impact of missing players.

        The impact is half of the share of the squad's goals scored by the
        missing players, capped at 0.3.

        Args:
            team: Team identifier.
            missing_players: Names matched against the ``player_name``
                column.

        Returns:
            A float in ``[0.0, 0.3]`` for non-negative goal counts; 0.0 when
            the team is unknown, no names are given, the ``player_name``
            column is absent, or none of the names match.
        """
        if team not in self.team_squads or not missing_players:
            return 0.0
        squad = self.team_squads[team]
        if 'player_name' not in squad.columns:
            return 0.0
        # Find missing players in squad
        missing = squad[squad['player_name'].isin(missing_players)]
        if missing.empty:
            return 0.0
        # Calculate impact based on contributions
        has_goals = 'goals' in squad.columns
        total_goals = squad['goals'].sum() if has_goals else 1
        missing_goals = missing['goals'].sum() if has_goals else 0
        # max(..., 1) guards against division by zero for goalless squads.
        goal_impact = missing_goals / max(total_goals, 1)
        # Cap at 30% impact; cast so callers get a plain Python float
        # rather than a NumPy scalar.
        return float(min(goal_impact * 0.5, 0.3))

    def get_match_features(
        self,
        home_team: str,
        away_team: str,
        home_missing: Optional[List[str]] = None,
        away_missing: Optional[List[str]] = None
    ) -> Dict:
        """Get player-based features for a match.

        Args:
            home_team: Home team identifier.
            away_team: Away team identifier.
            home_missing: Names of unavailable home players, if any.
            away_missing: Names of unavailable away players, if any.

        Returns:
            Dict containing each squad-strength feature prefixed with
            ``home_`` / ``away_``, ``rating_diff`` and
            ``goals_contribution_diff`` when both sides provide the
            underlying value, and ``home_injury_impact`` /
            ``away_injury_impact`` only when the corresponding missing list
            is non-empty.
        """
        home_strength = self.get_squad_strength(home_team)
        away_strength = self.get_squad_strength(away_team)
        # Add team features
        features = {f'home_{key}': value for key, value in home_strength.items()}
        features.update(
            {f'away_{key}': value for key, value in away_strength.items()}
        )
        # Add differences
        if 'avg_rating' in home_strength and 'avg_rating' in away_strength:
            features['rating_diff'] = (
                home_strength['avg_rating'] - away_strength['avg_rating']
            )
        if 'total_goals' in home_strength and 'total_goals' in away_strength:
            features['goals_contribution_diff'] = (
                home_strength['total_goals'] - away_strength['total_goals']
            )
        # Missing player impact
        if home_missing:
            features['home_injury_impact'] = self.get_missing_player_impact(
                home_team, home_missing
            )
        if away_missing:
            features['away_injury_impact'] = self.get_missing_player_impact(
                away_team, away_missing
            )
        return features
# Module-wide shared generator; stays None until get_generator() is first called.
_generator: Optional[PlayerFeatureGenerator] = None


def get_generator(player_data: pd.DataFrame = None) -> PlayerFeatureGenerator:
    """Return the shared player feature generator, creating it on first use.

    When ``player_data`` is given, it is loaded into the shared instance via
    ``set_player_data`` before the instance is returned; otherwise whatever
    data the instance already holds is left as is.
    """
    global _generator
    shared = _generator
    if shared is None:
        # First call: create the singleton and publish it before loading data.
        shared = PlayerFeatureGenerator()
        _generator = shared
    if player_data is not None:
        shared.set_player_data(player_data)
    return shared