Spaces:
Running
Running
| """ | |
| Tracking Data Processor | |
| ======================= | |
| Este módulo procesa datos de tracking de jugadores para enriquecer | |
| el análisis de balón parado con métricas físicas y posicionales. | |
| Métricas que se pueden calcular: | |
| -------------------------------- | |
| 1. MÉTRICAS FÍSICAS (por secuencia de BP): | |
| - Distancia total recorrida por equipo | |
| - Velocidad máxima alcanzada | |
| - Sprints (>25 km/h) durante la secuencia | |
| - Aceleración/desaceleración | |
| 2. MÉTRICAS POSICIONALES (en el momento del corner): | |
| - Formación defensiva (distribución de jugadores en área) | |
| - Marcaje hombre a hombre vs zonal | |
| - Jugadores en zona de remate | |
| - Espacios libres en el área | |
| 3. MÉTRICAS DE MOVIMIENTO (durante la secuencia): | |
| - Carreras de desmarque | |
| - Movimientos de blocaje | |
| - Pressing post-pérdida | |
| 4. MÉTRICAS DE RECUPERACIÓN DEFENSIVA: | |
| - Tiempo para reorganizarse | |
| - Jugadores en posición defensiva | |
| - Transiciones defensivas | |
| """ | |
| import pandas as pd | |
| import numpy as np | |
| from pathlib import Path | |
| from typing import Dict, List, Tuple, Optional | |
| from dataclasses import dataclass | |
| import json | |
@dataclass
class PitchDimensions:
    """Pitch dimensions in metres (105 x 68 is the UEFA standard).

    Declared as a dataclass so individual measurements can be overridden
    through the constructor, e.g. ``PitchDimensions(length=100.0)``, while
    ``PitchDimensions()`` keeps returning the defaults below.
    """

    length: float = 105.0
    width: float = 68.0
    penalty_area_length: float = 16.5
    penalty_area_width: float = 40.32
    goal_area_length: float = 5.5
    goal_area_width: float = 18.32
    center_x: float = 0.0  # Centre of the pitch
    center_y: float = 0.0
@dataclass
class TrackingFrame:
    """A single tracking frame holding every player and the ball.

    Declared as a dataclass so the annotated fields below become constructor
    arguments; without it the class has no way to receive field values.
    """

    frame: int
    timestamp_ms: int
    period: int
    home_players: Dict[str, Tuple[float, float, float]]  # player_id -> (x, y, speed)
    away_players: Dict[str, Tuple[float, float, float]]  # player_id -> (x, y, speed)
    ball_position: Tuple[float, float, float]  # x, y, z
    ball_speed: float
    team_in_possession: Optional[str]
    player_in_possession: Optional[str]
class TrackingDataLoader:
    """Load and parse a tracking CSV file."""

    def __init__(self, filepath: str):
        self.filepath = Path(filepath)
        # Populated by load(); stays None until then.
        self.df: Optional[pd.DataFrame] = None
        self.pitch = PitchDimensions()

    def load(self, sample_rate: int = 1) -> pd.DataFrame:
        """Read the tracking file into ``self.df`` and return it.

        Args:
            sample_rate: Keep every N-th distinct frame (1 = all frames,
                5 = one frame out of five). Values <= 1 keep everything.

        Returns:
            The loaded (and possibly down-sampled) DataFrame.
        """
        print(f"📂 Cargando tracking data: {self.filepath.name}")
        self.df = pd.read_csv(
            self.filepath,
            low_memory=False,
            dtype={
                'frame': 'int32',
                'player_id': 'str',
                'player_x': 'float32',
                'player_y': 'float32',
                'player_speed': 'float32',
                'is_player_visible': 'int8',
                'ball_x': 'float32',
                'ball_y': 'float32',
                'ball_z': 'float32',
                'ball_speed': 'float32',
                'is_ball_visible': 'int8',
                'match_period': 'int8',
                'video_time_ms': 'int32',
            },
        )
        # Down-sample by whole frames so every kept frame retains all its rows.
        if sample_rate > 1:
            unique_frames = self.df['frame'].unique()
            sampled_frames = unique_frames[::sample_rate]
            self.df = self.df[self.df['frame'].isin(sampled_frames)]
        print(f" ✓ {len(self.df):,} filas cargadas")
        print(f" ✓ {self.df['frame'].nunique():,} frames")
        print(f" ✓ {self.df['player_id'].nunique()} jugadores únicos")
        return self.df

    def get_teams(self) -> Tuple[Optional[str], Optional[str]]:
        """Return the first two distinct team ids found in ``team_in_poss``.

        Ids are returned in order of first appearance. A slot is ``None``
        when fewer than two teams are present; ``(None, None)`` is returned
        when the column holds no non-null value at all (previously this
        raised ``IndexError``).

        ``load()`` must have been called first, otherwise ``self.df`` is
        still ``None`` and the column lookup fails.
        """
        teams = self.df['team_in_poss'].dropna().unique()
        first = teams[0] if len(teams) > 0 else None
        second = teams[1] if len(teams) > 1 else None
        return (first, second)
class SetPieceTrackingExtractor:
    """Extract tracking data for set-piece sequences.

    Combines the processed events with the tracking data.
    """

    def __init__(self, tracking_df: pd.DataFrame, events_df: pd.DataFrame, fps: int = 25):
        """Store the inputs.

        Args:
            tracking_df: Tracking rows; the methods below read the columns
                ``frame``, ``match_period`` and ``video_time_ms``.
            events_df: Event rows; stored but not read by this class.
            fps: Frames per second used to turn a duration into a frame
                count. Defaults to 25, the value that was hard-coded before.
        """
        self.tracking = tracking_df
        self.events = events_df
        self.fps = fps  # Frames per second

    def get_frame_for_timestamp(self, period: int, minute: int, second: int) -> Optional[int]:
        """Find the frame closest to a match clock time.

        Args:
            period: Match period to search in.
            minute: Match minute.
            second: Second within the minute.

        Returns:
            The frame number as a plain ``int``, or ``None`` when the period
            has no rows or no usable ``video_time_ms`` value.
        """
        # Convert minute/second into video milliseconds.
        if period == 2:
            # The second half is assumed to restart from 0 in video_time_ms.
            target_ms = (minute - 45) * 60 * 1000 + second * 1000
        else:
            target_ms = minute * 60 * 1000 + second * 1000

        period_df = self.tracking[self.tracking['match_period'] == period]
        if period_df.empty:
            return None

        # Work positionally: label-based idxmin/.loc returns a Series instead
        # of a scalar when the index contains duplicate labels.
        deltas = (period_df['video_time_ms'] - target_ms).abs().to_numpy(dtype='float64')
        if np.isnan(deltas).all():
            return None
        closest_pos = int(np.nanargmin(deltas))
        # Cast so callers receive the annotated int rather than a NumPy scalar.
        return int(period_df['frame'].iloc[closest_pos])

    def extract_sequence_tracking(
        self,
        period: int,
        start_minute: int,
        start_second: int,
        duration_seconds: float = 10.0
    ) -> pd.DataFrame:
        """Extract the tracking rows for one set-piece sequence.

        Args:
            period: Match period (1 or 2).
            start_minute: Minute at which the sequence starts.
            start_second: Second at which the sequence starts.
            duration_seconds: Length of the window to extract.

        Returns:
            Rows of the period whose frame lies in
            ``[start_frame, start_frame + duration_seconds * fps]`` (both
            ends inclusive); an empty DataFrame when no start frame is found.
        """
        start_frame = self.get_frame_for_timestamp(period, start_minute, start_second)
        if start_frame is None:
            return pd.DataFrame()
        end_frame = start_frame + int(duration_seconds * self.fps)
        return self.tracking[
            (self.tracking['frame'] >= start_frame) &
            (self.tracking['frame'] <= end_frame) &
            (self.tracking['match_period'] == period)
        ]
class TrackingMetricsCalculator:
    """Compute advanced metrics from tracking data."""

    def __init__(self, pitch: Optional["PitchDimensions"] = None):
        """Store the pitch geometry, falling back to the default dimensions."""
        self.pitch = pitch if pitch is not None else PitchDimensions()

    def calculate_physical_metrics(self, sequence_df: pd.DataFrame) -> Dict:
        """Compute physical metrics for one sequence.

        Reads the columns ``player_id``, ``frame``, ``player_x``,
        ``player_y`` and ``player_speed``.

        Returns:
            ``{}`` for an empty input, otherwise a dict with:
            - ``max_speed_kmh``: highest ``player_speed`` times 3.6.
            - ``num_sprints``: number of distinct players with at least one
              row above the sprint threshold (not the number of bursts).
            - ``total_distance_m``: sum over all players of the frame-to-frame
              displacement.
            - ``avg_distance_per_player_m``: mean of the per-player distance.
        """
        if sequence_df.empty:
            return {}
        metrics = {}

        # Top speed per player, then the best of those.
        max_speeds = sequence_df.groupby('player_id')['player_speed'].max()
        metrics['max_speed_kmh'] = float(max_speeds.max() * 3.6)

        # Sprints (>25 km/h = 6.94 m/s)
        sprint_threshold = 6.94
        sprints = sequence_df[sequence_df['player_speed'] > sprint_threshold]
        metrics['num_sprints'] = len(sprints['player_id'].unique())

        # Distance (approximation): straight-line displacement between
        # consecutive frames of the same player. sort_values returns a new
        # frame, so the caller's data is not modified.
        ordered = sequence_df.sort_values(['player_id', 'frame'])
        per_player = ordered.groupby('player_id')
        dx = per_player['player_x'].diff()
        dy = per_player['player_y'].diff()
        step = np.sqrt(dx ** 2 + dy ** 2)
        total_distance = step.groupby(ordered['player_id']).sum()
        metrics['total_distance_m'] = float(total_distance.sum())
        metrics['avg_distance_per_player_m'] = float(total_distance.mean())
        return metrics

    def calculate_defensive_setup(
        self,
        frame_df: pd.DataFrame,
        defending_team_id: str,
        attacking_side: str = 'right'  # 'left' or 'right': which goal is defended
    ) -> Dict:
        """Analyse the set-up inside the penalty area at one moment.

        Coordinates are treated as centred on the pitch (goals at
        ``x = +/- length / 2``, ``y = 0``), which is what the distance-to-goal
        formula already relied on.

        Args:
            frame_df: Rows of a single frame; reads ``is_player_visible``,
                ``player_x`` and ``player_y``.
            defending_team_id: Currently unused: no team column is read here,
                so visible players of both teams are counted.
                NOTE(review): filter by team once a team column is confirmed.
            attacking_side: ``'right'`` selects the area at positive x, any
                other value the area at negative x.

        Returns:
            ``{}`` for an empty input, otherwise a dict with
            ``players_in_penalty_area``, ``avg_distance_to_goal``,
            ``defensive_spread_x``, ``defensive_spread_y`` and
            ``defensive_spread`` (the x and y spreads combined).
        """
        if frame_df.empty:
            return {}

        visible_players = frame_df[frame_df['is_player_visible'] == 1]

        half_length = self.pitch.length / 2
        if attacking_side == 'right':
            goal_x = half_length
            in_depth = visible_players['player_x'] >= goal_x - self.pitch.penalty_area_length
        else:
            goal_x = -half_length
            in_depth = visible_players['player_x'] <= goal_x + self.pitch.penalty_area_length
        # The area is also bounded laterally; without this check every player
        # deep enough (even out by the touchline) was counted as inside it.
        in_width = visible_players['player_y'].abs() <= self.pitch.penalty_area_width / 2
        players_in_area = visible_players[in_depth & in_width]

        metrics = {
            'players_in_penalty_area': len(players_in_area),
            'avg_distance_to_goal': 0,
            'defensive_spread': 0,  # Overall dispersion of the players in the area
            'defensive_spread_x': 0,
            'defensive_spread_y': 0,
        }
        if not players_in_area.empty:
            # Dispersion = sample standard deviation of the positions. It is
            # undefined (NaN) for a single player; report 0.0 in that case.
            spread_x = self._nan_to_zero(players_in_area['player_x'].std())
            spread_y = self._nan_to_zero(players_in_area['player_y'].std())
            metrics['defensive_spread_x'] = spread_x
            metrics['defensive_spread_y'] = spread_y
            metrics['defensive_spread'] = float(np.hypot(spread_x, spread_y))
            # Mean Euclidean distance to the centre of the goal line.
            metrics['avg_distance_to_goal'] = float(
                np.sqrt((players_in_area['player_x'] - goal_x) ** 2 +
                        players_in_area['player_y'] ** 2).mean()
            )
        return metrics

    def detect_runs(
        self,
        sequence_df: pd.DataFrame,
        speed_threshold_kmh: float = 20.0
    ) -> List[Dict]:
        """Detect stretches of consecutive frames above a speed threshold.

        Args:
            sequence_df: Tracking rows; reads ``player_id``, ``frame`` and
                ``player_speed`` (compared in m/s).
            speed_threshold_kmh: Threshold in km/h.

        Returns:
            One dict per run with ``player_id``, ``start_frame``,
            ``end_frame``, ``duration_frames`` and ``max_speed_kmh``.
            ``end_frame`` is the first frame back at or below the threshold;
            for a run still in progress when the data ends it is the
            player's last frame (such runs used to be dropped).
        """
        if sequence_df.empty:
            return []

        speed_threshold = speed_threshold_kmh / 3.6  # Convert to m/s
        runs = []
        # sort=False keeps players in order of first appearance.
        for player_id, player_df in sequence_df.groupby('player_id', sort=False):
            player_df = player_df.sort_values('frame')
            run_start = None
            peak_speed = 0.0
            last_frame = None
            for frame, speed in zip(player_df['frame'], player_df['player_speed']):
                if speed > speed_threshold:  # NaN compares False: not running
                    if run_start is None:
                        run_start = frame
                        peak_speed = speed
                    elif speed > peak_speed:
                        peak_speed = speed
                elif run_start is not None:
                    runs.append(self._build_run(player_id, run_start, frame, peak_speed))
                    run_start = None
                last_frame = frame
            # Close a run that was still in progress at the end of the data.
            if run_start is not None:
                runs.append(self._build_run(player_id, run_start, last_frame, peak_speed))
        return runs

    @staticmethod
    def _nan_to_zero(value) -> float:
        """Return ``value`` as a float, mapping NaN to 0.0."""
        value = float(value)
        return 0.0 if np.isnan(value) else value

    @staticmethod
    def _build_run(player_id, start_frame, end_frame, peak_speed) -> Dict:
        """Assemble the dict describing one detected run."""
        return {
            'player_id': player_id,
            'start_frame': start_frame,
            'end_frame': end_frame,
            'duration_frames': end_frame - start_frame,
            'max_speed_kmh': float(peak_speed * 3.6),
        }
class TrackingProcessor:
    """Main processor that joins tracking data with set-piece sequences."""

    def __init__(self, tracking_path: str, match_id: str):
        self.tracking_path = Path(tracking_path)
        self.match_id = match_id
        self.loader = TrackingDataLoader(tracking_path)
        self.metrics_calc = TrackingMetricsCalculator()

    def process_match(self, corner_sequences: Optional[pd.DataFrame] = None) -> Dict:
        """Process the full tracking data of one match.

        Args:
            corner_sequences: Optional DataFrame of corner sequences; each
                row is read for ``period_id``, ``minute``, ``second`` and
                ``corner_sequence_id``.

        Returns:
            Dict with ``match_id``, ``tracking_stats`` (match-level summary)
            and ``sequences`` (one entry per corner sequence for which
            tracking rows were found; empty when no sequences are given).
        """
        # Load every frame (no down-sampling).
        tracking_df = self.loader.load(sample_rate=1)
        results = {
            'match_id': self.match_id,
            'tracking_stats': self._calculate_match_stats(tracking_df),
            'sequences': []
        }
        if corner_sequences is not None:
            extractor = SetPieceTrackingExtractor(tracking_df, corner_sequences)
            for _, seq in corner_sequences.iterrows():
                # 15-second window starting at the corner.
                seq_tracking = extractor.extract_sequence_tracking(
                    period=seq['period_id'],
                    start_minute=seq['minute'],
                    start_second=seq['second'],
                    duration_seconds=15.0
                )
                # Sequences without matching tracking rows are skipped.
                if not seq_tracking.empty:
                    results['sequences'].append({
                        'corner_sequence_id': seq['corner_sequence_id'],
                        'physical_metrics': self.metrics_calc.calculate_physical_metrics(seq_tracking),
                        'runs': self.metrics_calc.detect_runs(seq_tracking)
                    })
        return results

    def _calculate_match_stats(self, df: pd.DataFrame) -> Dict:
        """Return match-level summary statistics.

        ``duration_minutes`` is the sum of the per-period spans of
        ``video_time_ms``. Taking the global maximum instead under-reports
        the duration when the clock restarts at each period, which is what
        the timestamp conversion elsewhere in this module assumes. When no
        ``match_period`` column is present the global maximum is used.
        """
        if 'match_period' in df.columns:
            spans = df.groupby('match_period')['video_time_ms'].agg(['min', 'max'])
            duration_ms = float((spans['max'] - spans['min']).sum())
        else:
            duration_ms = float(df['video_time_ms'].max())
        return {
            'total_frames': int(df['frame'].nunique()),
            'duration_minutes': float(duration_ms / 1000 / 60),
            'unique_players': int(df['player_id'].nunique()),
            'max_speed_kmh': float(df['player_speed'].max() * 3.6),
            'avg_visibility_pct': float(df['is_player_visible'].mean() * 100)
        }
| # ============================================================================= | |
| # FUNCIONES DE UTILIDAD PARA INTEGRACIÓN CON PIPELINE EXISTENTE | |
| # ============================================================================= | |
def enrich_corner_sequence_with_tracking(
    sequence_id: str,
    tracking_df: pd.DataFrame,
    period: int,
    minute: int,
    second: int
) -> Dict:
    """Attach tracking-derived metrics to one corner sequence.

    Args:
        sequence_id: Identifier of the corner sequence.
        tracking_df: Tracking data of the match.
        period: Match period of the corner.
        minute: Minute of the corner.
        second: Second of the corner.

    Returns:
        ``{'sequence_id', 'has_tracking': False}`` when no tracking rows are
        found for the window; otherwise a dict that also carries
        ``physical_metrics``, ``runs`` and ``initial_setup`` (visibility
        counts at the earliest frame of the window).
    """
    # Window of 15 seconds starting at the corner.
    window = SetPieceTrackingExtractor(tracking_df, pd.DataFrame()).extract_sequence_tracking(
        period=period,
        start_minute=minute,
        start_second=second,
        duration_seconds=15.0
    )
    if window.empty:
        return {'sequence_id': sequence_id, 'has_tracking': False}

    calculator = TrackingMetricsCalculator()

    # Rows of the earliest frame: the moment the corner is taken.
    first_frame_rows = window[window['frame'] == window['frame'].min()]
    setup = {
        'players_visible': int(first_frame_rows['is_player_visible'].sum()),
        'ball_visible': bool(first_frame_rows['is_ball_visible'].any())
    }

    enriched = {'sequence_id': sequence_id, 'has_tracking': True}
    enriched['physical_metrics'] = calculator.calculate_physical_metrics(window)
    enriched['runs'] = calculator.detect_runs(window)
    enriched['initial_setup'] = setup
    return enriched
def get_player_heatmap_data(
    tracking_df: pd.DataFrame,
    player_id: str,
    period: Optional[int] = None
) -> Dict:
    """Collect the visible positions of one player for a heatmap.

    Args:
        tracking_df: Tracking rows; reads ``player_id``, ``match_period``,
            ``is_player_visible``, ``player_x`` and ``player_y``.
        player_id: Player whose positions are collected.
        period: When given, only rows of that match period are kept.

    Returns:
        Dict with ``player_id``, the ``x`` and ``y`` coordinate lists and
        ``n_samples``, the number of positions kept.
    """
    keep = tracking_df['player_id'] == player_id
    if period is not None:
        keep = keep & (tracking_df['match_period'] == period)
    # Only frames where the player was actually seen contribute.
    keep = keep & (tracking_df['is_player_visible'] == 1)
    positions = tracking_df[keep]

    xs = positions['player_x'].tolist()
    ys = positions['player_y'].tolist()
    return {
        'player_id': player_id,
        'x': xs,
        'y': ys,
        'n_samples': len(positions)
    }
| # ============================================================================= | |
| # EJEMPLO DE USO | |
| # ============================================================================= | |
if __name__ == "__main__":
    # Demo: load one match, take a short first-half sample and print metrics.
    TRACKING_FILE = "datasets/2025-08-16 - Santander vs Castellón - tracking.csv"
    SAMPLE_RATE = 5     # keep one frame out of five for a quick demo
    SOURCE_FPS = 25     # assumed capture rate, as in SetPieceTrackingExtractor
    SAMPLE_SECONDS = 10
    # Number of *sampled* frames that span SAMPLE_SECONDS. The previous fixed
    # 250 covered about 50 s once down-sampling by 5 was applied, which
    # contradicted the "10 segundos" label printed below.
    frames_in_sample = max(1, SAMPLE_SECONDS * SOURCE_FPS // SAMPLE_RATE)

    print("=" * 70)
    print("🔬 TRACKING DATA PROCESSOR - Demo")
    print("=" * 70)

    # Load the data
    loader = TrackingDataLoader(TRACKING_FILE)
    df = loader.load(sample_rate=SAMPLE_RATE)

    calc = TrackingMetricsCalculator()

    # First frames of the first half
    sample_frames = df[df['match_period'] == 1]['frame'].unique()[:frames_in_sample]
    sample_df = df[df['frame'].isin(sample_frames)]

    print("\n📊 Métricas físicas (muestra de 10 segundos):")
    physical = calc.calculate_physical_metrics(sample_df)
    for key, value in physical.items():
        print(f" {key}: {value:.2f}")

    print("\n🏃 Carreras detectadas:")
    runs = calc.detect_runs(sample_df, speed_threshold_kmh=20.0)
    print(f" Total: {len(runs)} carreras")
    if runs:
        top_run = max(runs, key=lambda x: x['max_speed_kmh'])
        print(f" Carrera más rápida: {top_run['max_speed_kmh']:.1f} km/h")

    print("\n✅ Procesamiento completado")