"""Fight outcome prediction built on pre-computed fighter statistics."""

import random
from typing import Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

from config import (
    FIGHT_STATS_PATH,
    FIGHTER_STATS_PATH,
    FIGHTER_DETAILS_PATH,
    MODEL_DATA_PATH,
)


class FightPredictor:
    """Predict the outcome of a fight between two fighters.

    The predictor loads four CSV datasets (paths come from ``config``),
    builds a feature vector for a pairing, scales it together with the
    stored model data and feeds the result to ``model.predict``.
    """

    # Number of trailing rows of the scaled model table that form one model
    # input; the model is called with shape (1, SEQUENCE_LENGTH, n_features).
    SEQUENCE_LENGTH = 200

    def __init__(self, model):
        """Store the model and load all datasets.

        Args:
            model: Object exposing ``predict(data, verbose=0)``
                (presumably a Keras model -- confirm against the caller).
        """
        self.model = model
        self._load_data()

    def _load_data(self):
        """Load the required datasets and derive each fighter's age."""
        self.df = pd.read_csv(FIGHT_STATS_PATH)
        self.df_fighters = pd.read_csv(FIGHTER_STATS_PATH)
        self.df_fighters_details = pd.read_csv(
            FIGHTER_DETAILS_PATH, parse_dates=['DOB']
        )
        self.df_model = pd.read_csv(MODEL_DATA_PATH, parse_dates=True)

        # Age in years (365.25 days per year), rounded to one decimal.
        today = pd.Timestamp.today()
        self.df_fighters_details['AGE'] = self.df_fighters_details['DOB'].apply(
            lambda x: (today - pd.Timestamp(x)).days / 365.25
        ).round(1)

    def _validate_fighters(self, f1: str, f2: str):
        """Check that both fighters are present in the fighter stats table.

        Raises:
            ValueError: If either name is missing from the ``FIGHTER`` column.
        """
        for fighter in [f1, f2]:
            if fighter not in self.df_fighters['FIGHTER'].values:
                raise ValueError(f"Fighter '{fighter}' not found in database")

    def _fighter_age(self, fighter: str):
        """Return the ``AGE`` of the first details row matching *fighter*."""
        details = self.df_fighters_details
        return details[details['FIGHTER'] == fighter]['AGE'].values[0]

    def _derived_stats(self, fighter_df: pd.DataFrame) -> List:
        """Compute the rate/accuracy features for one fighter.

        Iterates the stat columns (from the 11th column of the fighter table
        onward) and, in column order, emits:

        * for a ``CTRL`` column: the value divided by ``TotalTime``;
        * for an ``attemps`` column: landed / attempts, followed by
          landed / ``TotalTime`` * 300.
        """
        stats = []
        total_time = fighter_df['TotalTime']
        for col in self.df_fighters.columns[10:]:
            parts = col.split('_')
            if 'CTRL' in parts:
                stats.append((fighter_df[col] / total_time).values[0])
            if 'attemps' in parts:
                landed = fighter_df[col.replace('attemps', 'landed')]
                stats.append((landed / fighter_df[col]).values[0])
                # NOTE(review): the factor 300 presumably converts to a
                # per-five-minute rate if TotalTime is in seconds -- confirm.
                stats.append((landed / total_time).values[0] * 300)
        return stats

    def _get_fighter_stats(self, f1: str, f2: str, verbose: bool) -> Tuple[np.ndarray, Dict]:
        """Build the model input vector for the ordered pairing (f1, f2).

        Args:
            f1: Name of the first fighter.
            f2: Name of the second fighter.
            verbose: When true, also return a dict of human-readable details.

        Returns:
            ``(vstup, details)`` where ``vstup`` is the 1-D feature array and
            ``details`` is empty unless ``verbose`` is true.
        """
        f1_df = self.df_fighters.loc[self.df_fighters['FIGHTER'] == f1]
        f2_df = self.df_fighters.loc[self.df_fighters['FIGHTER'] == f2]

        # Age difference, first fighter minus second fighter.
        agediff = self._fighter_age(f1) - self._fighter_age(f2)

        # Form scores, fight counts and win/draw/no-contest counts.
        form_scores = [
            f1_df['form_skore_fighter'].values[0],
            f2_df['form_skore_fighter'].values[0],
        ]
        no_of_fights = [f1_df['Fights'].values[0], f2_df['Fights'].values[0]]
        record_cols = ['Win', 'DRAW', 'No_contest']
        W_D_NC = (
            f1_df[record_cols].values.tolist()[0]
            + f2_df[record_cols].values.tolist()[0]
        )

        # All of f1's derived stats come first, then all of f2's.
        stats_list = self._derived_stats(f1_df) + self._derived_stats(f2_df)

        # The leading 1 fills the first column slot; _scale_input drops that
        # column before scaling.
        vstup = np.array(
            [1]
            + [
                f1_df.iloc[0][col] - f2_df.iloc[0][col]
                for col in ['HEIGHT_fighter', 'REACH_fighter']
            ]
            + [agediff]
            + form_scores
            + no_of_fights
            + W_D_NC
            + stats_list
        )

        details = {}
        if verbose:
            details = {
                "age_difference": f"{agediff:.1f}",
                f"{f1}_form_score": f"{form_scores[0]:.2f}",
                f"{f2}_form_score": f"{form_scores[1]:.2f}",
                f"{f1}_total_fights": int(no_of_fights[0]),
                f"{f2}_total_fights": int(no_of_fights[1]),
            }

        return vstup, details

    def _scale_input(self, vstup: np.ndarray) -> np.ndarray:
        """Scale the input together with the stored model data.

        The vector is appended as the last row of ``df_model``; every column
        except the first is min-max scaled to [0, 1] and the trailing
        ``SEQUENCE_LENGTH`` rows are returned with NaNs replaced by numbers
        (``np.nan_to_num``).
        """
        scaler = MinMaxScaler(feature_range=(0, 1))
        combined_df = pd.concat(
            [self.df_model, pd.DataFrame([vstup], columns=self.df_model.columns)],
            ignore_index=True,
        )
        scaled = scaler.fit_transform(combined_df.iloc[:, 1:])
        return np.nan_to_num(scaled[-self.SEQUENCE_LENGTH:, :])

    def _predict_scaled(self, vstup: np.ndarray):
        """Scale *vstup*, shape it as a single sequence and run the model."""
        scaled = self._scale_input(vstup)
        batch = np.reshape(scaled, (1, self.SEQUENCE_LENGTH, scaled.shape[1]))
        return self.model.predict(batch, verbose=0)

    def _random_opponent(self, exclude: str) -> str:
        """Pick a random fighter whose name differs from *exclude*."""
        candidates = [
            name
            for name in self.df_fighters['FIGHTER'].unique().tolist()
            if name != exclude
        ]
        return random.choice(candidates)

    def get_random_fighters(self) -> Tuple[str, str]:
        """Select two distinct random fighters from the database.

        Raises:
            IndexError: If fewer than two unique fighters are available
                (``random.choice`` on an empty sequence).
        """
        all_fighters = self.df_fighters['FIGHTER'].unique().tolist()
        fighter1 = random.choice(all_fighters)
        # Exclude the first pick so the same fighter is never chosen twice.
        fighter2 = self._random_opponent(fighter1)
        return fighter1, fighter2

    def get_prediction(
        self, f1: Optional[str], f2: Optional[str], verbose: bool = False
    ) -> Optional[Tuple[Dict, Dict, Dict]]:
        """Generate a fight prediction between two fighters.

        Args:
            f1: Name of first fighter (or None/empty for random).
            f2: Name of second fighter (or None/empty for random).
            verbose: Accepted for interface compatibility; the details are
                always computed because the response is built from them.

        Returns:
            Tuple of (fighter1_dict, fighter2_dict, details_dict), or None if
            the prediction fails for any reason (the error is printed).
        """
        try:
            # Fill in whichever fighter(s) the caller left unspecified.
            if not f1 and not f2:
                f1, f2 = self.get_random_fighters()
            elif not f1:
                f1 = self._random_opponent(f2)
            elif not f2:
                f2 = self._random_opponent(f1)

            self._validate_fighters(f1, f2)

            # Forward ordering (f1, f2); details feed the response dicts.
            vstup, raw_details = self._get_fighter_stats(f1, f2, verbose=True)
            pred_1 = self._predict_scaled(vstup)

            # Reversed ordering (f2, f1).
            vstup_rev, _ = self._get_fighter_stats(f2, f1, False)
            pred_2 = self._predict_scaled(vstup_rev)

            # Average the two orderings. NOTE(review): this treats the model
            # output as the probability for the second-listed fighter --
            # confirm against the training labels.
            # .item() extracts the single value explicitly; float() on an
            # array with ndim > 0 is deprecated since NumPy 1.25.
            combined = np.asarray(((1 - pred_1) + pred_2) / 2)
            f1_prob = float(combined.item())
            f2_prob = round(1 - f1_prob, 4)
            f1_prob = round(f1_prob, 4)

            fighter1_data = {
                'name': f1,
                'form_score': raw_details.get(f"{f1}_form_score", "0.00"),
                'total_fights': int(raw_details.get(f"{f1}_total_fights", 0)),
                'win_percentage': f"{f1_prob * 100:.2f}%",
                'prob': f1_prob,
            }

            fighter2_data = {
                'name': f2,
                'form_score': raw_details.get(f"{f2}_form_score", "0.00"),
                'total_fights': int(raw_details.get(f"{f2}_total_fights", 0)),
                'win_percentage': f"{f2_prob * 100:.2f}%",
                'prob': f2_prob,
            }

            details = {
                'age_difference': raw_details.get("age_difference", "0.0"),
            }

            return fighter1_data, fighter2_data, details

        except Exception as e:
            # Deliberate best-effort boundary: report and signal failure with
            # None, as documented, rather than propagating to the caller.
            print(f"Prediction failed: {e}")
            return None