# ufc-predictor / predictor.py
# zjpiazza's picture
# Updates
# 00d2af5
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from typing import Tuple, Dict, Optional
import pandas as pd
import random
from config import (
FIGHT_STATS_PATH,
FIGHTER_STATS_PATH,
FIGHTER_DETAILS_PATH,
MODEL_DATA_PATH
)
class FightPredictor:
    """Predict the winner of a fight between two fighters with a trained model.

    On construction the predictor loads four CSV datasets (paths come from
    ``config``) and derives each fighter's age.  ``get_prediction`` is the
    public entry point; everything prefixed with ``_`` is an internal step.
    """

    # Number of trailing rows (historical rows plus the new input row) that
    # are handed to the model as one sample.
    # NOTE(review): presumably the sequence length the model was trained
    # with -- confirm against the training code.
    WINDOW_SIZE = 200

    def __init__(self, model):
        """Store *model* and load the datasets.

        Args:
            model: Object exposing ``predict(data, verbose=0)``; called with
                an array of shape ``(1, WINDOW_SIZE, n_features)``.
        """
        self.model = model
        self._load_data()

    def _load_data(self):
        """Load required datasets and compute each fighter's age in years."""
        self.df = pd.read_csv(FIGHT_STATS_PATH)
        self.df_fighters = pd.read_csv(FIGHTER_STATS_PATH)
        self.df_fighters_details = pd.read_csv(FIGHTER_DETAILS_PATH, parse_dates=['DOB'])
        self.df_model = pd.read_csv(MODEL_DATA_PATH, parse_dates=True)

        # Age = days since date of birth / 365.25, rounded to one decimal.
        today = pd.Timestamp.today()
        self.df_fighters_details['AGE'] = self.df_fighters_details['DOB'].apply(
            lambda x: (today - pd.Timestamp(x)).days / 365.25
        ).round(1)

    def _validate_fighters(self, f1: str, f2: str):
        """Validate that both fighters exist in the fighter stats dataset.

        Raises:
            ValueError: If either name is missing from
                ``df_fighters['FIGHTER']``.
        """
        known_fighters = self.df_fighters['FIGHTER'].values
        for fighter in (f1, f2):
            if fighter not in known_fighters:
                raise ValueError(f"Fighter '{fighter}' not found in database")

    def _get_age(self, fighter: str):
        """Return the ``AGE`` value of *fighter* from the details dataset.

        Raises:
            ValueError: If the fighter has no row in ``df_fighters_details``
                (the stats and details datasets are loaded from separate
                files, so passing ``_validate_fighters`` does not guarantee
                a details row exists).
        """
        details = self.df_fighters_details
        ages = details.loc[details['FIGHTER'] == fighter, 'AGE'].values
        if len(ages) == 0:
            raise ValueError(f"Fighter '{fighter}' has no entry in fighter details")
        return ages[0]

    def _collect_rate_stats(self, fighter_df: pd.DataFrame) -> list:
        """Build the derived per-fighter statistics from the stat columns.

        Walks every column from index 10 onward of ``df_fighters`` and, based
        on the ``_``-separated tokens of the column name, appends:

        * ``CTRL`` column: value divided by ``TotalTime``;
        * ``attemps`` column: landed / attempts, then
          landed / ``TotalTime`` * 300, where "landed" is the column whose
          name has ``attemps`` replaced by ``landed``.

        NOTE(review): the factor 300 presumably converts a per-second rate
        into a per-5-minute-round rate -- confirm the unit of ``TotalTime``.
        """
        total_time = fighter_df['TotalTime']
        stats = []
        for col in self.df_fighters.columns[10:]:
            tokens = col.split('_')
            if 'CTRL' in tokens:
                stats.append((fighter_df[col] / total_time).values[0])
            if 'attemps' in tokens:
                # 'attemps' (sic) is the spelling used by the dataset columns.
                landed = fighter_df[col.replace('attemps', 'landed')]
                stats.append((landed / fighter_df[col]).values[0])
                stats.append((landed / total_time).values[0] * 300)
        return stats

    def _get_fighter_stats(self, f1: str, f2: str, verbose: bool) -> Tuple[np.ndarray, Dict]:
        """Get fighter statistics and compute the model input features.

        Args:
            f1: Name of the first fighter.
            f2: Name of the second fighter.
            verbose: When true, also return a dict of human-readable details.

        Returns:
            ``(features, details)``.  ``features`` is a 1-D array laid out as
            ``[1, height diff, reach diff, age diff, form f1, form f2,
            fights f1, fights f2, W/D/NC f1, W/D/NC f2, stats f1, stats f2]``
            (differences are f1 minus f2).  ``details`` is empty unless
            *verbose* is true.

        Raises:
            ValueError: If either fighter has no row in the details dataset.
        """
        f1_df = self.df_fighters.loc[self.df_fighters['FIGHTER'] == f1]
        f2_df = self.df_fighters.loc[self.df_fighters['FIGHTER'] == f2]

        # Compute age difference
        agediff = self._get_age(f1) - self._get_age(f2)

        # Collect form scores and fight stats
        form_scores = [f1_df['form_skore_fighter'].values[0], f2_df['form_skore_fighter'].values[0]]
        no_of_fights = [f1_df['Fights'].values[0], f2_df['Fights'].values[0]]
        W_D_NC = (
            f1_df[['Win', 'DRAW', 'No_contest']].values.tolist()[0] +
            f2_df[['Win', 'DRAW', 'No_contest']].values.tolist()[0]
        )

        # Process stats: all of f1's derived stats first, then all of f2's.
        stats_list = self._collect_rate_stats(f1_df) + self._collect_rate_stats(f2_df)

        # Prepare input array.  The leading 1 is a placeholder for the first
        # column of df_model, which _scale_input drops before scaling.
        f1_row, f2_row = f1_df.iloc[0], f2_df.iloc[0]
        size_diffs = [f1_row[col] - f2_row[col] for col in ('HEIGHT_fighter', 'REACH_fighter')]
        vstup = np.array(
            [1] + size_diffs + [agediff] + form_scores + no_of_fights + W_D_NC + stats_list
        )

        # Prepare details dict if verbose
        details = {}
        if verbose:
            details = {
                "age_difference": f"{agediff:.1f}",
                f"{f1}_form_score": f"{form_scores[0]:.2f}",
                f"{f2}_form_score": f"{form_scores[1]:.2f}",
                f"{f1}_total_fights": int(no_of_fights[0]),
                f"{f2}_total_fights": int(no_of_fights[1])
            }
        return vstup, details

    def _scale_input(self, vstup: np.ndarray) -> np.ndarray:
        """Scale input features to the range [0, 1].

        The new feature row is appended to ``df_model``, a fresh
        ``MinMaxScaler`` is fitted on every column except the first, and the
        last ``WINDOW_SIZE`` scaled rows are returned with NaNs replaced via
        ``np.nan_to_num``.  *vstup* must have exactly one value per
        ``df_model`` column.
        """
        scaler = MinMaxScaler(feature_range=(0, 1))
        new_row = pd.DataFrame([vstup], columns=self.df_model.columns)
        combined_df = pd.concat([self.df_model, new_row], ignore_index=True)
        scaled = scaler.fit_transform(combined_df.iloc[:, 1:])
        return np.nan_to_num(scaled[-self.WINDOW_SIZE:, :])

    def get_random_fighters(self) -> Tuple[str, str]:
        """Select two different random fighters from the database."""
        all_fighters = self.df_fighters['FIGHTER'].unique().tolist()
        fighter1 = random.choice(all_fighters)
        # Exclude the first pick so a fighter never faces themselves.
        fighter2 = random.choice([f for f in all_fighters if f != fighter1])
        return fighter1, fighter2

    def _predict_matchup(self, f1: str, f2: str, verbose: bool) -> Tuple[np.ndarray, Dict]:
        """Run the model for the ordered pair (f1, f2).

        Returns:
            ``(prediction, details)`` where ``prediction`` is whatever
            ``model.predict`` returns and ``details`` comes from
            ``_get_fighter_stats``.
        """
        vstup, details = self._get_fighter_stats(f1, f2, verbose)
        scaled = self._scale_input(vstup)
        sample = np.reshape(scaled, (1, self.WINDOW_SIZE, scaled.shape[1]))
        return self.model.predict(sample, verbose=0), details

    @staticmethod
    def _combine_predictions(pred_1, pred_2) -> float:
        """Average the forward and reversed predictions into f1's win probability.

        Computes ``((1 - pred_1) + pred_2) / 2``.  Both inputs must hold
        exactly one element.  ``.item()`` is used instead of ``float(array)``
        because converting an array with ``ndim > 0`` to a scalar is
        deprecated since NumPy 1.25.
        """
        averaged = ((1 - np.asarray(pred_1)) + np.asarray(pred_2)) / 2
        return float(averaged.item())

    def get_prediction(
        self, f1: Optional[str], f2: Optional[str], verbose: bool = False
    ) -> Optional[Tuple[Dict, Dict, Dict]]:
        """Generate a fight prediction between two fighters.

        The model is evaluated for both orderings of the pair and the two
        outputs are averaged, so the result does not depend on which fighter
        is listed first.

        Args:
            f1: Name of first fighter.
            f2: Name of second fighter.  If *both* names are falsy
                (``None`` or empty), two random fighters are chosen; if only
                one is falsy, validation fails and ``None`` is returned.
            verbose: Currently unused; the details are always computed
                because the response is built from them.

        Returns:
            Tuple of ``(fighter1_dict, fighter2_dict, details_dict)``, or
            ``None`` if the prediction fails for any reason (the error is
            printed to stdout).
        """
        # Broad catch is deliberate: the documented contract is "None on any
        # failure", and the model/scaler may raise arbitrary exception types.
        try:
            # If both fighters are missing, get random fighters
            if not f1 and not f2:
                f1, f2 = self.get_random_fighters()

            # Validate fighters exist
            self._validate_fighters(f1, f2)

            # Forward prediction, then the same matchup with roles swapped
            pred_1, raw_details = self._predict_matchup(f1, f2, True)
            pred_2, _ = self._predict_matchup(f2, f1, False)

            # Calculate final probability
            f1_prob = self._combine_predictions(pred_1, pred_2)
            f2_prob = round(1 - f1_prob, 4)
            f1_prob = round(f1_prob, 4)

            # Structure the response data
            fighter1_data = {
                'name': f1,
                'form_score': raw_details.get(f"{f1}_form_score", "0.00"),
                'total_fights': int(raw_details.get(f"{f1}_total_fights", 0)),
                'win_percentage': f"{f1_prob * 100:.2f}%",
                'prob': f1_prob
            }
            fighter2_data = {
                'name': f2,
                'form_score': raw_details.get(f"{f2}_form_score", "0.00"),
                'total_fights': int(raw_details.get(f"{f2}_total_fights", 0)),
                'win_percentage': f"{f2_prob * 100:.2f}%",
                'prob': f2_prob
            }
            details = {
                'age_difference': raw_details.get("age_difference", "0.0"),
            }
            return fighter1_data, fighter2_data, details
        except Exception as e:
            print(f"Prediction failed: {e}")
            return None