File size: 7,434 Bytes
cf2059c 3084e65 cf2059c 3084e65 00d2af5 cf2059c 3084e65 cf2059c 00d2af5 cf2059c 3084e65 cf2059c 00d2af5 cf2059c 00d2af5 cf2059c 00d2af5 cf2059c 00d2af5 cf2059c 3084e65 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from typing import Tuple, Dict, Optional
import pandas as pd
import random
from config import (
FIGHT_STATS_PATH,
FIGHTER_STATS_PATH,
FIGHTER_DETAILS_PATH,
MODEL_DATA_PATH
)
class FightPredictor:
    """Predict the outcome of a fight between two fighters with a trained model.

    On construction the predictor loads the CSV datasets named in ``config``.
    For a pair of fighters it builds a feature vector, min-max scales it
    together with the stored model data, and queries ``model`` once per
    fighter order, averaging the two results into a single probability.
    """

    # Number of trailing rows of the scaled model data fed to the model.
    SEQUENCE_LENGTH = 200
    # Position of the first ``df_fighters`` column scanned for CTRL/attemps stats.
    STATS_START_COLUMN = 10
    # "Landed" rates are expressed per this many units of ``TotalTime``
    # (presumably seconds, i.e. per 5 minutes -- confirm against the dataset).
    RATE_WINDOW = 300

    def __init__(self, model):
        """Store the model and load all datasets.

        Args:
            model: Object exposing ``predict(data, verbose=0)``.
        """
        self.model = model
        self._load_data()

    def _load_data(self):
        """Load required datasets and derive each fighter's age."""
        self.df = pd.read_csv(FIGHT_STATS_PATH)
        self.df_fighters = pd.read_csv(FIGHTER_STATS_PATH)
        self.df_fighters_details = pd.read_csv(FIGHTER_DETAILS_PATH, parse_dates=['DOB'])
        self.df_model = pd.read_csv(MODEL_DATA_PATH, parse_dates=True)
        self.df_fighters_details['AGE'] = self._compute_ages(
            self.df_fighters_details['DOB']
        )

    @staticmethod
    def _compute_ages(dob, today=None):
        """Return ages in years (one decimal) for a Series of birth dates.

        Args:
            dob: Series of dates of birth; values that cannot be parsed as
                dates yield a NaN age instead of raising.
            today: Reference timestamp; defaults to the current local time.
        """
        if today is None:
            today = pd.Timestamp.today()
        # read_csv leaves the column as plain objects when parsing fails, so
        # coerce here to keep the subtraction below well-defined.
        dob = pd.to_datetime(dob, errors='coerce')
        return ((today - dob).dt.days / 365.25).round(1)

    def _validate_fighters(self, f1: str, f2: str):
        """Validate that both fighters exist in the stats and details tables.

        Raises:
            ValueError: If a fighter is missing from either table.
        """
        for fighter in (f1, f2):
            if fighter not in self.df_fighters['FIGHTER'].values:
                raise ValueError(f"Fighter '{fighter}' not found in database")
            # The age lookup indexes the details table directly, so a missing
            # row there would otherwise surface as an opaque IndexError.
            if fighter not in self.df_fighters_details['FIGHTER'].values:
                raise ValueError(f"Fighter '{fighter}' not found in fighter details")

    def _fighter_rate_stats(self, fighter_df) -> list:
        """Return control-time share, accuracy and landed-rate stats for one fighter.

        Columns from ``STATS_START_COLUMN`` onwards are scanned in order:
        a ``CTRL`` column contributes its share of ``TotalTime``; an
        ``attemps`` column contributes landed/attempted and the landed rate
        per ``RATE_WINDOW`` of ``TotalTime``.
        """
        total_time = fighter_df['TotalTime']
        stats = []
        for col in self.df_fighters.columns[self.STATS_START_COLUMN:]:
            tokens = col.split('_')
            if 'CTRL' in tokens:
                stats.append((fighter_df[col] / total_time).values[0])
            if 'attemps' in tokens:
                # 'attemps' (sic) is the spelling used by the dataset columns.
                landed = fighter_df[col.replace('attemps', 'landed')]
                stats.append((landed / fighter_df[col]).values[0])
                stats.append((landed / total_time).values[0] * self.RATE_WINDOW)
        return stats

    def _get_fighter_stats(self, f1: str, f2: str, verbose: bool) -> Tuple[np.ndarray, Dict]:
        """Get fighter statistics and compute input features.

        Args:
            f1: Name of the first fighter.
            f2: Name of the second fighter.
            verbose: When true, also return a dict of human-readable details.

        Returns:
            ``(vstup, details)`` where ``vstup`` is the feature vector
            (leading placeholder 1, height/reach/age differences, form
            scores, fight counts, win/draw/no-contest counts and per-fighter
            rate stats) and ``details`` is empty unless ``verbose`` is set.
        """
        fighters = self.df_fighters
        f1_df = fighters.loc[fighters['FIGHTER'] == f1]
        f2_df = fighters.loc[fighters['FIGHTER'] == f2]

        # Age difference comes from the separate details table.
        bios = self.df_fighters_details
        agediff = (
            bios.loc[bios['FIGHTER'] == f1, 'AGE'].values[0]
            - bios.loc[bios['FIGHTER'] == f2, 'AGE'].values[0]
        )

        form_scores = [
            f1_df['form_skore_fighter'].values[0],
            f2_df['form_skore_fighter'].values[0],
        ]
        no_of_fights = [f1_df['Fights'].values[0], f2_df['Fights'].values[0]]
        record_cols = ['Win', 'DRAW', 'No_contest']
        W_D_NC = f1_df[record_cols].values.tolist()[0] + f2_df[record_cols].values.tolist()[0]

        stats_list = self._fighter_rate_stats(f1_df) + self._fighter_rate_stats(f2_df)

        physical_diffs = [
            f1_df[col].values[0] - f2_df[col].values[0]
            for col in ('HEIGHT_fighter', 'REACH_fighter')
        ]
        # The leading 1 fills the first column, which _scale_input drops.
        vstup = np.array(
            [1] + physical_diffs + [agediff] + form_scores + no_of_fights + W_D_NC + stats_list
        )

        details = {}
        if verbose:
            details = {
                "age_difference": f"{agediff:.1f}",
                f"{f1}_form_score": f"{form_scores[0]:.2f}",
                f"{f2}_form_score": f"{form_scores[1]:.2f}",
                f"{f1}_total_fights": int(no_of_fights[0]),
                f"{f2}_total_fights": int(no_of_fights[1]),
            }
        return vstup, details

    def _scale_input(self, vstup: np.ndarray) -> np.ndarray:
        """Scale input features.

        The vector is appended as the last row of the model data, every
        column except the first is min-max scaled to [0, 1], and the last
        ``SEQUENCE_LENGTH`` rows are returned with NaNs replaced by numbers.
        """
        scaler = MinMaxScaler(feature_range=(0, 1))
        new_row = pd.DataFrame([vstup], columns=self.df_model.columns)
        combined_df = pd.concat([self.df_model, new_row], ignore_index=True)
        scaled = scaler.fit_transform(combined_df.iloc[:, 1:])
        return np.nan_to_num(scaled[-self.SEQUENCE_LENGTH:, :])

    def _random_opponent(self, exclude) -> str:
        """Return a random fighter name different from ``exclude``.

        Raises:
            ValueError: If no other fighter is available.
        """
        candidates = [
            name for name in self.df_fighters['FIGHTER'].unique().tolist()
            if name != exclude
        ]
        if not candidates:
            raise ValueError("Not enough fighters in database to pick an opponent")
        return random.choice(candidates)

    def get_random_fighters(self) -> Tuple[str, str]:
        """Select two distinct random fighters from the database.

        Raises:
            ValueError: If the database holds fewer than two distinct fighters.
        """
        all_fighters = self.df_fighters['FIGHTER'].unique().tolist()
        if len(all_fighters) < 2:
            raise ValueError("At least two fighters are required for a random matchup")
        fighter1 = random.choice(all_fighters)
        # Draw the second name from the remaining fighters only.
        fighter2 = self._random_opponent(fighter1)
        return fighter1, fighter2

    def _resolve_fighters(self, f1, f2) -> Tuple[str, str]:
        """Replace any missing (falsy) fighter name with a random one."""
        if not f1 and not f2:
            return self.get_random_fighters()
        if not f1:
            f1 = self._random_opponent(f2)
        elif not f2:
            f2 = self._random_opponent(f1)
        return f1, f2

    def _predict(self, vstup: np.ndarray):
        """Scale a feature vector, shape it as one sequence and query the model."""
        scaled = self._scale_input(vstup)
        batch = np.reshape(scaled, (1, self.SEQUENCE_LENGTH, scaled.shape[1]))
        return self.model.predict(batch, verbose=0)

    @staticmethod
    def _combine_predictions(pred_1, pred_2) -> float:
        """Average ``1 - pred_1`` and ``pred_2`` into one Python float.

        Both arguments must hold exactly one value. ``.item()`` is used
        because calling ``float()`` on an array with ndim > 0 is deprecated
        in NumPy.
        """
        averaged = ((1 - np.asarray(pred_1)) + np.asarray(pred_2)) / 2
        return float(averaged.item())

    @staticmethod
    def _fighter_summary(name: str, prob: float, raw_details: Dict) -> Dict:
        """Build the response dict for one fighter."""
        return {
            'name': name,
            'form_score': raw_details.get(f"{name}_form_score", "0.00"),
            'total_fights': int(raw_details.get(f"{name}_total_fights", 0)),
            'win_percentage': f"{prob * 100:.2f}%",
            'prob': prob,
        }

    def get_prediction(self, f1: str, f2: str, verbose: bool = False) -> Optional[Tuple[Dict, Dict, Dict]]:
        """
        Generate fight prediction between two fighters

        Args:
            f1: Name of first fighter (or None for random)
            f2: Name of second fighter (or None for random)
            verbose: Kept for backward compatibility; it currently has no
                effect because the details are always computed to build
                the response.

        Returns:
            Tuple of (fighter1_dict, fighter2_dict, details_dict)
            Returns None if prediction fails
        """
        try:
            f1, f2 = self._resolve_fighters(f1, f2)
            self._validate_fighters(f1, f2)

            # Details are always requested: the response is built from them.
            vstup, raw_details = self._get_fighter_stats(f1, f2, verbose=True)
            pred_1 = self._predict(vstup)

            # Query again with the fighters swapped and average both views.
            vstup_rev, _ = self._get_fighter_stats(f2, f1, False)
            pred_2 = self._predict(vstup_rev)

            f1_prob = self._combine_predictions(pred_1, pred_2)
            # f2 is derived from the unrounded value before f1 is rounded.
            f2_prob = round(1 - f1_prob, 4)
            f1_prob = round(f1_prob, 4)

            fighter1_data = self._fighter_summary(f1, f1_prob, raw_details)
            fighter2_data = self._fighter_summary(f2, f2_prob, raw_details)
            details = {
                'age_difference': raw_details.get("age_difference", "0.0"),
            }
            return fighter1_data, fighter2_data, details
        except Exception as e:
            # Boundary handler: callers rely on None to signal any failure.
            print(f"Prediction failed: {e}")
            return None