James McCool
committed on
Commit
·
c283108
1
Parent(s):
dd908a8
Enhance player selection handling in predict_dupes function
Browse files
This update improves the player selection process by ensuring only valid string values are added to the unique player set. Additionally, it introduces a check to return a zero vector if no valid players are found, enhancing robustness. The player_columns list is now defined dynamically based on the portfolio structure, ensuring accurate processing of player data across different lineup configurations.
- global_func/predict_dupes.py +24 -3
global_func/predict_dupes.py
CHANGED
|
@@ -56,9 +56,19 @@ def calculate_player_similarity_score(portfolio, player_columns):
|
|
| 56 |
# Get all unique players across all lineups
|
| 57 |
all_players = set()
|
| 58 |
for col in player_columns:
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
all_players = sorted(list(all_players))
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
# Create a binary matrix: 1 if player is in lineup, 0 if not
|
| 63 |
binary_matrix = np.zeros((len(portfolio), len(all_players)))
|
| 64 |
|
|
@@ -115,7 +125,9 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
|
|
| 115 |
dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank']
|
| 116 |
own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own']
|
| 117 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
|
|
|
| 118 |
player_columns = [col for col in portfolio.columns[:5] if col not in ['salary', 'median', 'Own']]
|
|
|
|
| 119 |
flex_ownerships = pd.concat([
|
| 120 |
portfolio.iloc[:,1].map(maps_dict['own_map']),
|
| 121 |
portfolio.iloc[:,2].map(maps_dict['own_map']),
|
|
@@ -157,7 +169,9 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
|
|
| 157 |
dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
|
| 158 |
own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
|
| 159 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
|
|
|
| 160 |
player_columns = [col for col in portfolio.columns[:num_players] if col not in ['salary', 'median', 'Own']]
|
|
|
|
| 161 |
for i in range(1, num_players + 1):
|
| 162 |
portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
|
| 163 |
portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100
|
|
@@ -181,7 +195,9 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
|
|
| 181 |
dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank']
|
| 182 |
own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own']
|
| 183 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
|
|
|
| 184 |
player_columns = [col for col in portfolio.columns[:6] if col not in ['salary', 'median', 'Own']]
|
|
|
|
| 185 |
flex_ownerships = pd.concat([
|
| 186 |
portfolio.iloc[:,1].map(maps_dict['own_map']),
|
| 187 |
portfolio.iloc[:,2].map(maps_dict['own_map']),
|
|
@@ -226,7 +242,9 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
|
|
| 226 |
dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank']
|
| 227 |
own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own']
|
| 228 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
|
|
|
| 229 |
player_columns = [col for col in portfolio.columns[:6] if col not in ['salary', 'median', 'Own']]
|
|
|
|
| 230 |
flex_ownerships = pd.concat([
|
| 231 |
portfolio.iloc[:,1].map(maps_dict['own_map']),
|
| 232 |
portfolio.iloc[:,2].map(maps_dict['own_map']),
|
|
@@ -271,7 +289,9 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
|
|
| 271 |
dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
|
| 272 |
own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
|
| 273 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
|
|
|
| 274 |
player_columns = [col for col in portfolio.columns[:num_players] if col not in ['salary', 'median', 'Own']]
|
|
|
|
| 275 |
for i in range(1, num_players + 1):
|
| 276 |
portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
|
| 277 |
portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100
|
|
@@ -329,9 +349,10 @@ def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, streng
|
|
| 329 |
portfolio['Lineup Edge'] = portfolio['Lineup Edge'] - portfolio['Lineup Edge'].mean()
|
| 330 |
portfolio['Weighted Own'] = portfolio[own_columns].apply(calculate_weighted_ownership, axis=1)
|
| 331 |
portfolio['Geomean'] = np.power((portfolio[own_columns] * 100).product(axis=1), 1 / len(own_columns))
|
| 332 |
-
|
|
|
|
| 333 |
portfolio['Similarity Score'] = calculate_player_similarity_score(portfolio, player_columns)
|
| 334 |
-
|
| 335 |
portfolio = portfolio.drop(columns=dup_count_columns)
|
| 336 |
portfolio = portfolio.drop(columns=own_columns)
|
| 337 |
portfolio = portfolio.drop(columns=calc_columns)
|
|
|
|
| 56 |
# Get all unique players across all lineups
|
| 57 |
all_players = set()
|
| 58 |
for col in player_columns:
|
| 59 |
+
# Only add string values (player names), skip numeric values
|
| 60 |
+
unique_vals = player_data[col].unique()
|
| 61 |
+
for val in unique_vals:
|
| 62 |
+
if isinstance(val, str) and val.strip() != '':
|
| 63 |
+
all_players.add(val)
|
| 64 |
+
|
| 65 |
+
# Convert to sorted list
|
| 66 |
all_players = sorted(list(all_players))
|
| 67 |
|
| 68 |
+
# If no valid players found, return zeros
|
| 69 |
+
if len(all_players) == 0:
|
| 70 |
+
return np.zeros(len(portfolio))
|
| 71 |
+
|
| 72 |
# Create a binary matrix: 1 if player is in lineup, 0 if not
|
| 73 |
binary_matrix = np.zeros((len(portfolio), len(all_players)))
|
| 74 |
|
|
|
|
| 125 |
dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank']
|
| 126 |
own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own']
|
| 127 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
| 128 |
+
# Get the original player columns (first 5 columns excluding salary, median, Own)
|
| 129 |
player_columns = [col for col in portfolio.columns[:5] if col not in ['salary', 'median', 'Own']]
|
| 130 |
+
|
| 131 |
flex_ownerships = pd.concat([
|
| 132 |
portfolio.iloc[:,1].map(maps_dict['own_map']),
|
| 133 |
portfolio.iloc[:,2].map(maps_dict['own_map']),
|
|
|
|
| 169 |
dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
|
| 170 |
own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
|
| 171 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'own_ratio', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
| 172 |
+
# Get the original player columns (first num_players columns excluding salary, median, Own)
|
| 173 |
player_columns = [col for col in portfolio.columns[:num_players] if col not in ['salary', 'median', 'Own']]
|
| 174 |
+
|
| 175 |
for i in range(1, num_players + 1):
|
| 176 |
portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
|
| 177 |
portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100
|
|
|
|
| 195 |
dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank']
|
| 196 |
own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own']
|
| 197 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
| 198 |
+
# Get the original player columns (first 6 columns excluding salary, median, Own)
|
| 199 |
player_columns = [col for col in portfolio.columns[:6] if col not in ['salary', 'median', 'Own']]
|
| 200 |
+
|
| 201 |
flex_ownerships = pd.concat([
|
| 202 |
portfolio.iloc[:,1].map(maps_dict['own_map']),
|
| 203 |
portfolio.iloc[:,2].map(maps_dict['own_map']),
|
|
|
|
| 242 |
dup_count_columns = ['CPT_Own_percent_rank', 'FLEX1_Own_percent_rank', 'FLEX2_Own_percent_rank', 'FLEX3_Own_percent_rank', 'FLEX4_Own_percent_rank', 'FLEX5_Own_percent_rank']
|
| 243 |
own_columns = ['CPT_Own', 'FLEX1_Own', 'FLEX2_Own', 'FLEX3_Own', 'FLEX4_Own', 'FLEX5_Own']
|
| 244 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
| 245 |
+
# Get the original player columns (first 6 columns excluding salary, median, Own)
|
| 246 |
player_columns = [col for col in portfolio.columns[:6] if col not in ['salary', 'median', 'Own']]
|
| 247 |
+
|
| 248 |
flex_ownerships = pd.concat([
|
| 249 |
portfolio.iloc[:,1].map(maps_dict['own_map']),
|
| 250 |
portfolio.iloc[:,2].map(maps_dict['own_map']),
|
|
|
|
| 289 |
dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
|
| 290 |
own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]
|
| 291 |
calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']
|
| 292 |
+
# Get the original player columns (first num_players columns excluding salary, median, Own)
|
| 293 |
player_columns = [col for col in portfolio.columns[:num_players] if col not in ['salary', 'median', 'Own']]
|
| 294 |
+
|
| 295 |
for i in range(1, num_players + 1):
|
| 296 |
portfolio[f'player_{i}_percent_rank'] = portfolio.iloc[:,i-1].map(maps_dict['own_percent_rank'])
|
| 297 |
portfolio[f'player_{i}_own'] = portfolio.iloc[:,i-1].map(maps_dict['own_map']) / 100
|
|
|
|
| 349 |
portfolio['Lineup Edge'] = portfolio['Lineup Edge'] - portfolio['Lineup Edge'].mean()
|
| 350 |
portfolio['Weighted Own'] = portfolio[own_columns].apply(calculate_weighted_ownership, axis=1)
|
| 351 |
portfolio['Geomean'] = np.power((portfolio[own_columns] * 100).product(axis=1), 1 / len(own_columns))
|
| 352 |
+
|
| 353 |
+
# Calculate similarity score based on actual player selection
|
| 354 |
portfolio['Similarity Score'] = calculate_player_similarity_score(portfolio, player_columns)
|
| 355 |
+
|
| 356 |
portfolio = portfolio.drop(columns=dup_count_columns)
|
| 357 |
portfolio = portfolio.drop(columns=own_columns)
|
| 358 |
portfolio = portfolio.drop(columns=calc_columns)
|