Spaces:
Sleeping
Sleeping
James McCool
committed on
Commit
·
a9507f6
1
Parent(s):
07db08b
Refactor calculate_weighted_ownership function to operate on a single row of ownership values, improving clarity and performance in the reassess_edge function.
Browse files
- global_func/reassess_edge.py +15 -29
global_func/reassess_edge.py
CHANGED
|
@@ -9,53 +9,39 @@ import pandas as pd
|
|
| 9 |
import numpy as np
|
| 10 |
import math
|
| 11 |
|
| 12 |
-
def
|
| 13 |
"""
|
| 14 |
-
|
| 15 |
|
| 16 |
Args:
|
| 17 |
-
|
| 18 |
|
| 19 |
Returns:
|
| 20 |
-
|
| 21 |
"""
|
| 22 |
-
# Convert
|
| 23 |
-
|
| 24 |
|
| 25 |
-
# Calculate
|
| 26 |
-
|
| 27 |
|
| 28 |
# Calculate average of each value with the overall mean
|
| 29 |
-
value_means = (
|
| 30 |
|
| 31 |
# Take average of all those means
|
| 32 |
-
avg_of_means = np.mean(value_means
|
| 33 |
|
| 34 |
# Multiply by count of values
|
| 35 |
-
weighted = avg_of_means *
|
| 36 |
|
| 37 |
-
# Subtract (max - min)
|
| 38 |
-
row_max = np.max(
|
| 39 |
-
row_min = np.min(
|
| 40 |
weighted = weighted - (row_max - row_min)
|
| 41 |
|
| 42 |
# Convert back to percentage form
|
| 43 |
return weighted * 10000
|
| 44 |
|
| 45 |
-
def calculate_weighted_ownership_wrapper(row_ownerships):
|
| 46 |
-
"""
|
| 47 |
-
Wrapper function for the original calculate_weighted_ownership to work with Pandas .apply()
|
| 48 |
-
|
| 49 |
-
Args:
|
| 50 |
-
row_ownerships: Series containing ownership values in percentage form
|
| 51 |
-
|
| 52 |
-
Returns:
|
| 53 |
-
float: Calculated weighted ownership value
|
| 54 |
-
"""
|
| 55 |
-
# Convert Series to 2D array for vectorized function
|
| 56 |
-
ownership_array = row_ownerships.values.reshape(1, -1)
|
| 57 |
-
return calculate_weighted_ownership_vectorized(ownership_array)[0]
|
| 58 |
-
|
| 59 |
def reassess_dupes(row: pd.Series, salary_max: int) -> float:
|
| 60 |
return math.ceil(row['Dupes'] + ((row['salary_diff'] / 100) + ((salary_max + (salary_max - row['salary'])) / 100)) * (1 - (row['own_diff'] / 100)))
|
| 61 |
|
|
@@ -89,7 +75,7 @@ def reassess_edge(refactored_frame: pd.DataFrame, original_frame: pd.DataFrame,
|
|
| 89 |
refactored_df.loc[lineups, 'Finish_percentile'] = refactored_df.loc[lineups, 'Finish_percentile']
|
| 90 |
refactored_df.loc[lineups, 'Win%'] = refactored_df.loc[lineups, 'Win%']
|
| 91 |
refactored_df.loc[lineups, 'Edge'] = reassess_lineup_edge(refactored_df.loc[lineups, :], Contest_Size)
|
| 92 |
-
refactored_df.loc[lineups, 'Weighted Own'] =
|
| 93 |
refactored_df.loc[lineups, 'Geomean'] = np.power((refactored_df.loc[lineups, own_columns] * 100).product(axis=1), 1 / len(own_columns))
|
| 94 |
|
| 95 |
return refactored_df
|
|
|
|
| 9 |
import numpy as np
|
| 10 |
import math
|
| 11 |
|
| 12 |
+
def calculate_weighted_ownership_single_row(row_ownerships):
    """
    Calculate weighted ownership for one row, or for every row of a frame.

    The calculation is applied along the last axis, so a Series (one row)
    yields a single value and a DataFrame (many rows) yields one value per
    row. Previously a DataFrame was reduced as one flat block: the mean,
    max and min were taken over every row at once and the row count was
    used in place of the number of ownership values.

    Args:
        row_ownerships: Series, DataFrame, or array-like of ownership
            values. NaN entries are treated as 0.

    Returns:
        A scalar for one-dimensional input; a Series indexed like the
        input for a DataFrame; an ndarray for other multi-dimensional
        input. Empty rows produce 0 instead of raising.
    """
    # Accept pandas objects as well as plain arrays/lists.
    raw_values = np.atleast_1d(np.asarray(row_ownerships, dtype=float))

    # Replace NaN with 0 (only NaN, infinities are left untouched), then
    # scale down by 100 exactly as the original implementation did.
    ownership_values = np.where(np.isnan(raw_values), 0, raw_values) / 100

    value_count = ownership_values.shape[-1]
    if value_count == 0:
        # np.max / np.min raise on an empty axis; define the result as 0.
        weighted = np.zeros(ownership_values.shape[:-1])
    else:
        # Mean of each row, kept as a column so it broadcasts per row.
        row_mean = np.mean(ownership_values, axis=-1, keepdims=True)

        # Average of each value with its row mean.
        value_means = (ownership_values + row_mean) / 2

        # Average of those means, multiplied by the count of values.
        # Algebraically this is the row sum; the steps are kept as-is so
        # single-row results match the original floating-point output.
        avg_of_means = np.mean(value_means, axis=-1)
        weighted = avg_of_means * value_count

        # Subtract the spread (max - min) of each row.
        row_max = np.max(ownership_values, axis=-1)
        row_min = np.min(ownership_values, axis=-1)
        weighted = weighted - (row_max - row_min)

    # Same output scaling as the original implementation.
    scaled = weighted * 10000

    if isinstance(row_ownerships, pd.DataFrame):
        # Keep the row labels so .loc assignment aligns by index.
        return pd.Series(scaled, index=row_ownerships.index)
    return scaled
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
def reassess_dupes(row: pd.Series, salary_max: int) -> int:
    """
    Recompute the 'Dupes' value for a single row.

    Args:
        row: Row providing the 'Dupes', 'salary_diff', 'salary' and
            'own_diff' entries.
        salary_max: Salary value that row['salary'] is compared against.

    Returns:
        int: row['Dupes'] plus the combined salary terms scaled by the
        ownership factor, rounded up with math.ceil (which returns an
        int, hence the corrected annotation).
    """
    salary_diff_term = row['salary_diff'] / 100
    # salary_max plus the amount by which the row's salary falls short of it.
    salary_room_term = (salary_max + (salary_max - row['salary'])) / 100
    ownership_factor = 1 - (row['own_diff'] / 100)
    return math.ceil(row['Dupes'] + (salary_diff_term + salary_room_term) * ownership_factor)
|
| 47 |
|
|
|
|
| 75 |
refactored_df.loc[lineups, 'Finish_percentile'] = refactored_df.loc[lineups, 'Finish_percentile']
|
| 76 |
refactored_df.loc[lineups, 'Win%'] = refactored_df.loc[lineups, 'Win%']
|
| 77 |
refactored_df.loc[lineups, 'Edge'] = reassess_lineup_edge(refactored_df.loc[lineups, :], Contest_Size)
|
| 78 |
+
refactored_df.loc[lineups, 'Weighted Own'] = calculate_weighted_ownership_single_row(refactored_df.loc[lineups, own_columns])
|
| 79 |
refactored_df.loc[lineups, 'Geomean'] = np.power((refactored_df.loc[lineups, own_columns] * 100).product(axis=1), 1 / len(own_columns))
|
| 80 |
|
| 81 |
return refactored_df
|