James McCool committed on
Commit
a9507f6
·
1 Parent(s): 07db08b

Refactor calculate_weighted_ownership function to operate on a single row of ownership values, improving clarity and performance in the reassess_edge function.

Browse files
Files changed (1) hide show
  1. global_func/reassess_edge.py +15 -29
global_func/reassess_edge.py CHANGED
@@ -9,53 +9,39 @@ import pandas as pd
9
  import numpy as np
10
  import math
11
 
12
- def calculate_weighted_ownership_vectorized(ownership_array):
13
  """
14
- Vectorized version of calculate_weighted_ownership using NumPy operations.
15
 
16
  Args:
17
- ownership_array: 2D array of ownership values (rows x players)
18
 
19
  Returns:
20
- array: Calculated weighted ownership values for each row
21
  """
22
- # Convert percentages to decimals and handle NaN values
23
- ownership_array = np.where(np.isnan(ownership_array), 0, ownership_array) / 100
24
 
25
- # Calculate row means
26
- row_means = np.mean(ownership_array, axis=1, keepdims=True)
27
 
28
  # Calculate average of each value with the overall mean
29
- value_means = (ownership_array + row_means) / 2
30
 
31
  # Take average of all those means
32
- avg_of_means = np.mean(value_means, axis=1)
33
 
34
  # Multiply by count of values
35
- weighted = avg_of_means * ownership_array.shape[1]
36
 
37
- # Subtract (max - min) for each row
38
- row_max = np.max(ownership_array, axis=1)
39
- row_min = np.min(ownership_array, axis=1)
40
  weighted = weighted - (row_max - row_min)
41
 
42
  # Convert back to percentage form
43
  return weighted * 10000
44
 
45
- def calculate_weighted_ownership_wrapper(row_ownerships):
46
- """
47
- Wrapper function for the original calculate_weighted_ownership to work with Pandas .apply()
48
-
49
- Args:
50
- row_ownerships: Series containing ownership values in percentage form
51
-
52
- Returns:
53
- float: Calculated weighted ownership value
54
- """
55
- # Convert Series to 2D array for vectorized function
56
- ownership_array = row_ownerships.values.reshape(1, -1)
57
- return calculate_weighted_ownership_vectorized(ownership_array)[0]
58
-
59
  def reassess_dupes(row: pd.Series, salary_max: int) -> float:
60
  return math.ceil(row['Dupes'] + ((row['salary_diff'] / 100) + ((salary_max + (salary_max - row['salary'])) / 100)) * (1 - (row['own_diff'] / 100)))
61
 
@@ -89,7 +75,7 @@ def reassess_edge(refactored_frame: pd.DataFrame, original_frame: pd.DataFrame,
89
  refactored_df.loc[lineups, 'Finish_percentile'] = refactored_df.loc[lineups, 'Finish_percentile']
90
  refactored_df.loc[lineups, 'Win%'] = refactored_df.loc[lineups, 'Win%']
91
  refactored_df.loc[lineups, 'Edge'] = reassess_lineup_edge(refactored_df.loc[lineups, :], Contest_Size)
92
- refactored_df.loc[lineups, 'Weighted Own'] = calculate_weighted_ownership_wrapper(refactored_df.loc[lineups, own_columns])
93
  refactored_df.loc[lineups, 'Geomean'] = np.power((refactored_df.loc[lineups, own_columns] * 100).product(axis=1), 1 / len(own_columns))
94
 
95
  return refactored_df
 
9
  import numpy as np
10
  import math
11
 
12
def calculate_weighted_ownership_single_row(row_ownerships):
    """
    Calculate weighted ownership for a single row of ownership values.

    NaN entries are treated as 0. Values are divided by 100, their
    count-scaled average is reduced by the (max - min) spread, and the
    result is multiplied by 10000.

    Args:
        row_ownerships: Series (or other array-like) containing ownership
            values in percentage form

    Returns:
        float: Calculated weighted ownership value (0.0 when no values are given)
    """
    # Coerce to a float array first: a row sliced out of a mixed-dtype frame
    # can arrive as object dtype, on which np.isnan raises TypeError.
    ownership_values = np.asarray(row_ownerships, dtype=float)

    # Nothing to aggregate; np.max / np.min would raise on an empty array.
    if ownership_values.size == 0:
        return 0.0

    # Replace NaN with 0 and convert percentages to decimals
    ownership_values = np.where(np.isnan(ownership_values), 0, ownership_values) / 100

    # Calculate mean
    row_mean = np.mean(ownership_values)

    # Calculate average of each value with the overall mean
    value_means = (ownership_values + row_mean) / 2

    # Take average of all those means (algebraically this equals row_mean;
    # kept as-is so results match the previous implementation exactly)
    avg_of_means = np.mean(value_means)

    # Multiply by count of values
    weighted = avg_of_means * len(ownership_values)

    # Subtract (max - min)
    row_max = np.max(ownership_values)
    row_min = np.min(ownership_values)
    weighted = weighted - (row_max - row_min)

    # Scale the result back up.
    # NOTE(review): inputs were divided by 100 but the result is multiplied by
    # 10000, so the output is 100x "percentage form" -- confirm this is intended.
    return weighted * 10000
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
def reassess_dupes(row: pd.Series, salary_max: int) -> float:
    """
    Recompute the duplicate count for a lineup row.

    Adds a salary-based adjustment, scaled by the row's ownership difference,
    to the existing 'Dupes' value and rounds the result up.

    Args:
        row: Row providing 'Dupes', 'salary_diff', 'salary' and 'own_diff'
        salary_max: Salary value the row's 'salary' is compared against

    Returns:
        The adjusted duplicate count, rounded up with math.ceil
    """
    salary_diff_term = row['salary_diff'] / 100
    salary_room_term = (salary_max + (salary_max - row['salary'])) / 100
    ownership_scale = 1 - (row['own_diff'] / 100)
    adjustment = (salary_diff_term + salary_room_term) * ownership_scale
    return math.ceil(row['Dupes'] + adjustment)
47
 
 
75
  refactored_df.loc[lineups, 'Finish_percentile'] = refactored_df.loc[lineups, 'Finish_percentile']
76
  refactored_df.loc[lineups, 'Win%'] = refactored_df.loc[lineups, 'Win%']
77
  refactored_df.loc[lineups, 'Edge'] = reassess_lineup_edge(refactored_df.loc[lineups, :], Contest_Size)
78
+ refactored_df.loc[lineups, 'Weighted Own'] = calculate_weighted_ownership_single_row(refactored_df.loc[lineups, own_columns])
79
  refactored_df.loc[lineups, 'Geomean'] = np.power((refactored_df.loc[lineups, own_columns] * 100).product(axis=1), 1 / len(own_columns))
80
 
81
  return refactored_df