James McCool committed on
Commit
7eef51a
·
1 Parent(s): 7df001f

Implement vectorized calculations for salary, median, and ownership in app.py to enhance performance and memory efficiency. Refactor reassess_edge and stratification_function to minimize DataFrame copies and improve memory management. Update filtering logic to use boolean masks for better efficiency.

Browse files
app.py CHANGED
@@ -204,6 +204,134 @@ def create_memory_efficient_mappings(projections_df, site_var, type_var, sport_v
204
 
205
  return base_mappings
206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  def create_position_export_dict(column_name, csv_file, site_var, type_var, sport_var):
208
  try:
209
  # Remove any numbers from the column name to get the position
@@ -1143,60 +1271,20 @@ if selected_tab == 'Manage Portfolio':
1143
  st.session_state['working_frame'] = pd.read_parquet(io.BytesIO(st.session_state['origin_portfolio']))
1144
  st.session_state['player_columns'] = [col for col in st.session_state['working_frame'].columns if col not in excluded_cols]
1145
 
1146
- if type_var == 'Classic':
1147
- if sport_var == 'CS2' or sport_var == 'LOL':
1148
- # Calculate salary (CPT uses cpt_salary_map, others use salary_map)
1149
- st.session_state['working_frame']['salary'] = st.session_state['working_frame'].apply(
1150
- lambda row: st.session_state['map_dict']['cpt_salary_map'].get(row.iloc[0], 0) +
1151
- sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row.iloc[1:]),
1152
- axis=1
1153
- )
1154
-
1155
- # Calculate median (CPT uses cpt_proj_map, others use proj_map)
1156
- st.session_state['working_frame']['median'] = st.session_state['working_frame'].apply(
1157
- lambda row: st.session_state['map_dict']['cpt_proj_map'].get(row.iloc[0], 0) +
1158
- sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row.iloc[1:]),
1159
- axis=1
1160
- )
1161
-
1162
- # Calculate ownership (CPT uses cpt_own_map, others use own_map)
1163
- st.session_state['working_frame']['Own'] = st.session_state['working_frame'].apply(
1164
- lambda row: st.session_state['map_dict']['cpt_own_map'].get(row.iloc[0], 0) +
1165
- sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row.iloc[1:]),
1166
- axis=1
1167
- )
1168
-
1169
- elif sport_var != 'CS2' and sport_var != 'LOL':
1170
- st.session_state['working_frame']['salary'] = st.session_state['working_frame'].apply(lambda row: sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row), axis=1)
1171
- st.session_state['working_frame']['median'] = st.session_state['working_frame'].apply(lambda row: sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row), axis=1)
1172
- st.session_state['working_frame']['Own'] = st.session_state['working_frame'].apply(lambda row: sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row), axis=1)
1173
- if 'stack_dict' in st.session_state:
1174
- st.session_state['working_frame']['Stack'] = st.session_state['working_frame'].index.map(st.session_state['stack_dict'])
1175
- st.session_state['working_frame']['Size'] = st.session_state['working_frame'].index.map(st.session_state['size_dict'])
1176
- elif type_var == 'Showdown':
1177
- # Calculate salary (CPT uses cpt_salary_map, others use salary_map)
1178
- st.session_state['working_frame']['salary'] = st.session_state['working_frame'].apply(
1179
- lambda row: st.session_state['map_dict']['cpt_salary_map'].get(row.iloc[0], 0) +
1180
- sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row.iloc[1:]),
1181
- axis=1
1182
- )
1183
-
1184
- # Calculate median (CPT uses cpt_proj_map, others use proj_map)
1185
- st.session_state['working_frame']['median'] = st.session_state['working_frame'].apply(
1186
- lambda row: st.session_state['map_dict']['cpt_proj_map'].get(row.iloc[0], 0) +
1187
- sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row.iloc[1:]),
1188
- axis=1
1189
- )
1190
-
1191
- # Calculate ownership (CPT uses cpt_own_map, others use own_map)
1192
- st.session_state['working_frame']['Own'] = st.session_state['working_frame'].apply(
1193
- lambda row: st.session_state['map_dict']['cpt_own_map'].get(row.iloc[0], 0) +
1194
- sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row.iloc[1:]),
1195
- axis=1
1196
- )
1197
- # st.session_state['working_frame']['Own'] = st.session_state['working_frame']['Own'].astype('float32')
1198
- st.session_state['working_frame']['median'] = st.session_state['working_frame']['median'].astype('float32')
1199
- st.session_state['working_frame']['salary'] = st.session_state['working_frame']['salary'].astype('uint16')
1200
 
1201
  st.session_state['base_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var, salary_max)
1202
  st.session_state['working_frame'] = st.session_state['base_frame'].copy()
@@ -1259,60 +1347,60 @@ if selected_tab == 'Manage Portfolio':
1259
 
1260
  if reg_submitted:
1261
  st.session_state['settings_base'] = False
1262
- parsed_frame = st.session_state['working_frame'].copy()
1263
 
1264
- parsed_frame = parsed_frame[parsed_frame['salary'] >= min_salary]
1265
- parsed_frame = parsed_frame[parsed_frame['salary'] <= max_salary]
1266
- parsed_frame = parsed_frame[parsed_frame['median'] >= min_proj]
1267
- parsed_frame = parsed_frame[parsed_frame['median'] <= max_proj]
1268
- parsed_frame = parsed_frame[parsed_frame['Own'] >= min_own]
1269
- parsed_frame = parsed_frame[parsed_frame['Own'] <= max_own]
1270
- parsed_frame = parsed_frame[parsed_frame['Dupes'] >= min_dupes]
1271
- parsed_frame = parsed_frame[parsed_frame['Dupes'] <= max_dupes]
1272
- parsed_frame = parsed_frame[parsed_frame['Finish_percentile'] >= min_finish_percentile]
1273
- parsed_frame = parsed_frame[parsed_frame['Finish_percentile'] <= max_finish_percentile]
1274
- parsed_frame = parsed_frame[parsed_frame['Lineup Edge'] >= min_lineup_edge]
1275
- parsed_frame = parsed_frame[parsed_frame['Lineup Edge'] <= max_lineup_edge]
 
 
 
1276
 
1277
 
1278
- if 'Stack' in parsed_frame.columns:
1279
- if stack_include_toggle == 'All Stacks':
1280
- parsed_frame = parsed_frame
1281
- else:
1282
- parsed_frame = parsed_frame[parsed_frame['Stack'].isin(stack_selections)]
1283
  if stack_remove_toggle == 'Yes':
1284
- parsed_frame = parsed_frame[~parsed_frame['Stack'].isin(stack_remove)]
1285
- else:
1286
- parsed_frame = parsed_frame
1287
- st.session_state['working_frame'] = parsed_frame.sort_values(by='median', ascending=False).reset_index(drop=True)
1288
  st.session_state['export_merge'] = st.session_state['working_frame'].copy()
1289
  if exp_submitted:
1290
  st.session_state['settings_base'] = False
1291
- parsed_frame = st.session_state['export_base'].copy()
1292
-
1293
- parsed_frame = parsed_frame[parsed_frame['salary'] >= min_salary]
1294
- parsed_frame = parsed_frame[parsed_frame['salary'] <= max_salary]
1295
- parsed_frame = parsed_frame[parsed_frame['median'] >= min_proj]
1296
- parsed_frame = parsed_frame[parsed_frame['median'] <= max_proj]
1297
- parsed_frame = parsed_frame[parsed_frame['Own'] >= min_own]
1298
- parsed_frame = parsed_frame[parsed_frame['Own'] <= max_own]
1299
- parsed_frame = parsed_frame[parsed_frame['Dupes'] >= min_dupes]
1300
- parsed_frame = parsed_frame[parsed_frame['Dupes'] <= max_dupes]
1301
- parsed_frame = parsed_frame[parsed_frame['Finish_percentile'] >= min_finish_percentile]
1302
- parsed_frame = parsed_frame[parsed_frame['Finish_percentile'] <= max_finish_percentile]
1303
- parsed_frame = parsed_frame[parsed_frame['Lineup Edge'] >= min_lineup_edge]
1304
- parsed_frame = parsed_frame[parsed_frame['Lineup Edge'] <= max_lineup_edge]
1305
-
1306
- if 'Stack' in parsed_frame.columns:
1307
- if stack_include_toggle == 'All Stacks':
1308
- parsed_frame = parsed_frame
1309
- else:
1310
- parsed_frame = parsed_frame[parsed_frame['Stack'].isin(stack_selections)]
1311
  if stack_remove_toggle == 'Yes':
1312
- parsed_frame = parsed_frame[~parsed_frame['Stack'].isin(stack_remove)]
1313
- else:
1314
- parsed_frame = parsed_frame
1315
- st.session_state['export_base'] = parsed_frame.sort_values(by='median', ascending=False).reset_index(drop=True)
1316
  st.session_state['export_merge'] = st.session_state['export_base'].copy()
1317
 
1318
  with st.expander('Micro Filter Options'):
@@ -1923,189 +2011,39 @@ if selected_tab == 'Manage Portfolio':
1923
  exp_submitted = st.form_submit_button("Export")
1924
  if reg_submitted:
1925
  st.session_state['settings_base'] = False
1926
- prior_frame = st.session_state['working_frame'].copy()
1927
  parsed_frame = exposure_spread(st.session_state['working_frame'], st.session_state['exposure_player'], exposure_target, ignore_stacks, remove_teams_exposure, specific_replacements, specific_columns, st.session_state['projections_df'], sport_var, type_var, salary_max, stacking_sports)
1928
 
1929
- if type_var == 'Classic':
1930
- if sport_var == 'CS2' or sport_var == 'LOL':
1931
- # Calculate salary (CPT uses cpt_salary_map, others use salary_map)
1932
- parsed_frame['salary'] = parsed_frame.apply(
1933
- lambda row: st.session_state['map_dict']['cpt_salary_map'].get(row.iloc[0], 0) +
1934
- sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row.iloc[1:]),
1935
- axis=1
1936
- )
1937
-
1938
- # Calculate median (CPT uses cpt_proj_map, others use proj_map)
1939
- parsed_frame['median'] = parsed_frame.apply(
1940
- lambda row: st.session_state['map_dict']['cpt_proj_map'].get(row.iloc[0], 0) +
1941
- sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row.iloc[1:]),
1942
- axis=1
1943
- )
1944
-
1945
- # Calculate ownership (CPT uses cpt_own_map, others use own_map)
1946
- parsed_frame['Own'] = parsed_frame.apply(
1947
- lambda row: st.session_state['map_dict']['cpt_own_map'].get(row.iloc[0], 0) +
1948
- sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row.iloc[1:]),
1949
- axis=1
1950
- )
1951
-
1952
- elif sport_var != 'CS2' and sport_var != 'LOL':
1953
- parsed_frame['salary'] = parsed_frame.apply(lambda row: sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row), axis=1)
1954
- parsed_frame['median'] = parsed_frame.apply(lambda row: sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row), axis=1)
1955
- parsed_frame['Own'] = parsed_frame.apply(lambda row: sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row), axis=1)
1956
- if 'stack_dict' in st.session_state:
1957
- team_dict = dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team']))
1958
- if sport_var == 'LOL':
1959
- parsed_frame['Stack'] = parsed_frame.apply(
1960
- lambda row: Counter(
1961
- team_dict.get(player, '') for player in row
1962
- if team_dict.get(player, '') != ''
1963
- ).most_common(1)[0][0] if any(team_dict.get(player, '') for player in row) else '',
1964
- axis=1
1965
- )
1966
- parsed_frame['Size'] = parsed_frame.apply(
1967
- lambda row: Counter(
1968
- team_dict.get(player, '') for player in row
1969
- if team_dict.get(player, '') != ''
1970
- ).most_common(1)[0][1] if any(team_dict.get(player, '') for player in row) else 0,
1971
- axis=1
1972
- )
1973
- else:
1974
- parsed_frame['Stack'] = parsed_frame.apply(
1975
- lambda row: Counter(
1976
- team_dict.get(player, '') for player in row[2:]
1977
- if team_dict.get(player, '') != ''
1978
- ).most_common(1)[0][0] if any(team_dict.get(player, '') for player in row[2:]) else '',
1979
- axis=1
1980
- )
1981
- parsed_frame['Size'] = parsed_frame.apply(
1982
- lambda row: Counter(
1983
- team_dict.get(player, '') for player in row[2:]
1984
- if team_dict.get(player, '') != ''
1985
- ).most_common(1)[0][1] if any(team_dict.get(player, '') for player in row[2:]) else 0,
1986
- axis=1
1987
- )
1988
- elif type_var == 'Showdown':
1989
- # Calculate salary (CPT uses cpt_salary_map, others use salary_map)
1990
- parsed_frame['salary'] = parsed_frame.apply(
1991
- lambda row: st.session_state['map_dict']['cpt_salary_map'].get(row.iloc[0], 0) +
1992
- sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row.iloc[1:]),
1993
- axis=1
1994
- )
1995
-
1996
- # Calculate median (CPT uses cpt_proj_map, others use proj_map)
1997
- parsed_frame['median'] = parsed_frame.apply(
1998
- lambda row: st.session_state['map_dict']['cpt_proj_map'].get(row.iloc[0], 0) +
1999
- sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row.iloc[1:]),
2000
- axis=1
2001
- )
2002
-
2003
- # Calculate ownership (CPT uses cpt_own_map, others use own_map)
2004
- parsed_frame['Own'] = parsed_frame.apply(
2005
- lambda row: st.session_state['map_dict']['cpt_own_map'].get(row.iloc[0], 0) +
2006
- sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row.iloc[1:]),
2007
- axis=1
2008
- )
2009
  st.session_state['working_frame'] = parsed_frame.reset_index(drop=True)
2010
- # st.session_state['working_frame']['Own'] = st.session_state['working_frame']['Own'].astype('float32')
2011
- st.session_state['working_frame']['median'] = st.session_state['working_frame']['median'].astype('float32')
2012
- st.session_state['working_frame']['salary'] = st.session_state['working_frame']['salary'].astype('uint16')
2013
 
2014
  # st.session_state['working_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
2015
  st.session_state['working_frame'] = reassess_edge(st.session_state['working_frame'], st.session_state['base_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var, salary_max)
2016
  st.session_state['export_merge'] = st.session_state['working_frame'].copy()
2017
  elif exp_submitted:
2018
  st.session_state['settings_base'] = False
2019
- prior_frame = st.session_state['export_base'].copy()
2020
  parsed_frame = exposure_spread(st.session_state['export_base'], st.session_state['exposure_player'], exposure_target, ignore_stacks, remove_teams_exposure, specific_replacements, specific_columns, st.session_state['projections_df'], sport_var, type_var, salary_max, stacking_sports)
2021
 
2022
- if type_var == 'Classic':
2023
- if sport_var == 'CS2' or sport_var == 'LOL':
2024
- parsed_frame['salary'] = parsed_frame.apply(
2025
- lambda row: st.session_state['map_dict']['cpt_salary_map'].get(row.iloc[0], 0) +
2026
- sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row.iloc[1:]),
2027
- axis=1
2028
- )
2029
-
2030
- parsed_frame['median'] = parsed_frame.apply(
2031
- lambda row: st.session_state['map_dict']['cpt_proj_map'].get(row.iloc[0], 0) +
2032
- sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row.iloc[1:]),
2033
- axis=1
2034
- )
2035
-
2036
- # Calculate ownership (CPT uses cpt_own_map, others use own_map)
2037
- parsed_frame['Own'] = parsed_frame.apply(
2038
- lambda row: st.session_state['map_dict']['cpt_own_map'].get(row.iloc[0], 0) +
2039
- sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row.iloc[1:]),
2040
- axis=1
2041
- )
2042
 
2043
- elif sport_var != 'CS2' and sport_var != 'LOL':
2044
- parsed_frame['salary'] = parsed_frame.apply(lambda row: sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row), axis=1)
2045
- parsed_frame['median'] = parsed_frame.apply(lambda row: sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row), axis=1)
2046
- parsed_frame['Own'] = parsed_frame.apply(lambda row: sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row), axis=1)
2047
- if 'stack_dict' in st.session_state:
2048
- team_dict = dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team']))
2049
- if sport_var == 'LOL':
2050
- parsed_frame['Stack'] = parsed_frame.apply(
2051
- lambda row: Counter(
2052
- team_dict.get(player, '') for player in row
2053
- if team_dict.get(player, '') != ''
2054
- ).most_common(1)[0][0] if any(team_dict.get(player, '') for player in row) else '',
2055
- axis=1
2056
- )
2057
- parsed_frame['Size'] = parsed_frame.apply(
2058
- lambda row: Counter(
2059
- team_dict.get(player, '') for player in row
2060
- if team_dict.get(player, '') != ''
2061
- ).most_common(1)[0][1] if any(team_dict.get(player, '') for player in row) else 0,
2062
- axis=1
2063
- )
2064
- else:
2065
- parsed_frame['Stack'] = parsed_frame.apply(
2066
- lambda row: Counter(
2067
- team_dict.get(player, '') for player in row[2:]
2068
- if team_dict.get(player, '') != ''
2069
- ).most_common(1)[0][0] if any(team_dict.get(player, '') for player in row[2:]) else '',
2070
- axis=1
2071
- )
2072
- parsed_frame['Size'] = parsed_frame.apply(
2073
- lambda row: Counter(
2074
- team_dict.get(player, '') for player in row[2:]
2075
- if team_dict.get(player, '') != ''
2076
- ).most_common(1)[0][1] if any(team_dict.get(player, '') for player in row[2:]) else 0,
2077
- axis=1
2078
- )
2079
- elif type_var == 'Showdown':
2080
- if sport_var == 'GOLF':
2081
-
2082
- parsed_frame['salary'] = parsed_frame.apply(lambda row: sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row), axis=1)
2083
- parsed_frame['median'] = parsed_frame.apply(lambda row: sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row), axis=1)
2084
- parsed_frame['Own'] = parsed_frame.apply(lambda row: sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row), axis=1)
2085
- else:
2086
- parsed_frame['salary'] = parsed_frame.apply(
2087
- lambda row: st.session_state['map_dict']['cpt_salary_map'].get(row.iloc[0], 0) +
2088
- sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row.iloc[1:]),
2089
- axis=1
2090
- )
2091
-
2092
- # Calculate median (CPT uses cpt_proj_map, others use proj_map)
2093
- parsed_frame['median'] = parsed_frame.apply(
2094
- lambda row: st.session_state['map_dict']['cpt_proj_map'].get(row.iloc[0], 0) +
2095
- sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row.iloc[1:]),
2096
- axis=1
2097
- )
2098
-
2099
- # Calculate ownership (CPT uses cpt_own_map, others use own_map)
2100
- parsed_frame['Own'] = parsed_frame.apply(
2101
- lambda row: st.session_state['map_dict']['cpt_own_map'].get(row.iloc[0], 0) +
2102
- sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row.iloc[1:]),
2103
- axis=1
2104
- )
2105
  st.session_state['export_base'] = parsed_frame.reset_index(drop=True)
2106
- # st.session_state['export_base']['Own'] = st.session_state['export_base']['Own'].astype('float32')
2107
- st.session_state['export_base']['median'] = st.session_state['export_base']['median'].astype('float32')
2108
- st.session_state['export_base']['salary'] = st.session_state['export_base']['salary'].astype('uint16')
2109
 
2110
  # st.session_state['export_base'] = predict_dupes(st.session_state['export_base'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
2111
  st.session_state['export_base'] = reassess_edge(st.session_state['export_base'], st.session_state['base_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var, salary_max)
 
204
 
205
  return base_mappings
206
 
207
+ def calculate_salary_vectorized(df, player_columns, map_dict, type_var, sport_var):
208
+ """Vectorized salary calculation to replace expensive apply operations"""
209
+ if type_var == 'Classic' and (sport_var == 'CS2' or sport_var == 'LOL'):
210
+ # Captain + flex calculations
211
+ cpt_salaries = df.iloc[:, 0].map(map_dict['cpt_salary_map']).fillna(0)
212
+ flex_salaries = sum(df.iloc[:, i].map(map_dict['salary_map']).fillna(0) for i in range(1, len(player_columns)))
213
+ return cpt_salaries + flex_salaries
214
+ elif type_var == 'Showdown':
215
+ if sport_var == 'GOLF':
216
+ return sum(df[col].map(map_dict['salary_map']).fillna(0) for col in player_columns)
217
+ else:
218
+ cpt_salaries = df.iloc[:, 0].map(map_dict['cpt_salary_map']).fillna(0)
219
+ flex_salaries = sum(df.iloc[:, i].map(map_dict['salary_map']).fillna(0) for i in range(1, len(player_columns)))
220
+ return cpt_salaries + flex_salaries
221
+ else:
222
+ # Classic non-CS2/LOL
223
+ return sum(df[col].map(map_dict['salary_map']).fillna(0) for col in player_columns)
224
+
225
+ def calculate_median_vectorized(df, player_columns, map_dict, type_var, sport_var):
226
+ """Vectorized median calculation to replace expensive apply operations"""
227
+ if type_var == 'Classic' and (sport_var == 'CS2' or sport_var == 'LOL'):
228
+ cpt_medians = df.iloc[:, 0].map(map_dict['cpt_proj_map']).fillna(0)
229
+ flex_medians = sum(df.iloc[:, i].map(map_dict['proj_map']).fillna(0) for i in range(1, len(player_columns)))
230
+ return cpt_medians + flex_medians
231
+ elif type_var == 'Showdown':
232
+ if sport_var == 'GOLF':
233
+ return sum(df[col].map(map_dict['proj_map']).fillna(0) for col in player_columns)
234
+ else:
235
+ cpt_medians = df.iloc[:, 0].map(map_dict['cpt_proj_map']).fillna(0)
236
+ flex_medians = sum(df.iloc[:, i].map(map_dict['proj_map']).fillna(0) for i in range(1, len(player_columns)))
237
+ return cpt_medians + flex_medians
238
+ else:
239
+ return sum(df[col].map(map_dict['proj_map']).fillna(0) for col in player_columns)
240
+
241
+ def calculate_ownership_vectorized(df, player_columns, map_dict, type_var, sport_var):
242
+ """Vectorized ownership calculation to replace expensive apply operations"""
243
+ if type_var == 'Classic' and (sport_var == 'CS2' or sport_var == 'LOL'):
244
+ cpt_own = df.iloc[:, 0].map(map_dict['cpt_own_map']).fillna(0)
245
+ flex_own = sum(df.iloc[:, i].map(map_dict['own_map']).fillna(0) for i in range(1, len(player_columns)))
246
+ return cpt_own + flex_own
247
+ elif type_var == 'Showdown':
248
+ if sport_var == 'GOLF':
249
+ return sum(df[col].map(map_dict['own_map']).fillna(0) for col in player_columns)
250
+ else:
251
+ cpt_own = df.iloc[:, 0].map(map_dict['cpt_own_map']).fillna(0)
252
+ flex_own = sum(df.iloc[:, i].map(map_dict['own_map']).fillna(0) for i in range(1, len(player_columns)))
253
+ return cpt_own + flex_own
254
+ else:
255
+ return sum(df[col].map(map_dict['own_map']).fillna(0) for col in player_columns)
256
+
257
+ def calculate_lineup_metrics(df, player_columns, map_dict, type_var, sport_var, projections_df=None):
258
+ """Centralized function to calculate salary, median, and ownership efficiently"""
259
+ df = df.copy() # Work on a copy to avoid modifying original
260
+
261
+ # Vectorized calculations
262
+ df['salary'] = calculate_salary_vectorized(df[player_columns], player_columns, map_dict, type_var, sport_var)
263
+ df['median'] = calculate_median_vectorized(df[player_columns], player_columns, map_dict, type_var, sport_var)
264
+ df['Own'] = calculate_ownership_vectorized(df[player_columns], player_columns, map_dict, type_var, sport_var)
265
+
266
+ # Handle stacking for specific sports
267
+ if projections_df is not None and 'team' in projections_df.columns:
268
+ team_dict = dict(zip(projections_df['player_names'], projections_df['team']))
269
+
270
+ if type_var == 'Classic' and sport_var not in ['CS2', 'LOL', 'GOLF']:
271
+ # Stack calculation for classic sports (excluding first 2 columns for pitchers)
272
+ stack_columns = player_columns[2:] if len(player_columns) > 2 else player_columns
273
+ df['Stack'] = df[stack_columns].apply(
274
+ lambda row: Counter(
275
+ team_dict.get(player, '') for player in row
276
+ if team_dict.get(player, '') != ''
277
+ ).most_common(1)[0][0] if any(team_dict.get(player, '') for player in row) else '',
278
+ axis=1
279
+ )
280
+ df['Size'] = df[stack_columns].apply(
281
+ lambda row: Counter(
282
+ team_dict.get(player, '') for player in row
283
+ if team_dict.get(player, '') != ''
284
+ ).most_common(1)[0][1] if any(team_dict.get(player, '') for player in row) else 0,
285
+ axis=1
286
+ )
287
+ elif sport_var == 'LOL':
288
+ # LOL uses all player columns for stacking
289
+ df['Stack'] = df[player_columns].apply(
290
+ lambda row: Counter(
291
+ team_dict.get(player, '') for player in row
292
+ if team_dict.get(player, '') != ''
293
+ ).most_common(1)[0][0] if any(team_dict.get(player, '') for player in row) else '',
294
+ axis=1
295
+ )
296
+ df['Size'] = df[player_columns].apply(
297
+ lambda row: Counter(
298
+ team_dict.get(player, '') for player in row
299
+ if team_dict.get(player, '') != ''
300
+ ).most_common(1)[0][1] if any(team_dict.get(player, '') for player in row) else 0,
301
+ axis=1
302
+ )
303
+
304
+ # Optimize data types
305
+ df['salary'] = df['salary'].astype('uint16')
306
+ df['median'] = df['median'].astype('float32')
307
+
308
+ return df
309
+
310
+ def create_team_filter_mask(df, player_columns, team_map, teams_to_filter, focus_type='Overall', type_var='Classic'):
311
+ """Create boolean mask for team filtering without creating intermediate DataFrames"""
312
+ mask = pd.Series(False, index=df.index)
313
+
314
+ if type_var == 'Showdown' and focus_type != 'Overall':
315
+ if focus_type == 'CPT':
316
+ focus_columns = [player_columns[0]] # First column only
317
+ elif focus_type == 'FLEX':
318
+ focus_columns = player_columns[1:] # All except first
319
+ else:
320
+ focus_columns = player_columns
321
+ else:
322
+ # For Classic or Overall focus, use appropriate columns
323
+ if type_var == 'Classic':
324
+ focus_columns = [col for col in player_columns if col not in ['SP1', 'SP2']] # Exclude pitchers
325
+ else:
326
+ focus_columns = player_columns
327
+
328
+ for team in teams_to_filter:
329
+ for col in focus_columns:
330
+ team_mask = df[col].map(team_map) == team
331
+ mask |= team_mask
332
+
333
+ return mask
334
+
335
  def create_position_export_dict(column_name, csv_file, site_var, type_var, sport_var):
336
  try:
337
  # Remove any numbers from the column name to get the position
 
1271
  st.session_state['working_frame'] = pd.read_parquet(io.BytesIO(st.session_state['origin_portfolio']))
1272
  st.session_state['player_columns'] = [col for col in st.session_state['working_frame'].columns if col not in excluded_cols]
1273
 
1274
+ # Use vectorized calculation function
1275
+ st.session_state['working_frame'] = calculate_lineup_metrics(
1276
+ st.session_state['working_frame'],
1277
+ st.session_state['player_columns'],
1278
+ st.session_state['map_dict'],
1279
+ type_var,
1280
+ sport_var,
1281
+ st.session_state['projections_df'] if 'stack_dict' in st.session_state else None
1282
+ )
1283
+
1284
+ # Map existing stack/size data if available
1285
+ if 'stack_dict' in st.session_state:
1286
+ st.session_state['working_frame']['Stack'] = st.session_state['working_frame'].index.map(st.session_state['stack_dict'])
1287
+ st.session_state['working_frame']['Size'] = st.session_state['working_frame'].index.map(st.session_state['size_dict'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1288
 
1289
  st.session_state['base_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var, salary_max)
1290
  st.session_state['working_frame'] = st.session_state['base_frame'].copy()
 
1347
 
1348
  if reg_submitted:
1349
  st.session_state['settings_base'] = False
 
1350
 
1351
+ # Use index-based filtering instead of copying DataFrame
1352
+ filter_mask = (
1353
+ (st.session_state['working_frame']['salary'] >= min_salary) &
1354
+ (st.session_state['working_frame']['salary'] <= max_salary) &
1355
+ (st.session_state['working_frame']['median'] >= min_proj) &
1356
+ (st.session_state['working_frame']['median'] <= max_proj) &
1357
+ (st.session_state['working_frame']['Own'] >= min_own) &
1358
+ (st.session_state['working_frame']['Own'] <= max_own) &
1359
+ (st.session_state['working_frame']['Dupes'] >= min_dupes) &
1360
+ (st.session_state['working_frame']['Dupes'] <= max_dupes) &
1361
+ (st.session_state['working_frame']['Finish_percentile'] >= min_finish_percentile) &
1362
+ (st.session_state['working_frame']['Finish_percentile'] <= max_finish_percentile) &
1363
+ (st.session_state['working_frame']['Lineup Edge'] >= min_lineup_edge) &
1364
+ (st.session_state['working_frame']['Lineup Edge'] <= max_lineup_edge)
1365
+ )
1366
 
1367
 
1368
+ # Handle stack filtering
1369
+ if 'Stack' in st.session_state['working_frame'].columns:
1370
+ if stack_include_toggle != 'All Stacks':
1371
+ filter_mask &= st.session_state['working_frame']['Stack'].isin(stack_selections)
 
1372
  if stack_remove_toggle == 'Yes':
1373
+ filter_mask &= ~st.session_state['working_frame']['Stack'].isin(stack_remove)
1374
+
1375
+ # Apply all filters at once
1376
+ st.session_state['working_frame'] = st.session_state['working_frame'][filter_mask].sort_values(by='median', ascending=False).reset_index(drop=True)
1377
  st.session_state['export_merge'] = st.session_state['working_frame'].copy()
1378
  if exp_submitted:
1379
  st.session_state['settings_base'] = False
1380
+
1381
+ # Use index-based filtering for export_base
1382
+ export_filter_mask = (
1383
+ (st.session_state['export_base']['salary'] >= min_salary) &
1384
+ (st.session_state['export_base']['salary'] <= max_salary) &
1385
+ (st.session_state['export_base']['median'] >= min_proj) &
1386
+ (st.session_state['export_base']['median'] <= max_proj) &
1387
+ (st.session_state['export_base']['Own'] >= min_own) &
1388
+ (st.session_state['export_base']['Own'] <= max_own) &
1389
+ (st.session_state['export_base']['Dupes'] >= min_dupes) &
1390
+ (st.session_state['export_base']['Dupes'] <= max_dupes) &
1391
+ (st.session_state['export_base']['Finish_percentile'] >= min_finish_percentile) &
1392
+ (st.session_state['export_base']['Finish_percentile'] <= max_finish_percentile) &
1393
+ (st.session_state['export_base']['Lineup Edge'] >= min_lineup_edge) &
1394
+ (st.session_state['export_base']['Lineup Edge'] <= max_lineup_edge)
1395
+ )
1396
+
1397
+ if 'Stack' in st.session_state['export_base'].columns:
1398
+ if stack_include_toggle != 'All Stacks':
1399
+ export_filter_mask &= st.session_state['export_base']['Stack'].isin(stack_selections)
1400
  if stack_remove_toggle == 'Yes':
1401
+ export_filter_mask &= ~st.session_state['export_base']['Stack'].isin(stack_remove)
1402
+
1403
+ st.session_state['export_base'] = st.session_state['export_base'][export_filter_mask].sort_values(by='median', ascending=False).reset_index(drop=True)
 
1404
  st.session_state['export_merge'] = st.session_state['export_base'].copy()
1405
 
1406
  with st.expander('Micro Filter Options'):
 
2011
  exp_submitted = st.form_submit_button("Export")
2012
  if reg_submitted:
2013
  st.session_state['settings_base'] = False
 
2014
  parsed_frame = exposure_spread(st.session_state['working_frame'], st.session_state['exposure_player'], exposure_target, ignore_stacks, remove_teams_exposure, specific_replacements, specific_columns, st.session_state['projections_df'], sport_var, type_var, salary_max, stacking_sports)
2015
 
2016
+ # Use consolidated calculation function
2017
+ parsed_frame = calculate_lineup_metrics(
2018
+ parsed_frame,
2019
+ st.session_state['player_columns'],
2020
+ st.session_state['map_dict'],
2021
+ type_var,
2022
+ sport_var,
2023
+ st.session_state['projections_df']
2024
+ )
2025
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2026
  st.session_state['working_frame'] = parsed_frame.reset_index(drop=True)
 
 
 
2027
 
2028
  # st.session_state['working_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
2029
  st.session_state['working_frame'] = reassess_edge(st.session_state['working_frame'], st.session_state['base_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var, salary_max)
2030
  st.session_state['export_merge'] = st.session_state['working_frame'].copy()
2031
  elif exp_submitted:
2032
  st.session_state['settings_base'] = False
 
2033
  parsed_frame = exposure_spread(st.session_state['export_base'], st.session_state['exposure_player'], exposure_target, ignore_stacks, remove_teams_exposure, specific_replacements, specific_columns, st.session_state['projections_df'], sport_var, type_var, salary_max, stacking_sports)
2034
 
2035
+ # Use consolidated calculation function for export
2036
+ parsed_frame = calculate_lineup_metrics(
2037
+ parsed_frame,
2038
+ st.session_state['player_columns'],
2039
+ st.session_state['map_dict'],
2040
+ type_var,
2041
+ sport_var,
2042
+ st.session_state['projections_df']
2043
+ )
 
 
 
 
 
 
 
 
 
 
 
2044
 
2045
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2046
  st.session_state['export_base'] = parsed_frame.reset_index(drop=True)
 
 
 
2047
 
2048
  # st.session_state['export_base'] = predict_dupes(st.session_state['export_base'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
2049
  st.session_state['export_base'] = reassess_edge(st.session_state['export_base'], st.session_state['base_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var, salary_max)
global_func/reassess_edge.py CHANGED
@@ -23,13 +23,20 @@ def reassess_edge(modified_frame: pd.DataFrame, base_frame: pd.DataFrame, maps_d
23
  # Store the number of rows in the modified frame
24
  num_modified_rows = len(modified_frame)
25
 
26
- # Concatenate the modified frame with the base frame
27
- combined_frame = pd.concat([modified_frame.drop(columns=['Dupes', 'Finish_percentile', 'Lineup Edge', 'Win%', 'Weighted Own', 'Geomean', 'Diversity']), base_frame.drop(columns=['Dupes', 'Finish_percentile', 'Lineup Edge', 'Win%', 'Weighted Own', 'Geomean', 'Diversity'])], ignore_index=True)
 
 
 
 
 
 
 
28
 
29
  # Run predict_dupes on the combined frame
30
  updated_combined_frame = predict_dupes(combined_frame, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var, max_salary)
31
 
32
- # Extract the first N rows (which correspond to our modified frame)
33
- result_frame = updated_combined_frame.head(num_modified_rows).copy()
34
 
35
  return result_frame
 
23
  # Store the number of rows in the modified frame
24
  num_modified_rows = len(modified_frame)
25
 
26
+ # Define columns to drop for memory efficiency
27
+ cols_to_drop = ['Dupes', 'Finish_percentile', 'Lineup Edge', 'Win%', 'Weighted Own', 'Geomean', 'Diversity']
28
+
29
+ # More memory-efficient concatenation
30
+ modified_clean = modified_frame.drop(columns=[col for col in cols_to_drop if col in modified_frame.columns])
31
+ base_clean = base_frame.drop(columns=[col for col in cols_to_drop if col in base_frame.columns])
32
+
33
+ # Use ignore_index=True and avoid unnecessary copies
34
+ combined_frame = pd.concat([modified_clean, base_clean], ignore_index=True, copy=False)
35
 
36
  # Run predict_dupes on the combined frame
37
  updated_combined_frame = predict_dupes(combined_frame, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var, max_salary)
38
 
39
+ # Extract the first N rows (which correspond to our modified frame) - use iloc for efficiency
40
+ result_frame = updated_combined_frame.iloc[:num_modified_rows].copy()
41
 
42
  return result_frame
global_func/stratification_function.py CHANGED
@@ -5,15 +5,15 @@ def stratification_function(portfolio: pd.DataFrame, lineup_target: int, exclude
5
  excluded_cols = ['salary', 'median', 'Own', 'Finish_percentile', 'Dupes', 'Stack', 'Size', 'Win%', 'Lineup Edge', 'Weighted Own', 'Geomean', 'Diversity']
6
  player_columns = [col for col in portfolio.columns if col not in excluded_cols]
7
 
8
- concat_portfolio = portfolio.copy()
9
  if sorting_choice == 'Finish_percentile':
10
- concat_portfolio = concat_portfolio.sort_values(by=sorting_choice, ascending=True).reset_index(drop=True)
11
  else:
12
- concat_portfolio = concat_portfolio.sort_values(by=sorting_choice, ascending=False).reset_index(drop=True)
13
-
14
- # Calculate target similarity scores for linear progression
15
- similarity_floor = concat_portfolio[sorting_choice].quantile(low_threshold / 100)
16
- similarity_ceiling = concat_portfolio[sorting_choice].quantile(high_threshold / 100)
17
 
18
  # Create evenly spaced target similarity scores
19
  target_similarities = np.linspace(similarity_floor, similarity_ceiling, lineup_target)
@@ -22,11 +22,9 @@ def stratification_function(portfolio: pd.DataFrame, lineup_target: int, exclude
22
  selected_indices = []
23
  for target_sim in target_similarities:
24
  # Find the index of the closest similarity score
25
- closest_idx = (concat_portfolio[sorting_choice] - target_sim).abs().idxmin()
26
  if closest_idx not in selected_indices: # Avoid duplicates
27
  selected_indices.append(closest_idx)
28
 
29
- # Select the lineups
30
- concat_portfolio = concat_portfolio.loc[selected_indices].reset_index(drop=True)
31
-
32
- return concat_portfolio.sort_values(by=sorting_choice, ascending=False)
 
5
  excluded_cols = ['salary', 'median', 'Own', 'Finish_percentile', 'Dupes', 'Stack', 'Size', 'Win%', 'Lineup Edge', 'Weighted Own', 'Geomean', 'Diversity']
6
  player_columns = [col for col in portfolio.columns if col not in excluded_cols]
7
 
8
+ # Work with indices instead of copying entire DataFrame
9
  if sorting_choice == 'Finish_percentile':
10
+ sorted_indices = portfolio[sorting_choice].sort_values(ascending=True).index
11
  else:
12
+ sorted_indices = portfolio[sorting_choice].sort_values(ascending=False).index
13
+
14
+ # Calculate quantiles without copying
15
+ similarity_floor = portfolio[sorting_choice].quantile(low_threshold / 100)
16
+ similarity_ceiling = portfolio[sorting_choice].quantile(high_threshold / 100)
17
 
18
  # Create evenly spaced target similarity scores
19
  target_similarities = np.linspace(similarity_floor, similarity_ceiling, lineup_target)
 
22
  selected_indices = []
23
  for target_sim in target_similarities:
24
  # Find the index of the closest similarity score
25
+ closest_idx = (portfolio[sorting_choice] - target_sim).abs().idxmin()
26
  if closest_idx not in selected_indices: # Avoid duplicates
27
  selected_indices.append(closest_idx)
28
 
29
+ # Return view instead of copy
30
+ return portfolio.loc[selected_indices].sort_values(by=sorting_choice, ascending=False)