James McCool
committed on
Commit
·
7eef51a
1
Parent(s):
7df001f
Implement vectorized calculations for salary, median, and ownership in app.py to enhance performance and memory efficiency. Refactor reassess_edge and stratification_function to minimize DataFrame copies and improve memory management. Update filtering logic to use boolean masks for better efficiency.
Browse files- app.py +208 -270
- global_func/reassess_edge.py +11 -4
- global_func/stratification_function.py +10 -12
app.py
CHANGED
|
@@ -204,6 +204,134 @@ def create_memory_efficient_mappings(projections_df, site_var, type_var, sport_v
|
|
| 204 |
|
| 205 |
return base_mappings
|
| 206 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
def create_position_export_dict(column_name, csv_file, site_var, type_var, sport_var):
|
| 208 |
try:
|
| 209 |
# Remove any numbers from the column name to get the position
|
|
@@ -1143,60 +1271,20 @@ if selected_tab == 'Manage Portfolio':
|
|
| 1143 |
st.session_state['working_frame'] = pd.read_parquet(io.BytesIO(st.session_state['origin_portfolio']))
|
| 1144 |
st.session_state['player_columns'] = [col for col in st.session_state['working_frame'].columns if col not in excluded_cols]
|
| 1145 |
|
| 1146 |
-
|
| 1147 |
-
|
| 1148 |
-
|
| 1149 |
-
|
| 1150 |
-
|
| 1151 |
-
|
| 1152 |
-
|
| 1153 |
-
|
| 1154 |
-
|
| 1155 |
-
|
| 1156 |
-
|
| 1157 |
-
|
| 1158 |
-
|
| 1159 |
-
|
| 1160 |
-
)
|
| 1161 |
-
|
| 1162 |
-
# Calculate ownership (CPT uses cpt_own_map, others use own_map)
|
| 1163 |
-
st.session_state['working_frame']['Own'] = st.session_state['working_frame'].apply(
|
| 1164 |
-
lambda row: st.session_state['map_dict']['cpt_own_map'].get(row.iloc[0], 0) +
|
| 1165 |
-
sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row.iloc[1:]),
|
| 1166 |
-
axis=1
|
| 1167 |
-
)
|
| 1168 |
-
|
| 1169 |
-
elif sport_var != 'CS2' and sport_var != 'LOL':
|
| 1170 |
-
st.session_state['working_frame']['salary'] = st.session_state['working_frame'].apply(lambda row: sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row), axis=1)
|
| 1171 |
-
st.session_state['working_frame']['median'] = st.session_state['working_frame'].apply(lambda row: sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row), axis=1)
|
| 1172 |
-
st.session_state['working_frame']['Own'] = st.session_state['working_frame'].apply(lambda row: sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row), axis=1)
|
| 1173 |
-
if 'stack_dict' in st.session_state:
|
| 1174 |
-
st.session_state['working_frame']['Stack'] = st.session_state['working_frame'].index.map(st.session_state['stack_dict'])
|
| 1175 |
-
st.session_state['working_frame']['Size'] = st.session_state['working_frame'].index.map(st.session_state['size_dict'])
|
| 1176 |
-
elif type_var == 'Showdown':
|
| 1177 |
-
# Calculate salary (CPT uses cpt_salary_map, others use salary_map)
|
| 1178 |
-
st.session_state['working_frame']['salary'] = st.session_state['working_frame'].apply(
|
| 1179 |
-
lambda row: st.session_state['map_dict']['cpt_salary_map'].get(row.iloc[0], 0) +
|
| 1180 |
-
sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row.iloc[1:]),
|
| 1181 |
-
axis=1
|
| 1182 |
-
)
|
| 1183 |
-
|
| 1184 |
-
# Calculate median (CPT uses cpt_proj_map, others use proj_map)
|
| 1185 |
-
st.session_state['working_frame']['median'] = st.session_state['working_frame'].apply(
|
| 1186 |
-
lambda row: st.session_state['map_dict']['cpt_proj_map'].get(row.iloc[0], 0) +
|
| 1187 |
-
sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row.iloc[1:]),
|
| 1188 |
-
axis=1
|
| 1189 |
-
)
|
| 1190 |
-
|
| 1191 |
-
# Calculate ownership (CPT uses cpt_own_map, others use own_map)
|
| 1192 |
-
st.session_state['working_frame']['Own'] = st.session_state['working_frame'].apply(
|
| 1193 |
-
lambda row: st.session_state['map_dict']['cpt_own_map'].get(row.iloc[0], 0) +
|
| 1194 |
-
sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row.iloc[1:]),
|
| 1195 |
-
axis=1
|
| 1196 |
-
)
|
| 1197 |
-
# st.session_state['working_frame']['Own'] = st.session_state['working_frame']['Own'].astype('float32')
|
| 1198 |
-
st.session_state['working_frame']['median'] = st.session_state['working_frame']['median'].astype('float32')
|
| 1199 |
-
st.session_state['working_frame']['salary'] = st.session_state['working_frame']['salary'].astype('uint16')
|
| 1200 |
|
| 1201 |
st.session_state['base_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var, salary_max)
|
| 1202 |
st.session_state['working_frame'] = st.session_state['base_frame'].copy()
|
|
@@ -1259,60 +1347,60 @@ if selected_tab == 'Manage Portfolio':
|
|
| 1259 |
|
| 1260 |
if reg_submitted:
|
| 1261 |
st.session_state['settings_base'] = False
|
| 1262 |
-
parsed_frame = st.session_state['working_frame'].copy()
|
| 1263 |
|
| 1264 |
-
|
| 1265 |
-
|
| 1266 |
-
|
| 1267 |
-
|
| 1268 |
-
|
| 1269 |
-
|
| 1270 |
-
|
| 1271 |
-
|
| 1272 |
-
|
| 1273 |
-
|
| 1274 |
-
|
| 1275 |
-
|
|
|
|
|
|
|
|
|
|
| 1276 |
|
| 1277 |
|
| 1278 |
-
|
| 1279 |
-
|
| 1280 |
-
|
| 1281 |
-
|
| 1282 |
-
parsed_frame = parsed_frame[parsed_frame['Stack'].isin(stack_selections)]
|
| 1283 |
if stack_remove_toggle == 'Yes':
|
| 1284 |
-
|
| 1285 |
-
|
| 1286 |
-
|
| 1287 |
-
st.session_state['working_frame'] =
|
| 1288 |
st.session_state['export_merge'] = st.session_state['working_frame'].copy()
|
| 1289 |
if exp_submitted:
|
| 1290 |
st.session_state['settings_base'] = False
|
| 1291 |
-
|
| 1292 |
-
|
| 1293 |
-
|
| 1294 |
-
|
| 1295 |
-
|
| 1296 |
-
|
| 1297 |
-
|
| 1298 |
-
|
| 1299 |
-
|
| 1300 |
-
|
| 1301 |
-
|
| 1302 |
-
|
| 1303 |
-
|
| 1304 |
-
|
| 1305 |
-
|
| 1306 |
-
|
| 1307 |
-
|
| 1308 |
-
|
| 1309 |
-
|
| 1310 |
-
|
| 1311 |
if stack_remove_toggle == 'Yes':
|
| 1312 |
-
|
| 1313 |
-
|
| 1314 |
-
|
| 1315 |
-
st.session_state['export_base'] = parsed_frame.sort_values(by='median', ascending=False).reset_index(drop=True)
|
| 1316 |
st.session_state['export_merge'] = st.session_state['export_base'].copy()
|
| 1317 |
|
| 1318 |
with st.expander('Micro Filter Options'):
|
|
@@ -1923,189 +2011,39 @@ if selected_tab == 'Manage Portfolio':
|
|
| 1923 |
exp_submitted = st.form_submit_button("Export")
|
| 1924 |
if reg_submitted:
|
| 1925 |
st.session_state['settings_base'] = False
|
| 1926 |
-
prior_frame = st.session_state['working_frame'].copy()
|
| 1927 |
parsed_frame = exposure_spread(st.session_state['working_frame'], st.session_state['exposure_player'], exposure_target, ignore_stacks, remove_teams_exposure, specific_replacements, specific_columns, st.session_state['projections_df'], sport_var, type_var, salary_max, stacking_sports)
|
| 1928 |
|
| 1929 |
-
|
| 1930 |
-
|
| 1931 |
-
|
| 1932 |
-
|
| 1933 |
-
|
| 1934 |
-
|
| 1935 |
-
|
| 1936 |
-
|
| 1937 |
-
|
| 1938 |
-
|
| 1939 |
-
parsed_frame['median'] = parsed_frame.apply(
|
| 1940 |
-
lambda row: st.session_state['map_dict']['cpt_proj_map'].get(row.iloc[0], 0) +
|
| 1941 |
-
sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row.iloc[1:]),
|
| 1942 |
-
axis=1
|
| 1943 |
-
)
|
| 1944 |
-
|
| 1945 |
-
# Calculate ownership (CPT uses cpt_own_map, others use own_map)
|
| 1946 |
-
parsed_frame['Own'] = parsed_frame.apply(
|
| 1947 |
-
lambda row: st.session_state['map_dict']['cpt_own_map'].get(row.iloc[0], 0) +
|
| 1948 |
-
sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row.iloc[1:]),
|
| 1949 |
-
axis=1
|
| 1950 |
-
)
|
| 1951 |
-
|
| 1952 |
-
elif sport_var != 'CS2' and sport_var != 'LOL':
|
| 1953 |
-
parsed_frame['salary'] = parsed_frame.apply(lambda row: sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row), axis=1)
|
| 1954 |
-
parsed_frame['median'] = parsed_frame.apply(lambda row: sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row), axis=1)
|
| 1955 |
-
parsed_frame['Own'] = parsed_frame.apply(lambda row: sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row), axis=1)
|
| 1956 |
-
if 'stack_dict' in st.session_state:
|
| 1957 |
-
team_dict = dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team']))
|
| 1958 |
-
if sport_var == 'LOL':
|
| 1959 |
-
parsed_frame['Stack'] = parsed_frame.apply(
|
| 1960 |
-
lambda row: Counter(
|
| 1961 |
-
team_dict.get(player, '') for player in row
|
| 1962 |
-
if team_dict.get(player, '') != ''
|
| 1963 |
-
).most_common(1)[0][0] if any(team_dict.get(player, '') for player in row) else '',
|
| 1964 |
-
axis=1
|
| 1965 |
-
)
|
| 1966 |
-
parsed_frame['Size'] = parsed_frame.apply(
|
| 1967 |
-
lambda row: Counter(
|
| 1968 |
-
team_dict.get(player, '') for player in row
|
| 1969 |
-
if team_dict.get(player, '') != ''
|
| 1970 |
-
).most_common(1)[0][1] if any(team_dict.get(player, '') for player in row) else 0,
|
| 1971 |
-
axis=1
|
| 1972 |
-
)
|
| 1973 |
-
else:
|
| 1974 |
-
parsed_frame['Stack'] = parsed_frame.apply(
|
| 1975 |
-
lambda row: Counter(
|
| 1976 |
-
team_dict.get(player, '') for player in row[2:]
|
| 1977 |
-
if team_dict.get(player, '') != ''
|
| 1978 |
-
).most_common(1)[0][0] if any(team_dict.get(player, '') for player in row[2:]) else '',
|
| 1979 |
-
axis=1
|
| 1980 |
-
)
|
| 1981 |
-
parsed_frame['Size'] = parsed_frame.apply(
|
| 1982 |
-
lambda row: Counter(
|
| 1983 |
-
team_dict.get(player, '') for player in row[2:]
|
| 1984 |
-
if team_dict.get(player, '') != ''
|
| 1985 |
-
).most_common(1)[0][1] if any(team_dict.get(player, '') for player in row[2:]) else 0,
|
| 1986 |
-
axis=1
|
| 1987 |
-
)
|
| 1988 |
-
elif type_var == 'Showdown':
|
| 1989 |
-
# Calculate salary (CPT uses cpt_salary_map, others use salary_map)
|
| 1990 |
-
parsed_frame['salary'] = parsed_frame.apply(
|
| 1991 |
-
lambda row: st.session_state['map_dict']['cpt_salary_map'].get(row.iloc[0], 0) +
|
| 1992 |
-
sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row.iloc[1:]),
|
| 1993 |
-
axis=1
|
| 1994 |
-
)
|
| 1995 |
-
|
| 1996 |
-
# Calculate median (CPT uses cpt_proj_map, others use proj_map)
|
| 1997 |
-
parsed_frame['median'] = parsed_frame.apply(
|
| 1998 |
-
lambda row: st.session_state['map_dict']['cpt_proj_map'].get(row.iloc[0], 0) +
|
| 1999 |
-
sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row.iloc[1:]),
|
| 2000 |
-
axis=1
|
| 2001 |
-
)
|
| 2002 |
-
|
| 2003 |
-
# Calculate ownership (CPT uses cpt_own_map, others use own_map)
|
| 2004 |
-
parsed_frame['Own'] = parsed_frame.apply(
|
| 2005 |
-
lambda row: st.session_state['map_dict']['cpt_own_map'].get(row.iloc[0], 0) +
|
| 2006 |
-
sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row.iloc[1:]),
|
| 2007 |
-
axis=1
|
| 2008 |
-
)
|
| 2009 |
st.session_state['working_frame'] = parsed_frame.reset_index(drop=True)
|
| 2010 |
-
# st.session_state['working_frame']['Own'] = st.session_state['working_frame']['Own'].astype('float32')
|
| 2011 |
-
st.session_state['working_frame']['median'] = st.session_state['working_frame']['median'].astype('float32')
|
| 2012 |
-
st.session_state['working_frame']['salary'] = st.session_state['working_frame']['salary'].astype('uint16')
|
| 2013 |
|
| 2014 |
# st.session_state['working_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
|
| 2015 |
st.session_state['working_frame'] = reassess_edge(st.session_state['working_frame'], st.session_state['base_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var, salary_max)
|
| 2016 |
st.session_state['export_merge'] = st.session_state['working_frame'].copy()
|
| 2017 |
elif exp_submitted:
|
| 2018 |
st.session_state['settings_base'] = False
|
| 2019 |
-
prior_frame = st.session_state['export_base'].copy()
|
| 2020 |
parsed_frame = exposure_spread(st.session_state['export_base'], st.session_state['exposure_player'], exposure_target, ignore_stacks, remove_teams_exposure, specific_replacements, specific_columns, st.session_state['projections_df'], sport_var, type_var, salary_max, stacking_sports)
|
| 2021 |
|
| 2022 |
-
|
| 2023 |
-
|
| 2024 |
-
|
| 2025 |
-
|
| 2026 |
-
|
| 2027 |
-
|
| 2028 |
-
|
| 2029 |
-
|
| 2030 |
-
|
| 2031 |
-
lambda row: st.session_state['map_dict']['cpt_proj_map'].get(row.iloc[0], 0) +
|
| 2032 |
-
sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row.iloc[1:]),
|
| 2033 |
-
axis=1
|
| 2034 |
-
)
|
| 2035 |
-
|
| 2036 |
-
# Calculate ownership (CPT uses cpt_own_map, others use own_map)
|
| 2037 |
-
parsed_frame['Own'] = parsed_frame.apply(
|
| 2038 |
-
lambda row: st.session_state['map_dict']['cpt_own_map'].get(row.iloc[0], 0) +
|
| 2039 |
-
sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row.iloc[1:]),
|
| 2040 |
-
axis=1
|
| 2041 |
-
)
|
| 2042 |
|
| 2043 |
-
|
| 2044 |
-
parsed_frame['salary'] = parsed_frame.apply(lambda row: sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row), axis=1)
|
| 2045 |
-
parsed_frame['median'] = parsed_frame.apply(lambda row: sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row), axis=1)
|
| 2046 |
-
parsed_frame['Own'] = parsed_frame.apply(lambda row: sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row), axis=1)
|
| 2047 |
-
if 'stack_dict' in st.session_state:
|
| 2048 |
-
team_dict = dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team']))
|
| 2049 |
-
if sport_var == 'LOL':
|
| 2050 |
-
parsed_frame['Stack'] = parsed_frame.apply(
|
| 2051 |
-
lambda row: Counter(
|
| 2052 |
-
team_dict.get(player, '') for player in row
|
| 2053 |
-
if team_dict.get(player, '') != ''
|
| 2054 |
-
).most_common(1)[0][0] if any(team_dict.get(player, '') for player in row) else '',
|
| 2055 |
-
axis=1
|
| 2056 |
-
)
|
| 2057 |
-
parsed_frame['Size'] = parsed_frame.apply(
|
| 2058 |
-
lambda row: Counter(
|
| 2059 |
-
team_dict.get(player, '') for player in row
|
| 2060 |
-
if team_dict.get(player, '') != ''
|
| 2061 |
-
).most_common(1)[0][1] if any(team_dict.get(player, '') for player in row) else 0,
|
| 2062 |
-
axis=1
|
| 2063 |
-
)
|
| 2064 |
-
else:
|
| 2065 |
-
parsed_frame['Stack'] = parsed_frame.apply(
|
| 2066 |
-
lambda row: Counter(
|
| 2067 |
-
team_dict.get(player, '') for player in row[2:]
|
| 2068 |
-
if team_dict.get(player, '') != ''
|
| 2069 |
-
).most_common(1)[0][0] if any(team_dict.get(player, '') for player in row[2:]) else '',
|
| 2070 |
-
axis=1
|
| 2071 |
-
)
|
| 2072 |
-
parsed_frame['Size'] = parsed_frame.apply(
|
| 2073 |
-
lambda row: Counter(
|
| 2074 |
-
team_dict.get(player, '') for player in row[2:]
|
| 2075 |
-
if team_dict.get(player, '') != ''
|
| 2076 |
-
).most_common(1)[0][1] if any(team_dict.get(player, '') for player in row[2:]) else 0,
|
| 2077 |
-
axis=1
|
| 2078 |
-
)
|
| 2079 |
-
elif type_var == 'Showdown':
|
| 2080 |
-
if sport_var == 'GOLF':
|
| 2081 |
-
|
| 2082 |
-
parsed_frame['salary'] = parsed_frame.apply(lambda row: sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row), axis=1)
|
| 2083 |
-
parsed_frame['median'] = parsed_frame.apply(lambda row: sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row), axis=1)
|
| 2084 |
-
parsed_frame['Own'] = parsed_frame.apply(lambda row: sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row), axis=1)
|
| 2085 |
-
else:
|
| 2086 |
-
parsed_frame['salary'] = parsed_frame.apply(
|
| 2087 |
-
lambda row: st.session_state['map_dict']['cpt_salary_map'].get(row.iloc[0], 0) +
|
| 2088 |
-
sum(st.session_state['map_dict']['salary_map'].get(player, 0) for player in row.iloc[1:]),
|
| 2089 |
-
axis=1
|
| 2090 |
-
)
|
| 2091 |
-
|
| 2092 |
-
# Calculate median (CPT uses cpt_proj_map, others use proj_map)
|
| 2093 |
-
parsed_frame['median'] = parsed_frame.apply(
|
| 2094 |
-
lambda row: st.session_state['map_dict']['cpt_proj_map'].get(row.iloc[0], 0) +
|
| 2095 |
-
sum(st.session_state['map_dict']['proj_map'].get(player, 0) for player in row.iloc[1:]),
|
| 2096 |
-
axis=1
|
| 2097 |
-
)
|
| 2098 |
-
|
| 2099 |
-
# Calculate ownership (CPT uses cpt_own_map, others use own_map)
|
| 2100 |
-
parsed_frame['Own'] = parsed_frame.apply(
|
| 2101 |
-
lambda row: st.session_state['map_dict']['cpt_own_map'].get(row.iloc[0], 0) +
|
| 2102 |
-
sum(st.session_state['map_dict']['own_map'].get(player, 0) for player in row.iloc[1:]),
|
| 2103 |
-
axis=1
|
| 2104 |
-
)
|
| 2105 |
st.session_state['export_base'] = parsed_frame.reset_index(drop=True)
|
| 2106 |
-
# st.session_state['export_base']['Own'] = st.session_state['export_base']['Own'].astype('float32')
|
| 2107 |
-
st.session_state['export_base']['median'] = st.session_state['export_base']['median'].astype('float32')
|
| 2108 |
-
st.session_state['export_base']['salary'] = st.session_state['export_base']['salary'].astype('uint16')
|
| 2109 |
|
| 2110 |
# st.session_state['export_base'] = predict_dupes(st.session_state['export_base'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
|
| 2111 |
st.session_state['export_base'] = reassess_edge(st.session_state['export_base'], st.session_state['base_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var, salary_max)
|
|
|
|
| 204 |
|
| 205 |
return base_mappings
|
| 206 |
|
| 207 |
+
def calculate_salary_vectorized(df, player_columns, map_dict, type_var, sport_var):
    """Return the total salary of every lineup in ``df`` as a Series.

    Replaces the row-wise ``DataFrame.apply`` salary sums with one
    ``Series.map`` per player column.

    Args:
        df: Lineup frame; must contain every column named in ``player_columns``.
        player_columns: Ordered player column names. For captain formats the
            first entry is the captain slot.
        map_dict: Must contain ``'salary_map'``; captain formats also read
            ``'cpt_salary_map'``.
        type_var: Contest type; ``'Classic'`` and ``'Showdown'`` are
            special-cased.
        sport_var: Sport code; ``'CS2'``, ``'LOL'`` and ``'GOLF'`` are
            special-cased.

    Returns:
        A Series aligned to ``df.index``. Players missing from a map
        contribute 0. An empty ``player_columns`` yields an all-zero Series.
    """
    columns = list(player_columns)
    if not columns:
        # No player slots: return an aligned zero Series rather than a bare
        # int (or an IndexError from looking up a captain column).
        return pd.Series(0, index=df.index)

    # Captain pricing applies to Classic CS2/LOL and to every Showdown sport
    # except GOLF; all other combinations price every slot from salary_map.
    has_captain = (
        (type_var == 'Classic' and sport_var in ('CS2', 'LOL'))
        or (type_var == 'Showdown' and sport_var != 'GOLF')
    )
    flex_columns = columns[1:] if has_captain else columns

    # Columns are selected by name (not position) so extra non-player columns
    # in ``df`` can never be mistaken for player slots.
    salary_map = map_dict['salary_map']
    flex_salaries = sum(df[col].map(salary_map).fillna(0) for col in flex_columns)
    if not has_captain:
        return flex_salaries

    cpt_salaries = df[columns[0]].map(map_dict['cpt_salary_map']).fillna(0)
    return cpt_salaries + flex_salaries
|
| 224 |
+
|
| 225 |
+
def calculate_median_vectorized(df, player_columns, map_dict, type_var, sport_var):
    """Return the summed median projection of every lineup in ``df``.

    Replaces the row-wise ``DataFrame.apply`` projection sums with one
    ``Series.map`` per player column.

    Args:
        df: Lineup frame; must contain every column named in ``player_columns``.
        player_columns: Ordered player column names. For captain formats the
            first entry is the captain slot.
        map_dict: Must contain ``'proj_map'``; captain formats also read
            ``'cpt_proj_map'``.
        type_var: Contest type; ``'Classic'`` and ``'Showdown'`` are
            special-cased.
        sport_var: Sport code; ``'CS2'``, ``'LOL'`` and ``'GOLF'`` are
            special-cased.

    Returns:
        A Series aligned to ``df.index``. Players missing from a map
        contribute 0. An empty ``player_columns`` yields an all-zero Series.
    """
    columns = list(player_columns)
    if not columns:
        # No player slots: return an aligned zero Series rather than a bare
        # int (or an IndexError from looking up a captain column).
        return pd.Series(0, index=df.index)

    # Captain projections apply to Classic CS2/LOL and to every Showdown sport
    # except GOLF; all other combinations read every slot from proj_map.
    has_captain = (
        (type_var == 'Classic' and sport_var in ('CS2', 'LOL'))
        or (type_var == 'Showdown' and sport_var != 'GOLF')
    )
    flex_columns = columns[1:] if has_captain else columns

    # Columns are selected by name (not position) so extra non-player columns
    # in ``df`` can never be mistaken for player slots.
    proj_map = map_dict['proj_map']
    flex_medians = sum(df[col].map(proj_map).fillna(0) for col in flex_columns)
    if not has_captain:
        return flex_medians

    cpt_medians = df[columns[0]].map(map_dict['cpt_proj_map']).fillna(0)
    return cpt_medians + flex_medians
|
| 240 |
+
|
| 241 |
+
def calculate_ownership_vectorized(df, player_columns, map_dict, type_var, sport_var):
    """Return the summed ownership of every lineup in ``df`` as a Series.

    Replaces the row-wise ``DataFrame.apply`` ownership sums with one
    ``Series.map`` per player column.

    Args:
        df: Lineup frame; must contain every column named in ``player_columns``.
        player_columns: Ordered player column names. For captain formats the
            first entry is the captain slot.
        map_dict: Must contain ``'own_map'``; captain formats also read
            ``'cpt_own_map'``.
        type_var: Contest type; ``'Classic'`` and ``'Showdown'`` are
            special-cased.
        sport_var: Sport code; ``'CS2'``, ``'LOL'`` and ``'GOLF'`` are
            special-cased.

    Returns:
        A Series aligned to ``df.index``. Players missing from a map
        contribute 0. An empty ``player_columns`` yields an all-zero Series.
    """
    columns = list(player_columns)
    if not columns:
        # No player slots: return an aligned zero Series rather than a bare
        # int (or an IndexError from looking up a captain column).
        return pd.Series(0, index=df.index)

    # Captain ownership applies to Classic CS2/LOL and to every Showdown sport
    # except GOLF; all other combinations read every slot from own_map.
    has_captain = (
        (type_var == 'Classic' and sport_var in ('CS2', 'LOL'))
        or (type_var == 'Showdown' and sport_var != 'GOLF')
    )
    flex_columns = columns[1:] if has_captain else columns

    # Columns are selected by name (not position) so extra non-player columns
    # in ``df`` can never be mistaken for player slots.
    own_map = map_dict['own_map']
    flex_own = sum(df[col].map(own_map).fillna(0) for col in flex_columns)
    if not has_captain:
        return flex_own

    cpt_own = df[columns[0]].map(map_dict['cpt_own_map']).fillna(0)
    return cpt_own + flex_own
|
| 256 |
+
|
| 257 |
+
def calculate_lineup_metrics(df, player_columns, map_dict, type_var, sport_var, projections_df=None):
    """Return a copy of ``df`` with salary, median, Own and optional stack columns.

    Args:
        df: Lineup frame containing every column named in ``player_columns``.
            It is copied; the caller's frame is not modified.
        player_columns: Ordered player column names.
        map_dict: Lookup maps forwarded to the ``calculate_*_vectorized``
            helpers.
        type_var: Contest type, forwarded to the helpers and used to choose
            the stacking rule.
        sport_var: Sport code, forwarded to the helpers and used to choose
            the stacking rule.
        projections_df: Optional frame with ``player_names`` and ``team``
            columns. ``Stack`` and ``Size`` are only written when it is given,
            has a ``team`` column, and the format is either Classic (other
            than CS2/LOL/GOLF) or LOL.

    Returns:
        The augmented copy. ``salary`` is cast to ``uint16`` and ``median``
        to ``float32``.
    """
    df = df.copy()  # Work on a copy to avoid modifying original
    player_columns = list(player_columns)
    lineup_players = df[player_columns]

    # Vectorized calculations
    df['salary'] = calculate_salary_vectorized(lineup_players, player_columns, map_dict, type_var, sport_var)
    df['median'] = calculate_median_vectorized(lineup_players, player_columns, map_dict, type_var, sport_var)
    df['Own'] = calculate_ownership_vectorized(lineup_players, player_columns, map_dict, type_var, sport_var)

    # Handle stacking for specific sports
    if projections_df is not None and 'team' in projections_df.columns:
        if type_var == 'Classic' and sport_var not in ['CS2', 'LOL', 'GOLF']:
            # Classic sports skip the first two player columns (pitchers)
            # whenever there are more than two columns to choose from.
            stack_columns = player_columns[2:] if len(player_columns) > 2 else player_columns
        elif sport_var == 'LOL':
            # LOL uses all player columns for stacking
            stack_columns = player_columns
        else:
            stack_columns = None

        if stack_columns is not None:
            team_dict = dict(zip(projections_df['player_names'], projections_df['team']))

            def _dominant_team(players):
                """Return (team, count) for the most common team, or ('', 0)."""
                teams = [team_dict.get(player, '') for player in players]
                teams = [team for team in teams if team != '']
                if not any(teams):
                    return '', 0
                return Counter(teams).most_common(1)[0]

            # One pass yields both Stack and Size. Building plain lists also
            # keeps the assignment valid for a frame with zero rows, where
            # DataFrame.apply(axis=1) would hand back a DataFrame instead of
            # a Series.
            stacks = [_dominant_team(row) for row in df[stack_columns].to_numpy()]
            df['Stack'] = [team for team, _ in stacks]
            df['Size'] = [size for _, size in stacks]

    # Optimize data types
    # NOTE(review): uint16 tops out at 65535 — assumes lineup salaries stay
    # below that; confirm against the salary caps in use.
    df['salary'] = df['salary'].astype('uint16')
    df['median'] = df['median'].astype('float32')

    return df
|
| 309 |
+
|
| 310 |
+
def create_team_filter_mask(df, player_columns, team_map, teams_to_filter, focus_type='Overall', type_var='Classic'):
    """Return a boolean mask of lineups that roster a player from the given teams.

    Args:
        df: Lineup frame containing the columns named in ``player_columns``.
        player_columns: Ordered player column names.
        team_map: Mapping of player name to team.
        teams_to_filter: Iterable of team values to look for.
        focus_type: For Showdown, ``'CPT'`` restricts the search to the first
            player column and ``'FLEX'`` to the remaining ones; any other
            value searches every player column.
        type_var: ``'Classic'`` excludes the ``'SP1'`` and ``'SP2'`` columns
            from the search; ``'Showdown'`` enables ``focus_type``.

    Returns:
        A boolean Series aligned to ``df.index`` that is True where any
        searched column holds a player whose mapped team is in
        ``teams_to_filter``. Players missing from ``team_map`` never match.
    """
    if type_var == 'Showdown' and focus_type == 'CPT':
        focus_columns = [player_columns[0]]  # First column only
    elif type_var == 'Showdown' and focus_type == 'FLEX':
        focus_columns = player_columns[1:]  # All except first
    elif type_var == 'Classic':
        focus_columns = [col for col in player_columns if col not in ['SP1', 'SP2']]  # Exclude pitchers
    else:
        focus_columns = player_columns

    mask = pd.Series(False, index=df.index)
    wanted_teams = list(teams_to_filter)
    if not wanted_teams:
        return mask

    for col in focus_columns:
        # Map each column once and test all teams together, instead of
        # re-mapping the column for every team.
        teams = df[col].map(team_map)
        # notna() keeps unmapped players from matching, as with the
        # per-team equality comparison.
        mask |= teams.isin(wanted_teams) & teams.notna()

    return mask
|
| 334 |
+
|
| 335 |
def create_position_export_dict(column_name, csv_file, site_var, type_var, sport_var):
|
| 336 |
try:
|
| 337 |
# Remove any numbers from the column name to get the position
|
|
|
|
| 1271 |
st.session_state['working_frame'] = pd.read_parquet(io.BytesIO(st.session_state['origin_portfolio']))
|
| 1272 |
st.session_state['player_columns'] = [col for col in st.session_state['working_frame'].columns if col not in excluded_cols]
|
| 1273 |
|
| 1274 |
+
# Use vectorized calculation function
|
| 1275 |
+
st.session_state['working_frame'] = calculate_lineup_metrics(
|
| 1276 |
+
st.session_state['working_frame'],
|
| 1277 |
+
st.session_state['player_columns'],
|
| 1278 |
+
st.session_state['map_dict'],
|
| 1279 |
+
type_var,
|
| 1280 |
+
sport_var,
|
| 1281 |
+
st.session_state['projections_df'] if 'stack_dict' in st.session_state else None
|
| 1282 |
+
)
|
| 1283 |
+
|
| 1284 |
+
# Map existing stack/size data if available
|
| 1285 |
+
if 'stack_dict' in st.session_state:
|
| 1286 |
+
st.session_state['working_frame']['Stack'] = st.session_state['working_frame'].index.map(st.session_state['stack_dict'])
|
| 1287 |
+
st.session_state['working_frame']['Size'] = st.session_state['working_frame'].index.map(st.session_state['size_dict'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1288 |
|
| 1289 |
st.session_state['base_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var, salary_max)
|
| 1290 |
st.session_state['working_frame'] = st.session_state['base_frame'].copy()
|
|
|
|
| 1347 |
|
| 1348 |
if reg_submitted:
|
| 1349 |
st.session_state['settings_base'] = False
|
|
|
|
| 1350 |
|
| 1351 |
+
# Use index-based filtering instead of copying DataFrame
|
| 1352 |
+
filter_mask = (
|
| 1353 |
+
(st.session_state['working_frame']['salary'] >= min_salary) &
|
| 1354 |
+
(st.session_state['working_frame']['salary'] <= max_salary) &
|
| 1355 |
+
(st.session_state['working_frame']['median'] >= min_proj) &
|
| 1356 |
+
(st.session_state['working_frame']['median'] <= max_proj) &
|
| 1357 |
+
(st.session_state['working_frame']['Own'] >= min_own) &
|
| 1358 |
+
(st.session_state['working_frame']['Own'] <= max_own) &
|
| 1359 |
+
(st.session_state['working_frame']['Dupes'] >= min_dupes) &
|
| 1360 |
+
(st.session_state['working_frame']['Dupes'] <= max_dupes) &
|
| 1361 |
+
(st.session_state['working_frame']['Finish_percentile'] >= min_finish_percentile) &
|
| 1362 |
+
(st.session_state['working_frame']['Finish_percentile'] <= max_finish_percentile) &
|
| 1363 |
+
(st.session_state['working_frame']['Lineup Edge'] >= min_lineup_edge) &
|
| 1364 |
+
(st.session_state['working_frame']['Lineup Edge'] <= max_lineup_edge)
|
| 1365 |
+
)
|
| 1366 |
|
| 1367 |
|
| 1368 |
+
# Handle stack filtering
|
| 1369 |
+
if 'Stack' in st.session_state['working_frame'].columns:
|
| 1370 |
+
if stack_include_toggle != 'All Stacks':
|
| 1371 |
+
filter_mask &= st.session_state['working_frame']['Stack'].isin(stack_selections)
|
|
|
|
| 1372 |
if stack_remove_toggle == 'Yes':
|
| 1373 |
+
filter_mask &= ~st.session_state['working_frame']['Stack'].isin(stack_remove)
|
| 1374 |
+
|
| 1375 |
+
# Apply all filters at once
|
| 1376 |
+
st.session_state['working_frame'] = st.session_state['working_frame'][filter_mask].sort_values(by='median', ascending=False).reset_index(drop=True)
|
| 1377 |
st.session_state['export_merge'] = st.session_state['working_frame'].copy()
|
| 1378 |
if exp_submitted:
|
| 1379 |
st.session_state['settings_base'] = False
|
| 1380 |
+
|
| 1381 |
+
# Use index-based filtering for export_base
|
| 1382 |
+
export_filter_mask = (
|
| 1383 |
+
(st.session_state['export_base']['salary'] >= min_salary) &
|
| 1384 |
+
(st.session_state['export_base']['salary'] <= max_salary) &
|
| 1385 |
+
(st.session_state['export_base']['median'] >= min_proj) &
|
| 1386 |
+
(st.session_state['export_base']['median'] <= max_proj) &
|
| 1387 |
+
(st.session_state['export_base']['Own'] >= min_own) &
|
| 1388 |
+
(st.session_state['export_base']['Own'] <= max_own) &
|
| 1389 |
+
(st.session_state['export_base']['Dupes'] >= min_dupes) &
|
| 1390 |
+
(st.session_state['export_base']['Dupes'] <= max_dupes) &
|
| 1391 |
+
(st.session_state['export_base']['Finish_percentile'] >= min_finish_percentile) &
|
| 1392 |
+
(st.session_state['export_base']['Finish_percentile'] <= max_finish_percentile) &
|
| 1393 |
+
(st.session_state['export_base']['Lineup Edge'] >= min_lineup_edge) &
|
| 1394 |
+
(st.session_state['export_base']['Lineup Edge'] <= max_lineup_edge)
|
| 1395 |
+
)
|
| 1396 |
+
|
| 1397 |
+
if 'Stack' in st.session_state['export_base'].columns:
|
| 1398 |
+
if stack_include_toggle != 'All Stacks':
|
| 1399 |
+
export_filter_mask &= st.session_state['export_base']['Stack'].isin(stack_selections)
|
| 1400 |
if stack_remove_toggle == 'Yes':
|
| 1401 |
+
export_filter_mask &= ~st.session_state['export_base']['Stack'].isin(stack_remove)
|
| 1402 |
+
|
| 1403 |
+
st.session_state['export_base'] = st.session_state['export_base'][export_filter_mask].sort_values(by='median', ascending=False).reset_index(drop=True)
|
|
|
|
| 1404 |
st.session_state['export_merge'] = st.session_state['export_base'].copy()
|
| 1405 |
|
| 1406 |
with st.expander('Micro Filter Options'):
|
|
|
|
| 2011 |
exp_submitted = st.form_submit_button("Export")
|
| 2012 |
if reg_submitted:
|
| 2013 |
st.session_state['settings_base'] = False
|
|
|
|
| 2014 |
parsed_frame = exposure_spread(st.session_state['working_frame'], st.session_state['exposure_player'], exposure_target, ignore_stacks, remove_teams_exposure, specific_replacements, specific_columns, st.session_state['projections_df'], sport_var, type_var, salary_max, stacking_sports)
|
| 2015 |
|
| 2016 |
+
# Use consolidated calculation function
|
| 2017 |
+
parsed_frame = calculate_lineup_metrics(
|
| 2018 |
+
parsed_frame,
|
| 2019 |
+
st.session_state['player_columns'],
|
| 2020 |
+
st.session_state['map_dict'],
|
| 2021 |
+
type_var,
|
| 2022 |
+
sport_var,
|
| 2023 |
+
st.session_state['projections_df']
|
| 2024 |
+
)
|
| 2025 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2026 |
st.session_state['working_frame'] = parsed_frame.reset_index(drop=True)
|
|
|
|
|
|
|
|
|
|
| 2027 |
|
| 2028 |
# st.session_state['working_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
|
| 2029 |
st.session_state['working_frame'] = reassess_edge(st.session_state['working_frame'], st.session_state['base_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var, salary_max)
|
| 2030 |
st.session_state['export_merge'] = st.session_state['working_frame'].copy()
|
| 2031 |
elif exp_submitted:
|
| 2032 |
st.session_state['settings_base'] = False
|
|
|
|
| 2033 |
parsed_frame = exposure_spread(st.session_state['export_base'], st.session_state['exposure_player'], exposure_target, ignore_stacks, remove_teams_exposure, specific_replacements, specific_columns, st.session_state['projections_df'], sport_var, type_var, salary_max, stacking_sports)
|
| 2034 |
|
| 2035 |
+
# Use consolidated calculation function for export
|
| 2036 |
+
parsed_frame = calculate_lineup_metrics(
|
| 2037 |
+
parsed_frame,
|
| 2038 |
+
st.session_state['player_columns'],
|
| 2039 |
+
st.session_state['map_dict'],
|
| 2040 |
+
type_var,
|
| 2041 |
+
sport_var,
|
| 2042 |
+
st.session_state['projections_df']
|
| 2043 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2044 |
|
| 2045 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2046 |
st.session_state['export_base'] = parsed_frame.reset_index(drop=True)
|
|
|
|
|
|
|
|
|
|
| 2047 |
|
| 2048 |
# st.session_state['export_base'] = predict_dupes(st.session_state['export_base'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
|
| 2049 |
st.session_state['export_base'] = reassess_edge(st.session_state['export_base'], st.session_state['base_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var, salary_max)
|
global_func/reassess_edge.py
CHANGED
|
@@ -23,13 +23,20 @@ def reassess_edge(modified_frame: pd.DataFrame, base_frame: pd.DataFrame, maps_d
|
|
| 23 |
# Store the number of rows in the modified frame
|
| 24 |
num_modified_rows = len(modified_frame)
|
| 25 |
|
| 26 |
-
#
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
# Run predict_dupes on the combined frame
|
| 30 |
updated_combined_frame = predict_dupes(combined_frame, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var, max_salary)
|
| 31 |
|
| 32 |
-
# Extract the first N rows (which correspond to our modified frame)
|
| 33 |
-
result_frame = updated_combined_frame.
|
| 34 |
|
| 35 |
return result_frame
|
|
|
|
| 23 |
# Store the number of rows in the modified frame
|
| 24 |
num_modified_rows = len(modified_frame)
|
| 25 |
|
| 26 |
+
# Define columns to drop for memory efficiency
|
| 27 |
+
cols_to_drop = ['Dupes', 'Finish_percentile', 'Lineup Edge', 'Win%', 'Weighted Own', 'Geomean', 'Diversity']
|
| 28 |
+
|
| 29 |
+
# Drop the columns listed above (when present) from both frames before concatenating
|
| 30 |
+
modified_clean = modified_frame.drop(columns=[col for col in cols_to_drop if col in modified_frame.columns])
|
| 31 |
+
base_clean = base_frame.drop(columns=[col for col in cols_to_drop if col in base_frame.columns])
|
| 32 |
+
|
| 33 |
+
# Use ignore_index=True and avoid unnecessary copies
|
| 34 |
+
combined_frame = pd.concat([modified_clean, base_clean], ignore_index=True, copy=False)
|
| 35 |
|
| 36 |
# Run predict_dupes on the combined frame
|
| 37 |
updated_combined_frame = predict_dupes(combined_frame, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var, max_salary)
|
| 38 |
|
| 39 |
+
# Extract the first N rows (which correspond to our modified frame) - use iloc for efficiency
|
| 40 |
+
result_frame = updated_combined_frame.iloc[:num_modified_rows].copy()
|
| 41 |
|
| 42 |
return result_frame
|
global_func/stratification_function.py
CHANGED
|
@@ -5,15 +5,15 @@ def stratification_function(portfolio: pd.DataFrame, lineup_target: int, exclude
|
|
| 5 |
excluded_cols = ['salary', 'median', 'Own', 'Finish_percentile', 'Dupes', 'Stack', 'Size', 'Win%', 'Lineup Edge', 'Weighted Own', 'Geomean', 'Diversity']
|
| 6 |
player_columns = [col for col in portfolio.columns if col not in excluded_cols]
|
| 7 |
|
| 8 |
-
|
| 9 |
if sorting_choice == 'Finish_percentile':
|
| 10 |
-
|
| 11 |
else:
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
# Calculate
|
| 15 |
-
similarity_floor =
|
| 16 |
-
similarity_ceiling =
|
| 17 |
|
| 18 |
# Create evenly spaced target similarity scores
|
| 19 |
target_similarities = np.linspace(similarity_floor, similarity_ceiling, lineup_target)
|
|
@@ -22,11 +22,9 @@ def stratification_function(portfolio: pd.DataFrame, lineup_target: int, exclude
|
|
| 22 |
selected_indices = []
|
| 23 |
for target_sim in target_similarities:
|
| 24 |
# Find the index of the closest similarity score
|
| 25 |
-
closest_idx = (
|
| 26 |
if closest_idx not in selected_indices: # Avoid duplicates
|
| 27 |
selected_indices.append(closest_idx)
|
| 28 |
|
| 29 |
-
#
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
return concat_portfolio.sort_values(by=sorting_choice, ascending=False)
|
|
|
|
| 5 |
excluded_cols = ['salary', 'median', 'Own', 'Finish_percentile', 'Dupes', 'Stack', 'Size', 'Win%', 'Lineup Edge', 'Weighted Own', 'Geomean', 'Diversity']
|
| 6 |
player_columns = [col for col in portfolio.columns if col not in excluded_cols]
|
| 7 |
|
| 8 |
+
# Work with indices instead of copying entire DataFrame
|
| 9 |
if sorting_choice == 'Finish_percentile':
|
| 10 |
+
sorted_indices = portfolio[sorting_choice].sort_values(ascending=True).index
|
| 11 |
else:
|
| 12 |
+
sorted_indices = portfolio[sorting_choice].sort_values(ascending=False).index
|
| 13 |
+
|
| 14 |
+
# Calculate quantiles without copying
|
| 15 |
+
similarity_floor = portfolio[sorting_choice].quantile(low_threshold / 100)
|
| 16 |
+
similarity_ceiling = portfolio[sorting_choice].quantile(high_threshold / 100)
|
| 17 |
|
| 18 |
# Create evenly spaced target similarity scores
|
| 19 |
target_similarities = np.linspace(similarity_floor, similarity_ceiling, lineup_target)
|
|
|
|
| 22 |
selected_indices = []
|
| 23 |
for target_sim in target_similarities:
|
| 24 |
# Find the index of the closest similarity score
|
| 25 |
+
closest_idx = (portfolio[sorting_choice] - target_sim).abs().idxmin()
|
| 26 |
if closest_idx not in selected_indices: # Avoid duplicates
|
| 27 |
selected_indices.append(closest_idx)
|
| 28 |
|
| 29 |
+
# Return only the selected rows, sorted descending (list-based .loc and sort_values produce a new DataFrame, not a view)
|
| 30 |
+
return portfolio.loc[selected_indices].sort_values(by=sorting_choice, ascending=False)
|
|
|
|
|
|