James McCool
committed on
Commit
·
3cfe4c4
1
Parent(s):
579a535
Refactor DataFrame optimization in app.py to enhance memory efficiency. Disable categorical conversion for specific columns to avoid issues with exposure_spread, while implementing smarter handling for numeric types. Introduce a new function to prepare DataFrames for exposure_spread, ensuring compatibility and improved performance during calculations.
Browse files
app.py
CHANGED
|
@@ -131,13 +131,29 @@ def chunk_name_matching(portfolio_names, csv_names, chunk_size=1000):
|
|
| 131 |
|
| 132 |
def optimize_dataframe_dtypes(df):
|
| 133 |
"""Optimize DataFrame data types for memory efficiency"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
for col in df.columns:
|
| 135 |
-
if df[col].dtype == '
|
| 136 |
-
#
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
return df
|
| 142 |
|
| 143 |
def create_memory_efficient_mappings(projections_df, site_var, type_var, sport_var):
|
|
@@ -284,6 +300,11 @@ def calculate_lineup_metrics(df, player_columns, map_dict, type_var, sport_var,
|
|
| 284 |
"""Centralized function to calculate salary, median, and ownership efficiently"""
|
| 285 |
df = df.copy() # Work on a copy to avoid modifying original
|
| 286 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
# Vectorized calculations
|
| 288 |
df['salary'] = calculate_salary_vectorized(df[player_columns], player_columns, map_dict, type_var, sport_var)
|
| 289 |
df['median'] = calculate_median_vectorized(df[player_columns], player_columns, map_dict, type_var, sport_var)
|
|
@@ -358,6 +379,17 @@ def create_team_filter_mask(df, player_columns, team_map, teams_to_filter, focus
|
|
| 358 |
|
| 359 |
return mask
|
| 360 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
def create_position_export_dict(column_name, csv_file, site_var, type_var, sport_var):
|
| 362 |
try:
|
| 363 |
# Remove any numbers from the column name to get the position
|
|
@@ -2037,7 +2069,9 @@ if selected_tab == 'Manage Portfolio':
|
|
| 2037 |
exp_submitted = st.form_submit_button("Export")
|
| 2038 |
if reg_submitted:
|
| 2039 |
st.session_state['settings_base'] = False
|
| 2040 |
-
|
|
|
|
|
|
|
| 2041 |
|
| 2042 |
# Use consolidated calculation function
|
| 2043 |
parsed_frame = calculate_lineup_metrics(
|
|
@@ -2056,7 +2090,9 @@ if selected_tab == 'Manage Portfolio':
|
|
| 2056 |
st.session_state['export_merge'] = st.session_state['working_frame'].copy()
|
| 2057 |
elif exp_submitted:
|
| 2058 |
st.session_state['settings_base'] = False
|
| 2059 |
-
|
|
|
|
|
|
|
| 2060 |
|
| 2061 |
# Use consolidated calculation function for export
|
| 2062 |
parsed_frame = calculate_lineup_metrics(
|
|
|
|
| 131 |
|
| 132 |
def optimize_dataframe_dtypes(df):
    """Downcast 64-bit numeric columns of ``df`` to smaller dtypes.

    Categorical conversion is intentionally not performed here: it caused
    issues with exposure_spread and other operations that need mutable
    object columns. Only numeric columns are touched.

    * ``float64`` columns become ``float32`` when every value lies strictly
      inside the float32 range. Only the *range* is checked; float32 keeps
      roughly 7 significant decimal digits, so values may be rounded.
    * ``int64`` columns become ``int16`` or ``int32`` when all values fit.

    Columns whose min/max cannot be compared (e.g. empty or all-NaN
    columns, where the bounds are NaN) are left unchanged.

    Args:
        df: DataFrame to optimize. It is modified in place.

    Returns:
        The same ``df`` object, for call chaining.
    """
    # Named limits instead of magic numbers scattered through the loop.
    float32_limit = 3.4e+38
    int16_min, int16_max = -32768, 32767
    int32_min, int32_max = -2147483648, 2147483647

    for col in df.columns:
        dtype = df[col].dtype
        is_float = dtype == 'float64'
        is_int = dtype == 'int64'
        if not (is_float or is_int):
            continue

        try:
            # Compute the bounds once; each is a full pass over the column.
            col_min = df[col].min()
            col_max = df[col].max()

            if is_float:
                if col_max < float32_limit and col_min > -float32_limit:
                    df[col] = df[col].astype('float32')
            elif col_max <= int16_max and col_min >= int16_min:
                df[col] = df[col].astype('int16')
            elif col_max <= int32_max and col_min >= int32_min:
                df[col] = df[col].astype('int32')
        except (TypeError, ValueError, OverflowError):
            # Best effort: a column that cannot be compared or cast keeps
            # its original dtype. Narrow exception list so that
            # KeyboardInterrupt/SystemExit and genuine bugs still surface.
            continue

    return df
|
| 158 |
|
| 159 |
def create_memory_efficient_mappings(projections_df, site_var, type_var, sport_var):
|
|
|
|
| 300 |
"""Centralized function to calculate salary, median, and ownership efficiently"""
|
| 301 |
df = df.copy() # Work on a copy to avoid modifying original
|
| 302 |
|
| 303 |
+
# Ensure player columns are object type to avoid categorical issues with exposure_spread
|
| 304 |
+
for col in player_columns:
|
| 305 |
+
if df[col].dtype.name == 'category':
|
| 306 |
+
df[col] = df[col].astype('object')
|
| 307 |
+
|
| 308 |
# Vectorized calculations
|
| 309 |
df['salary'] = calculate_salary_vectorized(df[player_columns], player_columns, map_dict, type_var, sport_var)
|
| 310 |
df['median'] = calculate_median_vectorized(df[player_columns], player_columns, map_dict, type_var, sport_var)
|
|
|
|
| 379 |
|
| 380 |
return mask
|
| 381 |
|
| 382 |
+
def prepare_dataframe_for_exposure_spread(df, player_columns):
    """Return a copy of ``df`` whose categorical player columns are object dtype.

    The input frame is never modified. Only columns that are both named in
    ``player_columns`` and currently of ``category`` dtype are converted;
    names that are not present in ``df`` are ignored, and all other columns
    keep their dtype.

    Args:
        df: Source DataFrame.
        player_columns: Iterable of column labels to normalise. ``None`` is
            treated as "no columns", and a single string is treated as one
            column label rather than being iterated character by character.

    Returns:
        A new DataFrame with the selected categorical columns cast to
        ``object``.
    """
    df_prepared = df.copy()

    if player_columns is None:
        return df_prepared
    if isinstance(player_columns, str):
        # Iterating a bare string would yield single characters, so the
        # intended column would silently be skipped.
        player_columns = [player_columns]

    # Convert any categorical player columns back to object type
    for col in player_columns:
        if col not in df_prepared.columns:
            continue
        if df_prepared[col].dtype.name == 'category':
            df_prepared[col] = df_prepared[col].astype('object')

    return df_prepared
|
| 392 |
+
|
| 393 |
def create_position_export_dict(column_name, csv_file, site_var, type_var, sport_var):
|
| 394 |
try:
|
| 395 |
# Remove any numbers from the column name to get the position
|
|
|
|
| 2069 |
exp_submitted = st.form_submit_button("Export")
|
| 2070 |
if reg_submitted:
|
| 2071 |
st.session_state['settings_base'] = False
|
| 2072 |
+
# Prepare DataFrame for exposure_spread to avoid categorical issues
|
| 2073 |
+
working_frame_prepared = prepare_dataframe_for_exposure_spread(st.session_state['working_frame'], st.session_state['player_columns'])
|
| 2074 |
+
parsed_frame = exposure_spread(working_frame_prepared, st.session_state['exposure_player'], exposure_target, ignore_stacks, remove_teams_exposure, specific_replacements, specific_columns, st.session_state['projections_df'], sport_var, type_var, salary_max, stacking_sports)
|
| 2075 |
|
| 2076 |
# Use consolidated calculation function
|
| 2077 |
parsed_frame = calculate_lineup_metrics(
|
|
|
|
| 2090 |
st.session_state['export_merge'] = st.session_state['working_frame'].copy()
|
| 2091 |
elif exp_submitted:
|
| 2092 |
st.session_state['settings_base'] = False
|
| 2093 |
+
# Prepare DataFrame for exposure_spread to avoid categorical issues
|
| 2094 |
+
export_base_prepared = prepare_dataframe_for_exposure_spread(st.session_state['export_base'], st.session_state['player_columns'])
|
| 2095 |
+
parsed_frame = exposure_spread(export_base_prepared, st.session_state['exposure_player'], exposure_target, ignore_stacks, remove_teams_exposure, specific_replacements, specific_columns, st.session_state['projections_df'], sport_var, type_var, salary_max, stacking_sports)
|
| 2096 |
|
| 2097 |
# Use consolidated calculation function for export
|
| 2098 |
parsed_frame = calculate_lineup_metrics(
|