DFS_Portfolio_Manager / global_func /stratification_function.py
James McCool
Implement vectorized calculations for salary, median, and ownership in app.py to enhance performance and memory efficiency. Refactor reassess_edge and stratification_function to minimize DataFrame copies and improve memory management. Update filtering logic to use boolean masks for better efficiency.
7eef51a
raw
history blame
1.62 kB
import pandas as pd
import numpy as np
def stratification_function(
    portfolio: pd.DataFrame,
    lineup_target: int,
    exclude_cols: list,
    sport: str,
    sorting_choice: str,
    low_threshold: float,
    high_threshold: float,
) -> pd.DataFrame:
    """Pick lineups spread evenly across a value range of ``sorting_choice``.

    The ``low_threshold`` and ``high_threshold`` percentiles of the
    ``sorting_choice`` column define a floor and a ceiling. ``lineup_target``
    evenly spaced target values are generated between them, and for each
    target the row whose ``sorting_choice`` value is closest is selected.
    A row is never selected twice, so fewer than ``lineup_target`` rows are
    returned when several targets resolve to the same row.

    Args:
        portfolio: Lineups to choose from; must contain ``sorting_choice``.
        lineup_target: Number of evenly spaced targets to generate.
        exclude_cols: Accepted for interface compatibility; not used here.
        sport: Accepted for interface compatibility; not used here.
        sorting_choice: Name of the numeric column to stratify on.
        low_threshold: Lower percentile bound, on a 0-100 scale.
        high_threshold: Upper percentile bound, on a 0-100 scale.

    Returns:
        A new DataFrame holding the selected rows (original index labels
        preserved), sorted by ``sorting_choice`` in descending order. The
        result is empty when ``portfolio`` has no rows or the
        ``sorting_choice`` column holds no non-null values.
    """
    # Work on a positionally indexed copy of the single score column so that
    # idxmin() yields row positions. Selecting by position keeps exactly one
    # row per pick even if the portfolio's index contains duplicate labels.
    scores = portfolio[sorting_choice].reset_index(drop=True)

    # Nothing to stratify: an empty or all-null column has no closest row.
    if not scores.notna().any():
        return portfolio.iloc[0:0].copy()

    similarity_floor = scores.quantile(low_threshold / 100)
    similarity_ceiling = scores.quantile(high_threshold / 100)

    # Evenly spaced target scores between the two percentile bounds.
    target_similarities = np.linspace(similarity_floor, similarity_ceiling, lineup_target)

    # dict.fromkeys de-duplicates while keeping first-seen order, matching a
    # manual "append if not already present" loop without the O(k) list scan.
    selected_positions = list(
        dict.fromkeys(
            (scores - target_sim).abs().idxmin()
            for target_sim in target_similarities
        )
    )

    # NOTE(review): the result is sorted descending for every sorting_choice,
    # including 'Finish_percentile' -- confirm that is the order callers want.
    return portfolio.iloc[selected_positions].sort_values(by=sorting_choice, ascending=False)