Spaces:
Build error
Build error
James McCool
Refactor import statements across multiple files to replace 'fuzzywuzzy' with 'rapidfuzz' for improved performance and consistency in string matching functionality. Additionally, clean up unused imports in app.py and related global functions to enhance code clarity and maintainability.
d9db89f | import streamlit as st | |
| import numpy as np | |
| import pandas as pd | |
| import time | |
| import math | |
| from difflib import SequenceMatcher | |
def calculate_weighted_ownership(row_ownerships):
    """Collapse one row of ownership values into a single weighted figure.

    NaN entries are dropped and the remaining values are divided by 100.
    Each value is then averaged with the row mean, those pairwise averages
    are averaged again, the result is multiplied by the number of values,
    and the spread (max - min) is subtracted. Finally the figure is
    multiplied by 10000.

    Every step is linear in the input, so the net effect of the ``/ 100``
    on the way in and the ``* 10000`` on the way out is a factor of 100
    relative to the input units.
    NOTE(review): predict_dupes passes columns that were already divided by
    100, so the result there appears to come back in percentage units --
    confirm this is the intended scale.

    Args:
        row_ownerships: pandas Series of numeric ownership values. NaN
            entries are ignored.

    Returns:
        float: The weighted ownership value, or NaN when the row holds no
        non-NaN values (previously this raised ZeroDivisionError).
    """
    # Drop NaN values and rescale.
    values = row_ownerships.dropna() / 100
    count = len(values)

    # Nothing to aggregate: report "unknown" instead of dividing by zero.
    if count == 0:
        return float("nan")

    # Mean of all ownership values in the row.
    row_mean = values.mean()

    # Average each value with the overall mean, then average those results.
    value_means = [(val + row_mean) / 2 for val in values]
    avg_of_means = sum(value_means) / count

    # Scale by the number of values and penalise the spread of the row.
    weighted = avg_of_means * count
    weighted = weighted - (values.max() - values.min())

    return weighted * 10000
def calculate_player_similarity_score(portfolio, player_columns):
    """Score how different each lineup is from every other lineup.

    Each row is turned into the set of non-blank player names found in
    ``player_columns``. The score of a row is its mean Jaccard distance
    (1 - |A & B| / |A | B|) to all other rows, min-max normalised to the
    0-1 range when the scores are not all equal. Higher scores indicate
    more unique/different lineups.

    Args:
        portfolio: DataFrame containing the portfolio data.
        player_columns: List of column names containing player names.
            Non-string and blank cells are ignored.

    Returns:
        numpy.ndarray: One float score per row, in row order. An empty
        portfolio yields an empty array and a single-row portfolio yields
        ``[0.0]`` (there is nothing to compare it against).
    """
    n_rows = len(portfolio)
    if n_rows == 0:
        # .max()/.min() on an empty array would raise further down.
        return np.zeros(0)

    # Extract player data; NaN cells become blanks that are skipped below.
    player_data = portfolio[player_columns].fillna('')

    # One set of player names per row.
    lineups = [
        {val for val in row if isinstance(val, str) and val.strip() != ''}
        for row in player_data.to_numpy()
    ]

    # Map every distinct player to a stable numeric column ID.
    all_players = sorted(set().union(*lineups))
    player_to_id = {player: idx for idx, player in enumerate(all_players)}

    # Binary membership matrix: 1 if the player is present in the row.
    # int32 keeps the pairwise counts below from overflowing on wide rows.
    binary_matrix = np.zeros((n_rows, len(all_players)), dtype=np.int32)
    for row_idx, lineup in enumerate(lineups):
        for player in lineup:
            binary_matrix[row_idx, player_to_id[player]] = 1

    # intersection[i, j] = number of players rows i and j have in common.
    intersection_matrix = np.dot(binary_matrix, binary_matrix.T)

    # |A ∪ B| = |A| + |B| - |A ∩ B|
    row_sums = np.sum(binary_matrix, axis=1)
    union_matrix = row_sums[:, np.newaxis] + row_sums - intersection_matrix

    # Jaccard similarity; pairs with an empty union are left at 0.
    with np.errstate(divide='ignore', invalid='ignore'):
        jaccard_similarity = np.divide(
            intersection_matrix,
            union_matrix,
            out=np.zeros_like(intersection_matrix, dtype=float),
            where=union_matrix != 0,
        )

    # Convert similarity to distance and drop the self-comparisons.
    jaccard_distance = 1 - jaccard_similarity
    np.fill_diagonal(jaccard_distance, 0)

    # Average distance to all *other* rows; guard the single-row case so it
    # scores 0 instead of 0/0 = NaN.
    other_rows = max(n_rows - 1, 1)
    similarity_scores = np.sum(jaccard_distance, axis=1) / other_rows

    # Normalize to 0-1 scale where 1 = most unique/different.
    lowest = similarity_scores.min()
    highest = similarity_scores.max()
    if highest > lowest:
        similarity_scores = (similarity_scores - lowest) / (highest - lowest)

    return similarity_scores
def _captain_ownership_columns(portfolio, maps_dict, n_slots):
    """Add CPT/FLEX ownership and percent-rank columns for a captain layout.

    Column 0 is read as the captain (mapped through ``cpt_own_map``) and
    columns 1..n_slots-1 as FLEX slots (mapped through ``own_map``).
    Returns ``(dup_count_columns, own_columns, player_columns)``.
    """
    player_columns = [col for col in portfolio.columns[:n_slots] if col not in ['salary', 'median', 'Own']]
    slot_labels = ['CPT'] + [f'FLEX{i}' for i in range(1, n_slots)]
    dup_count_columns = [f'{label}_Own_percent_rank' for label in slot_labels]
    own_columns = [f'{label}_Own' for label in slot_labels]

    # Resolve ownership before any column is appended; the leading columns
    # are addressed by position.
    cpt_ownership = portfolio.iloc[:, 0].map(maps_dict['cpt_own_map'])
    flex_ownership = [
        portfolio.iloc[:, slot].map(maps_dict['own_map'])
        for slot in range(1, n_slots)
    ]

    # Rank every FLEX slot on one shared scale. Tied values receive the same
    # (average) rank, so slicing the pooled ranks back out by position gives
    # each cell the rank of its ownership value. Players missing from
    # own_map simply rank as NaN.
    n_rows = len(portfolio)
    pooled_rank = pd.concat(flex_ownership).rank(pct=True).to_numpy()

    portfolio['CPT_Own_percent_rank'] = cpt_ownership.rank(pct=True)
    for k in range(len(flex_ownership)):
        portfolio[f'FLEX{k + 1}_Own_percent_rank'] = pooled_rank[k * n_rows:(k + 1) * n_rows]

    portfolio['CPT_Own'] = cpt_ownership / 100
    for k, ownership in enumerate(flex_ownership, start=1):
        portfolio[f'FLEX{k}_Own'] = ownership / 100

    return dup_count_columns, own_columns, player_columns


def _classic_ownership_columns(portfolio, maps_dict):
    """Add per-player ownership and percent-rank columns for a classic layout.

    Every column other than 'salary', 'median' and 'Own' is counted as a
    player slot; that many leading columns are then read by position.
    Returns ``(dup_count_columns, own_columns, player_columns)``.
    """
    num_players = len([col for col in portfolio.columns if col not in ['salary', 'median', 'Own']])
    player_columns = [col for col in portfolio.columns[:num_players] if col not in ['salary', 'median', 'Own']]
    dup_count_columns = [f'player_{i}_percent_rank' for i in range(1, num_players + 1)]
    own_columns = [f'player_{i}_own' for i in range(1, num_players + 1)]

    for i in range(1, num_players + 1):
        names = portfolio.iloc[:, i - 1]
        portfolio[f'player_{i}_percent_rank'] = names.map(maps_dict['own_percent_rank'])
        portfolio[f'player_{i}_own'] = names.map(maps_dict['own_map']) / 100

    return dup_count_columns, own_columns, player_columns


def predict_dupes(portfolio, maps_dict, site_var, type_var, Contest_Size, strength_var, sport_var):
    """Add duplication, finish, win and uniqueness metrics to a lineup portfolio.

    The leading columns of ``portfolio`` are read by position as player
    names and mapped to ownership figures. From those the function derives
    the 'Dupes', 'Finish_percentile', 'Win%', 'Lineup Edge', 'Weighted Own',
    'Geomean' and 'Diversity' columns. All intermediate working columns are
    dropped before returning.

    Layout selection:
        * Showdown (either site), and Draftkings Classic when ``sport_var``
          is 'CS2', use a captain layout: 5 slots on Fanduel, 6 on
          Draftkings.
        * Every other Classic combination uses one slot per non-
          salary/median/Own column.
        * The salary figure used in the dupes formula is 60000 for Fanduel
          and 50000 for Draftkings.

    Args:
        portfolio: DataFrame with player-name columns first, plus 'salary',
            'median' and 'Own' columns. NOTE: working columns are written
            onto this frame in place; the returned frame is a new object.
        maps_dict: Dict holding 'own_map' (player -> ownership, divided by
            100 here), plus 'cpt_own_map' for captain layouts and
            'own_percent_rank' for classic layouts.
        site_var: 'Fanduel' or 'Draftkings'.
        type_var: 'Showdown' or 'Classic'.
        Contest_Size: Numeric contest size used to scale the metrics.
        strength_var: 'Weak', 'Average' or 'Sharp'; selects the dupes and
            finish-percentile multipliers.
        sport_var: Sport identifier; only the value 'CS2' changes behaviour.

    Returns:
        DataFrame: The portfolio with the metric columns added and the
        working columns removed.

    Raises:
        ValueError: If ``strength_var``, ``site_var`` or ``type_var`` is not
            one of the supported values.
    """
    # strength -> (dupes multiplier, finish-percentile multiplier)
    strength_profiles = {
        'Weak': (.75, .90),
        'Average': (1.00, 1.00),
        'Sharp': (1.25, 1.10),
    }
    # site -> (salary figure used in the dupes formula, captain-layout slots)
    site_rules = {
        'Fanduel': (60000, 5),
        'Draftkings': (50000, 6),
    }
    if strength_var not in strength_profiles:
        raise ValueError(f"Unsupported strength_var: {strength_var!r}")
    if site_var not in site_rules:
        raise ValueError(f"Unsupported site_var: {site_var!r}")
    if type_var not in ('Showdown', 'Classic'):
        raise ValueError(f"Unsupported type_var: {type_var!r}")

    dupes_multiplier, percentile_multiplier = strength_profiles[strength_var]
    salary_cap, captain_slots = site_rules[site_var]
    max_ownership = max(maps_dict['own_map'].values()) / 100

    uses_captain_layout = type_var == 'Showdown' or (site_var == 'Draftkings' and sport_var == 'CS2')
    if uses_captain_layout:
        dup_count_columns, own_columns, player_columns = _captain_ownership_columns(portfolio, maps_dict, captain_slots)
    else:
        dup_count_columns, own_columns, player_columns = _classic_ownership_columns(portfolio, maps_dict)

    # Working columns that are removed again before returning.
    calc_columns = ['own_product', 'own_average', 'own_sum', 'avg_own_rank', 'dupes_calc', 'low_own_count', 'Ref_Proj', 'Max_Proj', 'Min_Proj', 'Avg_Ref', 'own_ratio']

    portfolio['own_product'] = portfolio[own_columns].product(axis=1)
    portfolio['own_average'] = (portfolio['Own'].max() * .33) / 100
    portfolio['own_sum'] = portfolio[own_columns].sum(axis=1)
    portfolio['avg_own_rank'] = portfolio[dup_count_columns].mean(axis=1)

    # Dupes formula: ownership term scaled by contest size, adjusted by how
    # close the lineup's salary is to the site's salary figure.
    portfolio['dupes_calc'] = (
        (portfolio['own_product'] * portfolio['avg_own_rank']) * Contest_Size
        + ((portfolio['salary'] - (salary_cap - portfolio['Own'])) / 100)
        - ((salary_cap - portfolio['salary']) / 100)
    )
    portfolio['dupes_calc'] = portfolio['dupes_calc'] * dupes_multiplier

    # Round, floor non-positive values at 0, and otherwise subtract one.
    rounded_dupes = np.round(portfolio['dupes_calc'], 0)
    portfolio['Dupes'] = np.where(rounded_dupes <= 0, 0, rounded_dupes - 1)

    # Rows that contain a slot at exactly the maximum ownership keep the full
    # ownership sum; all other rows have max_ownership subtracted first.
    portfolio['own_ratio'] = np.where(
        portfolio[own_columns].isin([max_ownership]).any(axis=1),
        portfolio['own_sum'] / portfolio['own_average'],
        (portfolio['own_sum'] - max_ownership) / portfolio['own_average']
    )

    percentile_cut_scalar = portfolio['median'].max()
    own_ratio_nerf = 2 if (type_var == 'Classic' and sport_var == 'CS2') else 1.5

    # Values below the .0005 floor are clamped to it; the rest are halved.
    raw_percentile = (portfolio['own_ratio'] - own_ratio_nerf) / ((5 * (portfolio['median'] / percentile_cut_scalar)) / 3)
    portfolio['Finish_percentile'] = np.where(raw_percentile < .0005, .0005, raw_percentile / 2)

    portfolio['Ref_Proj'] = portfolio['median'].max()
    portfolio['Max_Proj'] = portfolio['Ref_Proj'] + 10
    portfolio['Min_Proj'] = portfolio['Ref_Proj'] - 10
    portfolio['Avg_Ref'] = (portfolio['Max_Proj'] + portfolio['Min_Proj']) / 2

    portfolio['Win%'] = (((portfolio['median'] / portfolio['Avg_Ref']) - (0.1 + ((portfolio['Ref_Proj'] - portfolio['median']) / 100))) / (Contest_Size / 1000)) / 10
    # Rescale so the best lineup sits at five times the 1/Contest_Size baseline.
    max_allowed_win = (1 / Contest_Size) * 5
    portfolio['Win%'] = portfolio['Win%'] / portfolio['Win%'].max() * max_allowed_win

    portfolio['Finish_percentile'] = portfolio['Finish_percentile'] + .005 + (.005 * (Contest_Size / 10000))
    portfolio['Finish_percentile'] = portfolio['Finish_percentile'] * percentile_multiplier
    portfolio['Win%'] = portfolio['Win%'] * (1 - portfolio['Finish_percentile'])

    # Number of slots under 10% ownership; the finish percentile is divided
    # by that count whenever it is positive.
    portfolio['low_own_count'] = (portfolio[own_columns] < 0.10).sum(axis=1)
    portfolio['Finish_percentile'] = np.where(
        portfolio['low_own_count'] <= 0,
        portfolio['Finish_percentile'],
        portfolio['Finish_percentile'] / portfolio['low_own_count']
    )

    portfolio['Lineup Edge'] = portfolio['Win%'] * ((.5 - portfolio['Finish_percentile']) * (Contest_Size / 2.5))
    # Split the edge across the predicted duplicates.
    portfolio['Lineup Edge'] = np.where(
        portfolio['Dupes'] > 0,
        portfolio['Lineup Edge'] / (portfolio['Dupes'] + 1),
        portfolio['Lineup Edge']
    )
    # Centre the edge on the portfolio mean.
    portfolio['Lineup Edge'] = portfolio['Lineup Edge'] - portfolio['Lineup Edge'].mean()

    portfolio['Weighted Own'] = portfolio[own_columns].apply(calculate_weighted_ownership, axis=1)
    portfolio['Geomean'] = np.power((portfolio[own_columns] * 100).product(axis=1), 1 / len(own_columns))

    # Similarity score based on actual player selection.
    portfolio['Diversity'] = calculate_player_similarity_score(portfolio, player_columns)

    return portfolio.drop(columns=dup_count_columns + own_columns + calc_columns)