James McCool
committed on
Commit
·
8833483
1
Parent(s):
7c0c068
Implement reassess_finish_percentile function in reassess_edge.py to recalculate finish_percentile based on ownership and contest size, enhancing accuracy in ownership adjustments. Remove debug print statements from app.py for cleaner code.
Browse files- app.py +1 -8
- global_func/reassess_edge.py +31 -1
app.py
CHANGED
|
@@ -38,8 +38,6 @@ def create_position_export_dict(column_name, csv_file, site_var, type_var, sport
|
|
| 38 |
# Remove any numbers from the column name to get the position
|
| 39 |
import re
|
| 40 |
position_filter = re.sub(r'\d+$', '', column_name)
|
| 41 |
-
|
| 42 |
-
print(position_filter)
|
| 43 |
|
| 44 |
# Filter CSV file by position
|
| 45 |
if 'Position' in csv_file.columns:
|
|
@@ -81,12 +79,10 @@ def create_position_export_dict(column_name, csv_file, site_var, type_var, sport
|
|
| 81 |
|
| 82 |
try:
|
| 83 |
filtered_df = filtered_df.sort_values(by='Salary', ascending=False).drop_duplicates(subset=['Name'])
|
| 84 |
-
print(dict(zip(filtered_df['Name'], filtered_df['Name + ID'])))
|
| 85 |
return dict(zip(filtered_df['Name'], filtered_df['Name + ID']))
|
| 86 |
except:
|
| 87 |
try:
|
| 88 |
filtered_df = filtered_df.sort_values(by='Salary', ascending=False).drop_duplicates(subset=['Nickname'])
|
| 89 |
-
print(dict(zip(filtered_df['Nickname'], filtered_df['Id'])))
|
| 90 |
return dict(zip(filtered_df['Nickname'], filtered_df['Id']))
|
| 91 |
except:
|
| 92 |
# Final fallback
|
|
@@ -1097,7 +1093,6 @@ with tab2:
|
|
| 1097 |
st.session_state['working_frame']['median'] = st.session_state['working_frame']['median'].astype('float32')
|
| 1098 |
st.session_state['working_frame']['salary'] = st.session_state['working_frame']['salary'].astype('uint16')
|
| 1099 |
|
| 1100 |
-
print(st.session_state['working_frame'].head(10))
|
| 1101 |
st.session_state['base_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
|
| 1102 |
st.session_state['working_frame'] = st.session_state['base_frame'].copy()
|
| 1103 |
# st.session_state['highest_owned_teams'] = st.session_state['projections_df'][~st.session_state['projections_df']['position'].isin(['P', 'SP'])].groupby('team')['ownership'].sum().sort_values(ascending=False).head(3).index.tolist()
|
|
@@ -1513,8 +1508,7 @@ with tab2:
|
|
| 1513 |
st.session_state['working_frame']['median'] = st.session_state['working_frame']['median'].astype('float32')
|
| 1514 |
st.session_state['working_frame']['salary'] = st.session_state['working_frame']['salary'].astype('uint16')
|
| 1515 |
|
| 1516 |
-
|
| 1517 |
-
st.session_state['working_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
|
| 1518 |
st.session_state['export_merge'] = st.session_state['working_frame'].copy()
|
| 1519 |
elif exp_submitted:
|
| 1520 |
st.session_state['settings_base'] = False
|
|
@@ -1611,7 +1605,6 @@ with tab2:
|
|
| 1611 |
st.session_state['export_base']['median'] = st.session_state['export_base']['median'].astype('float32')
|
| 1612 |
st.session_state['export_base']['salary'] = st.session_state['export_base']['salary'].astype('uint16')
|
| 1613 |
|
| 1614 |
-
print(st.session_state['export_base'].head(10))
|
| 1615 |
# st.session_state['export_base'] = predict_dupes(st.session_state['export_base'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
|
| 1616 |
st.session_state['export_base'] = reassess_edge(st.session_state['export_base'], prior_frame, st.session_state['map_dict'], Contest_Size, salary_max)
|
| 1617 |
st.session_state['export_merge'] = st.session_state['export_base'].copy()
|
|
|
|
| 38 |
# Remove any numbers from the column name to get the position
|
| 39 |
import re
|
| 40 |
position_filter = re.sub(r'\d+$', '', column_name)
|
|
|
|
|
|
|
| 41 |
|
| 42 |
# Filter CSV file by position
|
| 43 |
if 'Position' in csv_file.columns:
|
|
|
|
| 79 |
|
| 80 |
try:
|
| 81 |
filtered_df = filtered_df.sort_values(by='Salary', ascending=False).drop_duplicates(subset=['Name'])
|
|
|
|
| 82 |
return dict(zip(filtered_df['Name'], filtered_df['Name + ID']))
|
| 83 |
except:
|
| 84 |
try:
|
| 85 |
filtered_df = filtered_df.sort_values(by='Salary', ascending=False).drop_duplicates(subset=['Nickname'])
|
|
|
|
| 86 |
return dict(zip(filtered_df['Nickname'], filtered_df['Id']))
|
| 87 |
except:
|
| 88 |
# Final fallback
|
|
|
|
| 1093 |
st.session_state['working_frame']['median'] = st.session_state['working_frame']['median'].astype('float32')
|
| 1094 |
st.session_state['working_frame']['salary'] = st.session_state['working_frame']['salary'].astype('uint16')
|
| 1095 |
|
|
|
|
| 1096 |
st.session_state['base_frame'] = predict_dupes(st.session_state['working_frame'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
|
| 1097 |
st.session_state['working_frame'] = st.session_state['base_frame'].copy()
|
| 1098 |
# st.session_state['highest_owned_teams'] = st.session_state['projections_df'][~st.session_state['projections_df']['position'].isin(['P', 'SP'])].groupby('team')['ownership'].sum().sort_values(ascending=False).head(3).index.tolist()
|
|
|
|
| 1508 |
st.session_state['working_frame']['median'] = st.session_state['working_frame']['median'].astype('float32')
|
| 1509 |
st.session_state['working_frame']['salary'] = st.session_state['working_frame']['salary'].astype('uint16')
|
| 1510 |
|
| 1511 |
+
st.session_state['working_frame'] = reassess_edge(st.session_state['working_frame'], prior_frame, st.session_state['map_dict'], Contest_Size, salary_max)
|
|
|
|
| 1512 |
st.session_state['export_merge'] = st.session_state['working_frame'].copy()
|
| 1513 |
elif exp_submitted:
|
| 1514 |
st.session_state['settings_base'] = False
|
|
|
|
| 1605 |
st.session_state['export_base']['median'] = st.session_state['export_base']['median'].astype('float32')
|
| 1606 |
st.session_state['export_base']['salary'] = st.session_state['export_base']['salary'].astype('uint16')
|
| 1607 |
|
|
|
|
| 1608 |
# st.session_state['export_base'] = predict_dupes(st.session_state['export_base'], st.session_state['map_dict'], site_var, type_var, Contest_Size, strength_var, sport_var)
|
| 1609 |
st.session_state['export_base'] = reassess_edge(st.session_state['export_base'], prior_frame, st.session_state['map_dict'], Contest_Size, salary_max)
|
| 1610 |
st.session_state['export_merge'] = st.session_state['export_base'].copy()
|
global_func/reassess_edge.py
CHANGED
|
@@ -44,6 +44,34 @@ def calculate_weighted_ownership_single_row(row_ownerships):
|
|
| 44 |
# Convert back to percentage form
|
| 45 |
return weighted * 10000
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
def reassess_dupes(row: pd.Series, salary_max: int) -> float:
|
| 48 |
# Convert to signed integers to avoid uint16 wrap-around
|
| 49 |
salary = int(row['salary'])
|
|
@@ -105,10 +133,12 @@ def reassess_edge(refactored_frame: pd.DataFrame, original_frame: pd.DataFrame,
|
|
| 105 |
|
| 106 |
for lineups in change_mask.index:
|
| 107 |
refactored_df.loc[lineups, 'Dupes'] = reassess_dupes(refactored_df.loc[lineups, :], salary_max)
|
| 108 |
-
refactored_df.loc[lineups, 'Finish_percentile'] = refactored_df.loc[lineups,
|
| 109 |
refactored_df.loc[lineups, 'Win%'] = refactored_df.loc[lineups, 'Win%']
|
| 110 |
refactored_df.loc[lineups, 'Edge'] = reassess_lineup_edge(refactored_df.loc[lineups, :], Contest_Size)
|
| 111 |
refactored_df.loc[lineups, 'Weighted Own'] = calculate_weighted_ownership_single_row(refactored_df.loc[lineups, own_columns])
|
| 112 |
refactored_df.loc[lineups, 'Geomean'] = np.power((refactored_df.loc[lineups, own_columns] * 100).product(), 1 / len(own_columns))
|
|
|
|
|
|
|
| 113 |
|
| 114 |
return refactored_df
|
|
|
|
| 44 |
# Convert back to percentage form
|
| 45 |
return weighted * 10000
|
| 46 |
|
| 47 |
+
def reassess_finish_percentile(row: pd.Series, own_columns: list, Contest_Size: int) -> float:
    """
    Recalculate Finish_percentile for an individual lineup row.

    The stored percentile is bumped by a flat 0.5% plus a contest-size-scaled
    amount, then scaled by the lineup's cumulative ownership (dampened as the
    contest grows), and finally divided by the number of low-owned players
    in the lineup, if any.

    Args:
        row: pandas Series for one lineup; must contain 'Finish_percentile',
            'Own', and every column named in own_columns.
        own_columns: list of per-player ownership column names.
        Contest_Size: number of entries in the contest; larger contests
            increase the base bump and dampen the ownership scaling.

    Returns:
        float: the recalculated finish percentile.
    """
    # Tuning knob kept for future calibration; currently a no-op (1.00).
    percentile_multiplier = 1.00

    # Step 1: Adjust Finish_percentile — flat 0.005 bump plus a bump that
    # grows linearly with contest size (0.005 per 10,000 entries).
    finish_percentile = row['Finish_percentile'] + 0.005 + (0.005 * (Contest_Size / 10000))

    # Step 2: Apply multiplier and ownership adjustment. The denominator
    # grows with contest size, slightly dampening the ownership effect in
    # large fields.
    finish_percentile = finish_percentile * percentile_multiplier * (row['Own'] / (100 + (Contest_Size / 1000)))

    # Step 3: Count low ownership players (individual ownership below 0.10).
    low_own_count = sum(1 for col in own_columns if row[col] < 0.10)

    # Step 4: Apply low ownership adjustment — dividing shrinks the
    # percentile for lineups with contrarian players.
    # NOTE(review): presumably a lower finish percentile is better — confirm
    # against how Finish_percentile is consumed downstream.
    if low_own_count > 0:
        finish_percentile = finish_percentile / low_own_count

    return finish_percentile
|
| 74 |
+
|
| 75 |
def reassess_dupes(row: pd.Series, salary_max: int) -> float:
|
| 76 |
# Convert to signed integers to avoid uint16 wrap-around
|
| 77 |
salary = int(row['salary'])
|
|
|
|
| 133 |
|
| 134 |
for lineups in change_mask.index:
|
| 135 |
refactored_df.loc[lineups, 'Dupes'] = reassess_dupes(refactored_df.loc[lineups, :], salary_max)
|
| 136 |
+
refactored_df.loc[lineups, 'Finish_percentile'] = reassess_finish_percentile(refactored_df.loc[lineups, :], own_columns, Contest_Size)
|
| 137 |
refactored_df.loc[lineups, 'Win%'] = refactored_df.loc[lineups, 'Win%']
|
| 138 |
refactored_df.loc[lineups, 'Edge'] = reassess_lineup_edge(refactored_df.loc[lineups, :], Contest_Size)
|
| 139 |
refactored_df.loc[lineups, 'Weighted Own'] = calculate_weighted_ownership_single_row(refactored_df.loc[lineups, own_columns])
|
| 140 |
refactored_df.loc[lineups, 'Geomean'] = np.power((refactored_df.loc[lineups, own_columns] * 100).product(), 1 / len(own_columns))
|
| 141 |
+
|
| 142 |
+
refactored_df = refactored_df.drop(columns=own_columns)
|
| 143 |
|
| 144 |
return refactored_df
|