# Paydirt_model_updates / src/sports/nhl_functions.py
# Update src/sports/nhl_functions.py (commit 676aabb, verified) - Multichem
import time
import re
# Numpy
from numpy import where as np_where
from numpy import random as np_random
from numpy import array as np_array
from numpy import nan as np_nan
from numpy import inf as np_inf
from numpy import zeros as np_zeros
from numpy import clip as np_clip
# Pandas
from pandas import DataFrame
from pandas import merge as pd_merge
from pandas import concat as pd_concat
from pandas import Series as pd_Series
from pandas import options as pd_options
from pandas import errors as pd_errors
from pandas import merge, to_numeric
from pandas import options as poptions
from pandas import set_option
# Time
import time
from time import sleep as time_sleep
from pytz import timezone as tz
from pytz import timezone as pytz_timezone
from datetime import datetime
from datetime import date
# MISC
import json
import os
import subprocess
from ortools.linear_solver import pywraplp
from random import random
from random import choice
pd_options.mode.chained_assignment = None # default='warn'
from warnings import simplefilter
simplefilter(action="ignore", category=pd_errors.PerformanceWarning)
poptions.mode.chained_assignment = None # default='warn'
set_option('future.no_silent_downcasting', True)
import streamlit as st
from database import *
# Sentinel NaN used to zero out missing ownership values further down.
nan_value = float("NaN")
# Module-level state shared by the builder/sim functions below.
sim_teams = []
cut_slate = 0
total_sims = 1000
model_version = 1
# NOTE(review): `gc` (sheets client) and `NHL_Master_hold` (spreadsheet URL)
# are presumably provided by the `from database import *` above -- this runs
# network I/O at import time; confirm against database.py.
sh = gc.open_by_url(NHL_Master_hold)
worksheet = sh.worksheet('Slate_Info')
slate_info = DataFrame(worksheet.get_all_records())
slate_info = slate_info.replace('', np_nan)
# Slate labels the per-slate builders iterate over.
slate_options = ['Main Slate', 'Secondary Slate', 'Late Slate']
# Empty accumulator frames: these define the output schemas for DraftKings (dk_*)
# and FanDuel (fd_*) player, stack, and power-play-stack tables.
dk_player_hold = DataFrame(columns=['Player', 'Position', 'Line', 'PP Unit', 'Team', 'Opp', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '2x%', '3x%', '4x%', 'Own', 'Small Field Own%', 'Large Field Own%', 'Cash Own%', 'CPT_Own', 'Site', 'Type', 'Slate', 'player_id'])
dk_stacks_hold = DataFrame(columns=['Player', 'SK1', 'SK2', 'SK3', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '50+%', '2x%', '3x%', '4x%', 'Own', 'Site', 'Type', 'Slate'])
dk_pp_stacks_hold = DataFrame(columns=['Player', 'SK1', 'SK2', 'SK3', 'SK4', 'SK5', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '75+%', '2x%', '3x%', '4x%', 'Own', 'Site', 'Type', 'Slate'])
fd_player_hold = DataFrame(columns=['Player', 'Position', 'Line', 'PP Unit', 'Team', 'Opp', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '2x%', '3x%', '4x%', 'Own', 'Small Field Own%', 'Large Field Own%', 'Cash Own%', 'CPT_Own', 'Site', 'Type', 'Slate', 'player_id'])
fd_stacks_hold = DataFrame(columns=['Player', 'SK1', 'SK2', 'SK3', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '50+%', '2x%', '3x%', '4x%', 'Own', 'Site', 'Type', 'Slate'])
fd_pp_stacks_hold = DataFrame(columns=['Player', 'SK1', 'SK2', 'SK3', 'SK4', 'SK5', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '75+%', '2x%', '3x%', '4x%', 'Own', 'Site', 'Type', 'Slate'])
# Name normalization: parallel lists -- wrong_names[i] is replaced by right_names[i].
wrong_names = ['Arseniy Gritsyuk', 'Benjamin Kindel', 'Christopher Tanev', 'Fredrick Gaudreau', 'Jack Hughes', 'Joshua Mahura', 'Maxim Shabanov', 'Mikey Eyssimont', 'Nate Bastian', 'Zachary Aston-Reese', 'Zachary Bolduc', 'Jacob Middleton', 'Mitchell Marner', 'Maxwell Crozier']
right_names = ['Arseny Gritsyuk', 'Ben Kindel', 'Chris Tanev', 'Freddy Gaudreau', 'Jack Hughes', 'Josh Mahura', 'Max Shabanov', 'Michael Eyssimont', 'Nathan Bastian', 'Zach Aston-Reese', 'Zack Bolduc', 'Jake Middleton', 'Mitch Marner', 'Max Crozier']
def run_go_classic_lineup_generator(site="DK", sport="NHL"):
    """Kick off the compiled Go lineup generator once Python data prep is done.

    Args:
        site: DFS site -- "DK" selects the DraftKings binary, anything else
            selects the FanDuel binary.
        sport: Sport tag forwarded to the Go binary as an argument.

    Returns:
        True when the Go process exits cleanly, False on any failure
        (non-zero exit, missing binary, or unexpected error).
    """
    try:
        st.write(f"Starting Go {sport} lineup generation...")
        t0 = time.time()
        # This module lives at <root>/src/sports/, so the project root is two
        # directories up; the Go binaries sit under <root>/dk_nhl_go/ and
        # <root>/fd_nhl_go/ (per Dockerfile).
        here = os.path.dirname(os.path.abspath(__file__))
        project_root = os.path.dirname(os.path.dirname(here))
        binary_folder = "dk_nhl_go" if site == "DK" else "fd_nhl_go"
        go_binary = os.path.join(project_root, binary_folder, "NHL_seed_frames")
        st.write(f"Looking for binary at: {go_binary}")
        # check=True turns a non-zero exit code into CalledProcessError below.
        result = subprocess.run(
            [go_binary, site, sport],
            capture_output=True,
            text=True,
            check=True,
            cwd=project_root,  # run from project root
        )
        elapsed = time.time() - t0
        st.write(f"Go {sport} processing completed in {elapsed:.2f} seconds")
        if result.stdout:
            st.write("Go output:", result.stdout)
        if result.stderr:
            st.write("Go warnings:", result.stderr)
        return True
    except subprocess.CalledProcessError as e:
        st.write(f"Go process failed with exit code {e.returncode}")
        if e.stdout:
            st.write("Stdout:", e.stdout)
        if e.stderr:
            st.write("Error output:", e.stderr)
        return False
    except FileNotFoundError as e:
        st.write(f"Error: Go binary not found at {go_binary}")
        st.write(f"FileNotFoundError: {e}")
        st.write(f"Current working directory: {os.getcwd()}")
        st.write(f"Project root: {project_root}")
        return False
    except Exception as e:
        st.write(f"Unexpected error: {e}")
        return False
def init_team_results(model_source: DataFrame, position_reqs: dict, salary_cap: int, max_skaters: int, teams_to_optimize: list, combos: int, player_team_map: dict):
    """Build an optimal stacked lineup per team, plus randomized swap variations.

    For each team in ``teams_to_optimize`` this solves a SCIP integer program
    maximizing total projected ``Median`` subject to DFS roster rules:
    position requirements (with FanDuel FLEX handling), salary cap, no
    skaters opposing the selected goalie, a per-team skater ceiling, at
    least three distinct teams, the target team's best-Median skater forced
    in, and exactly ``max_skaters`` skaters from the target team. It then
    produces ``combos`` randomized variations of each optimal lineup by
    swapping 1-3 positions at a time.

    Args:
        model_source: Player pool with Name, Team, Opp, Position (slash
            delimited multi-eligibility), Salary, Median and Own columns.
        position_reqs: Required count per roster slot; keys starting with
            'FLEX' are collapsed into one combined 'FLEX' count.
        salary_cap: Maximum total lineup salary.
        max_skaters: Exact skater count forced from the target team (also
            the per-team skater cap).
        teams_to_optimize: Teams to build a stack around (one solve each).
        combos: Number of random swap variations generated per team.
        player_team_map: Player name -> team lookup used by the swap logic
            to keep the 3-team rule intact.

    Returns:
        list of DataFrames, one per target team, each sorted by
        Total_Median descending with a 'Stack' count column.

    NOTE(review): several comments below mention pitchers/OF/C_1B -- this
    function appears adapted from an MLB version; the NHL logic keys on 'G'
    (goalie) and C/W/D skater slots.
    """
    df = model_source.copy()
    # Parse eligible positions for each player ('C/W' -> ['C', 'W'])
    df['eligible_positions'] = df['Position'].apply(lambda x: x.split('/'))
    # Normalize position_reqs: combine FLEX1, FLEX2, etc. into a single FLEX count
    normalized_position_reqs = {}
    flex_count = 0
    for pos, req in position_reqs.items():
        if pos.startswith('FLEX'):
            # Sum all FLEX-related positions (FLEX, FLEX1, FLEX2, etc.)
            flex_count += req
        else:
            normalized_position_reqs[pos] = req
    # Add the combined FLEX count if there are any FLEX positions
    if flex_count > 0:
        normalized_position_reqs['FLEX'] = flex_count
    # Use normalized_position_reqs for the rest of the function.
    # NOTE: after this point only the single 'FLEX' key can exist, so the
    # repeated "or 'FLEX1' in ... or 'FLEX2' in ..." checks below are
    # redundant but harmless.
    position_reqs = normalized_position_reqs
    # Store results for all teams
    all_team_results = []
    for target_team in teams_to_optimize:
        # Create a filtered dataframe that excludes goalies opposing the
        # target team (a goalie facing our stack can never be rostered).
        target_team_players = df[df['Team'] == target_team]
        if not target_team_players.empty:
            # Get the opponent team from any player on the target team
            opponent_team = target_team_players.iloc[0]['Opp']
            # Filter out Gs who are on the opponent team
            filtered_df = df.copy()
            opposing_sp_mask = (filtered_df['Position'].str.contains('G')) & (filtered_df['Team'] == opponent_team)
            filtered_df = filtered_df[~opposing_sp_mask]
        else:
            # If no players found for target team, use original dataframe
            filtered_df = df
        # Create a new solver for each team
        solver = pywraplp.Solver.CreateSolver('SCIP')
        if not solver:
            raise Exception("Could not create solver.")
        # Variables: x[i, pos] = 1 if player i is used at position pos
        x = {}
        for i, row in filtered_df.iterrows():
            for pos in row['eligible_positions']:
                x[(i, pos)] = solver.BoolVar(f'x_{i}_{pos}')
        # Special UTIL/FLEX variable for FanDuel-style slates: any C/W/D
        # skater may additionally fill a FLEX slot.
        if 'FLEX' in position_reqs or 'FLEX1' in position_reqs or 'FLEX2' in position_reqs:
            util_x = {}
            for i, row in filtered_df.iterrows():
                util_positions = ['C', 'W', 'D']
                if any(pos in row['eligible_positions'] for pos in util_positions):
                    util_x[i] = solver.BoolVar(f'util_{i}')
        # Each player can only be used once (including UTIL/FLEX)
        for i in filtered_df.index:
            if ('FLEX' in position_reqs or 'FLEX1' in position_reqs or 'FLEX2' in position_reqs) and i in util_x:
                solver.Add(solver.Sum([x[(i, pos)] for pos in filtered_df.loc[i, 'eligible_positions']]) + util_x[i] <= 1)
            else:
                solver.Add(solver.Sum([x[(i, pos)] for pos in filtered_df.loc[i, 'eligible_positions']]) <= 1)
        # Handle position requirements based on whether FLEX slots exist
        if 'FLEX' in position_reqs or 'FLEX1' in position_reqs or 'FLEX2' in position_reqs:
            # Regular positions (the excluded keys are MLB leftovers plus FLEX itself)
            for pos, req in position_reqs.items():
                if pos not in ['P', 'C_1B', 'UTIL', 'C', '1B']:  # MLB-era exclusions -- TODO confirm 'C' should be skipped for NHL
                    pos_players = [i for i in filtered_df.index if pos in filtered_df.loc[i, 'eligible_positions']]
                    if pos_players:
                        solver.Add(solver.Sum([x[(i, pos)] for i in pos_players]) == req)
            # FLEX slots (separate variables that don't conflict with other positions)
            if 'FLEX' in position_reqs or 'FLEX1' in position_reqs or 'FLEX2' in position_reqs:
                util_players = [i for i in util_x.keys()]
                if util_players:
                    solver.Add(solver.Sum([util_x[i] for i in util_players]) == position_reqs['FLEX'])
        else:
            # Standard DraftKings requirements: fill every slot exactly
            for pos, req in position_reqs.items():
                pos_players = [i for i in filtered_df.index if pos in filtered_df.loc[i, 'eligible_positions']]
                if pos_players:
                    solver.Add(solver.Sum([x[(i, pos)] for i in pos_players]) == req)
        # Salary cap (include FLEX players)
        if 'FLEX' in position_reqs or 'FLEX1' in position_reqs or 'FLEX2' in position_reqs:
            solver.Add(
                solver.Sum(
                    x[(i, pos)] * filtered_df.loc[i, 'Salary']
                    for i in filtered_df.index for pos in filtered_df.loc[i, 'eligible_positions']
                ) + solver.Sum(
                    util_x[i] * filtered_df.loc[i, 'Salary']
                    for i in util_x.keys()
                ) <= salary_cap
            )
        else:
            solver.Add(
                solver.Sum(
                    x[(i, pos)] * filtered_df.loc[i, 'Salary']
                    for i in filtered_df.index for pos in filtered_df.loc[i, 'eligible_positions']
                ) <= salary_cap
            )
        # No opposing skaters against the selected goalie: for every potential
        # goalie, forbid pairing him with any skater from the team he faces.
        for i, row in filtered_df.iterrows():
            if 'G' in row['eligible_positions']:
                goalie_opp = row['Opp']
                goalie_var = x.get((i, 'G'))
                if goalie_var:
                    for j, srow in filtered_df.iterrows():
                        if srow['Team'] == goalie_opp and 'G' not in srow['eligible_positions']:
                            for pos in srow['eligible_positions']:
                                if (j, pos) in x:
                                    solver.Add(goalie_var + x[(j, pos)] <= 1)
                            # Also block the opposing skater's FLEX variable
                            if ('FLEX' in position_reqs or 'FLEX1' in position_reqs or 'FLEX2' in position_reqs) and j in util_x:
                                solver.Add(goalie_var + util_x[j] <= 1)
        # Max skaters per team (goalies excluded, FLEX included)
        teams = filtered_df['Team'].unique()
        for team in teams:
            # Goalies do not count toward the skater cap
            exclude_positions = ['G']
            team_constraint = solver.Sum(
                x[(i, pos)]
                for i in filtered_df.index
                for pos in filtered_df.loc[i, 'eligible_positions']
                if filtered_df.loc[i, 'Team'] == team and pos not in exclude_positions
            )
            # Add FLEX players from this team
            if 'FLEX' in position_reqs or 'FLEX1' in position_reqs or 'FLEX2' in position_reqs:
                team_util_players = [i for i in util_x.keys() if filtered_df.loc[i, 'Team'] == team]
                team_constraint += solver.Sum([util_x[i] for i in team_util_players])
            solver.Add(team_constraint <= max_skaters)
        # At least 3 different teams constraint, modeled with per-team
        # indicator variables (big-M linking, M = roster size)
        team_used = {}
        for team in teams:
            team_used[team] = solver.BoolVar(f'team_used_{team}')
            team_players_sum = solver.Sum(
                x[(i, pos)]
                for i in filtered_df.index
                for pos in filtered_df.loc[i, 'eligible_positions']
                if filtered_df.loc[i, 'Team'] == team
            )
            if 'FLEX' in position_reqs or 'FLEX1' in position_reqs or 'FLEX2' in position_reqs:
                team_util = [util_x[i] for i in util_x.keys() if filtered_df.loc[i, 'Team'] == team]
                if team_util:
                    team_players_sum += solver.Sum(team_util)
            M = sum(position_reqs.values())
            # team_used == 1 iff at least one player from the team is rostered
            solver.Add(team_players_sum <= M * team_used[team])
            solver.Add(team_used[team] <= team_players_sum)
        solver.Add(solver.Sum([team_used[team] for team in teams]) >= 3)
        # Total players constraint (include FLEX): roster exactly full
        total_players = solver.Sum(x[(i, pos)] for i in filtered_df.index for pos in filtered_df.loc[i, 'eligible_positions'])
        if 'FLEX' in position_reqs or 'FLEX1' in position_reqs or 'FLEX2' in position_reqs:
            total_players += solver.Sum([util_x[i] for i in util_x.keys()])
        solver.Add(total_players == sum(position_reqs.values()))
        # Find the highest-Median skater on the target team (goalies excluded)
        exclude_positions = ['G']
        target_team_skaters = filtered_df[
            (filtered_df['Team'] == target_team) &
            (~filtered_df['Position'].str.contains('|'.join(exclude_positions)))
        ]
        if not target_team_skaters.empty:
            # Get the highest median player from the target team
            highest_median_player = target_team_skaters.loc[target_team_skaters['Median'].idxmax()]
            highest_median_idx = highest_median_player.name
            # Force the highest median player to be included in the lineup.
            # Find which position(s) this player is eligible for
            eligible_positions = highest_median_player['eligible_positions']
            # Constraint: use this player in at least one eligible position
            highest_median_constraint = solver.Sum([
                x[(highest_median_idx, pos)] for pos in eligible_positions
            ])
            # Add FLEX variable if applicable
            if ('FLEX' in position_reqs or 'FLEX1' in position_reqs or 'FLEX2' in position_reqs) and highest_median_idx in util_x:
                highest_median_constraint += util_x[highest_median_idx]
            solver.Add(highest_median_constraint >= 1)
        # Force exactly max_skaters players from the target team (goalies excluded, FLEX included)
        exclude_positions = ['G']
        target_team_constraint = solver.Sum(
            x[(i, pos)]
            for i in filtered_df.index
            for pos in filtered_df.loc[i, 'eligible_positions']
            if filtered_df.loc[i, 'Team'] == target_team and pos not in exclude_positions
        )
        # Add FLEX players from the target team
        if 'FLEX' in position_reqs or 'FLEX1' in position_reqs or 'FLEX2' in position_reqs:
            target_team_util_players = [i for i in util_x.keys() if filtered_df.loc[i, 'Team'] == target_team]
            target_team_constraint += solver.Sum([util_x[i] for i in target_team_util_players])
        solver.Add(target_team_constraint == max_skaters)
        # Objective: maximize total Median (include FLEX)
        objective = solver.Sum(
            x[(i, pos)] * filtered_df.loc[i, 'Median']
            for i in filtered_df.index for pos in filtered_df.loc[i, 'eligible_positions']
        )
        if 'FLEX' in position_reqs or 'FLEX1' in position_reqs or 'FLEX2' in position_reqs:
            objective += solver.Sum([util_x[i] * filtered_df.loc[i, 'Median'] for i in util_x.keys()])
        solver.Maximize(objective)
        status = solver.Solve()
        if status == pywraplp.Solver.OPTIMAL or status == pywraplp.Solver.FEASIBLE:
            # Extract selected (player index, position) pairs
            selected = [
                (i, pos)
                for (i, pos), var in x.items()
                if var.solution_value() > 0.5
            ]
            # Add FLEX players to the selected list
            if 'FLEX' in position_reqs or 'FLEX1' in position_reqs or 'FLEX2' in position_reqs:
                util_selected = [
                    (i, 'FLEX')
                    for i, var in util_x.items()
                    if var.solution_value() > 0.5
                ]
                selected.extend(util_selected)
            lineup = DataFrame([
                {**filtered_df.loc[i].to_dict(), 'assigned_position': pos}
                for (i, pos) in selected
            ])
            # Assign unique position labels for duplicate positions
            # (e.g., C1, C2, W1..., and FLEX1/FLEX2 when multiple FLEX slots)
            position_counts = {}
            position_labels = []
            for pos in lineup['assigned_position']:
                if pos not in position_counts:
                    position_counts[pos] = 1
                else:
                    position_counts[pos] += 1
                if flex_count == 1:
                    label = f"{pos}{position_counts[pos]}" if position_counts[pos] > 1 or pos in ['C', 'W', 'D'] else pos
                else:
                    label = f"{pos}{position_counts[pos]}" if position_counts[pos] > 1 or pos in ['C', 'W', 'D', 'FLEX'] else pos
                position_labels.append(label)
            lineup['position_label'] = position_labels
            # Build the single-row dictionary: position label -> player name
            row_dict = {row['position_label']: row['Name'] for _, row in lineup.iterrows()}
            row_dict['Total_Salary'] = lineup['Salary'].sum()
            row_dict['Total_Median'] = lineup['Median'].sum()
            row_dict['Own'] = lineup['Own'].sum()
            row_dict['Target_Team'] = target_team
            # Convert to DataFrame (kept for parity with earlier versions; unused below)
            result_df = DataFrame([row_dict])
            # Generate variations for this team, seeded with the optimal row
            result_rows = [row_dict.copy()]
            for _ in range(combos):
                # Copy the current optimal lineup row
                new_row = row_dict.copy()
                # Get all position labels (e.g., 'C1', 'W2', etc.)
                pos_labels = [k for k in new_row.keys() if k not in ['Total_Salary', 'Total_Median', 'Target_Team', 'Own']]
                # Track which positions have been swapped in this iteration
                swapped_positions = []
                # Determine number of positions to swap (1, 2 or 3)
                num_swaps = choice([1, 2, 3])
                for swap_num in range(num_swaps):
                    # Get available positions (exclude already swapped positions)
                    available_positions = [pos for pos in pos_labels if pos not in swapped_positions]
                    if not available_positions:
                        break
                    # 50% chance to force a goalie swap on this pick
                    if random() < 0.50:
                        # Find goalie slots ('G' label)
                        pitcher_positions = [pos for pos in available_positions if pos == 'G']
                        if pitcher_positions:
                            pos_to_swap = choice(pitcher_positions)
                        else:
                            pos_to_swap = choice(available_positions)
                    else:
                        pos_to_swap = choice(available_positions)
                    # Add the position to our swapped tracking list
                    swapped_positions.append(pos_to_swap)
                    # Strip the numeric suffix to recover the base position
                    # (e.g., 'W2' -> 'W', 'FLEX1' -> 'FLEX')
                    match = re.match(r'[A-Z]+', pos_to_swap.strip())
                    if match:
                        assigned_pos = match.group()
                    else:
                        continue
                    # Get current player being swapped and their team
                    current_player = new_row[pos_to_swap]
                    current_player_team = player_team_map.get(current_player)
                    # Count teams in current lineup (before swap)
                    current_lineup_players = [new_row[pos] for pos in pos_labels]
                    current_teams = set(player_team_map.get(p) for p in current_lineup_players if player_team_map.get(p))
                    # Count how many players from the current player's team are in the lineup
                    players_from_same_team = sum(1 for p in current_lineup_players if player_team_map.get(p) == current_player_team)
                    # If this player is the only one from their team, we need to be careful
                    must_maintain_team = players_from_same_team == 1 and len(current_teams) <= 3
                    # Handle FLEX slots: any C/W/D skater is an eligible replacement
                    if assigned_pos == 'FLEX' or assigned_pos == 'FLEX1' or assigned_pos == 'FLEX2':
                        util_positions = ['C', 'W', 'D']
                        # Eligible replacements not already in the lineup
                        eligible = filtered_df[
                            filtered_df['eligible_positions'].apply(lambda pos_list: any(pos in pos_list for pos in util_positions)) &
                            (~filtered_df['Name'].isin(new_row.values()))
                        ]
                    else:
                        # Regular position - must match exactly
                        if assigned_pos == 'G':
                            # Replacement goalie must not face the target team
                            eligible = filtered_df[
                                filtered_df['eligible_positions'].apply(lambda pos_list: assigned_pos in pos_list) &
                                (~filtered_df['Name'].isin(new_row.values())) &
                                (filtered_df['Opp'] != target_team)
                            ]
                        else:
                            eligible = filtered_df[
                                filtered_df['eligible_positions'].apply(lambda pos_list: assigned_pos in pos_list) &
                                (~filtered_df['Name'].isin(new_row.values()))
                            ]
                    if eligible.empty:
                        continue
                    # Filter eligible players to maintain 3-team constraint
                    if must_maintain_team:
                        # Must pick from same team OR ensure 3 teams remain.
                        # Teams that would remain after removing the current player:
                        remaining_teams = set(player_team_map.get(p) for p in current_lineup_players if p != current_player and player_team_map.get(p))
                        # Accept a replacement whose team either:
                        # 1. matches the current player (keeps that team alive), or
                        # 2/3. keeps the post-swap lineup at >= 3 distinct teams
                        def maintains_3_teams(player_team):
                            if player_team == current_player_team:
                                return True  # Same team, maintains the team
                            if player_team in remaining_teams and len(remaining_teams) >= 3:
                                return True  # Team already represented and we have enough teams
                            if player_team not in remaining_teams and len(remaining_teams) >= 2:
                                return True  # New team: 2 remaining teams + this one = 3
                            return False
                        eligible = eligible[eligible['Team'].apply(maintains_3_teams)]
                        if eligible.empty:
                            continue
                    # Randomly select a replacement
                    replacement = eligible.sample(1).iloc[0]
                    new_row[pos_to_swap] = replacement['Name']
                # Recalculate totals from the post-swap roster.
                # NOTE(review): placement assumed to be after the swap loop
                # (once per variation) -- original indentation was lost.
                player_rows = filtered_df[filtered_df['Name'].isin([new_row[k] for k in pos_labels])]
                new_row['Total_Salary'] = player_rows['Salary'].sum()
                new_row['Total_Median'] = player_rows['Median'].sum()
                new_row['Own'] = player_rows['Own'].sum()
                result_rows.append(new_row)
            # Create the final DataFrame for this team
            final_df = DataFrame(result_rows)
            # Get all position columns (exclude totals)
            pos_labels = [col for col in final_df.columns if col not in ['Total_Salary', 'Total_Median', 'Target_Team', 'Own']]
            # Count target team players in each row
            def count_target_team(row):
                return sum(1 for pos in pos_labels if player_team_map.get(row[pos], None) == target_team)
            final_df['Stack'] = final_df.apply(count_target_team, axis=1)
            # Drop swap variations that ended up identical or invalid
            final_df = final_df.drop_duplicates(subset=['Total_Median', 'Total_Salary'])
            final_df = final_df[final_df['Total_Salary'] <= salary_cap]
            final_df = final_df[final_df['Stack'] <= max_skaters]
            final_df = final_df.sort_values(by='Total_Median', ascending=False)
            # Add to all results
            all_team_results.append(final_df)
    return all_team_results
def format_optimals(all_team_results: list, model_source: DataFrame, required_positions: list, player_team_map):
    """Flatten per-team optimal-lineup frames into one display-ready table.

    Args:
        all_team_results: List of per-team lineup DataFrames (as produced by
            init_team_results), each with position columns plus Total_Salary,
            Total_Median, Own, Target_Team and Stack.
        model_source: Player pool with Name and Team columns, used to map
            players to teams.
        required_positions: Position column names the output must contain,
            in display order; missing ones are added as empty strings.
        player_team_map: Kept for interface compatibility; it is rebuilt
            from model_source below and the passed value is not used.

    Returns:
        DataFrame with columns ['salary', 'proj', 'Team', 'Team_count',
        'Secondary', 'Secondary_count', 'Own'] + required_positions, sorted
        by projection descending.
    """
    roo_frame = model_source.copy()
    combined_df = pd_concat(all_team_results, ignore_index=True)
    combined_df = combined_df.sort_values(by='Total_Median', ascending=False)
    # Position columns are everything except the metadata/total columns
    pos_columns = [col for col in combined_df.columns if col not in ['Total_Salary', 'Total_Median', 'Target_Team', 'Stack', 'Own']]
    # Rebuild the name -> team lookup from the model source (intentionally
    # overrides the parameter, matching historical behavior).
    player_team_map = roo_frame.set_index('Name')['Team'].to_dict()
    # Process each row to create the formatted structure
    formatted_rows = []
    for _, row in combined_df.iterrows():
        new_row = {}
        # Basic columns
        new_row['salary'] = row['Total_Salary']
        new_row['proj'] = row['Total_Median']
        new_row['Own'] = row['Own']
        new_row['Team'] = row['Target_Team']
        new_row['Team_count'] = row['Stack']
        # Tally how many rostered players come from each team
        team_counts = {}
        for pos in pos_columns:
            player = row[pos]
            if player in player_team_map:
                team = player_team_map[player]
                team_counts[team] = team_counts.get(team, 0) + 1
        # Secondary stack = most frequent team that is not the target team
        sorted_teams = sorted(team_counts.items(), key=lambda x: x[1], reverse=True)
        secondary_team = 'None'
        secondary_count = 0
        for team, count in sorted_teams:
            if team != row['Target_Team']:
                secondary_team = team
                secondary_count = count
                break
        new_row['Secondary'] = secondary_team
        new_row['Secondary_count'] = secondary_count
        # Carry the position columns through unchanged
        for pos in pos_columns:
            new_row[pos] = row[pos]
        formatted_rows.append(new_row)
    final_formatted_df = DataFrame(formatted_rows)
    # Ensure all required position columns exist (in case some are missing)
    for pos in required_positions:
        if pos not in final_formatted_df.columns:
            final_formatted_df[pos] = ''
    # Reorder columns to match the specified display format
    column_order = ['salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own'] + required_positions
    return final_formatted_df[column_order]
def build_prop_betting_table(db):
    """Pull Player_Data_Master, shape it into a prop-betting table, and
    publish it to both the Prop_Betting_Table sheet and the Mongo collection.

    Args:
        db: Mongo database handle; the 'Prop_Betting_Table' collection is
            dropped and re-inserted in chunks.
    """
    # Primary sheets client with a fallback account (gc/gc2 come from the
    # database star-import) in case the first hits an API error/quota.
    try:
        sh = gc.open_by_url(NHL_Master_hold)
        worksheet = sh.worksheet('Player_Data_Master')
        df = DataFrame(worksheet.get_values())
    except Exception:
        sh = gc2.open_by_url(NHL_Master_hold)
        worksheet = sh.worksheet('Player_Data_Master')
        df = DataFrame(worksheet.get_values())
    # First sheet row is the header
    df.columns = df.iloc[0]
    df = df[1:]
    df = df.reset_index(drop=True)
    # Normalize the sheet's assorted blank markers to NaN
    df = df.replace([" ", " - ", "", " "], np_nan)
    df = df.set_axis(['FantasyDataID', 'FantasyDataName', 'Player', 'Health', 'Team Full', 'Team', 'Opp', 'Line', 'PP Unit', 'GameInfo',
                      'Position', 'FD Position', 'Salary', 'Final DK Projection', 'DK uploadID', 'FD Salary', 'Final FD Projection',
                      'FD uploadID', 'Goals', 'Assists', 'Saves', 'Goals Against', 'MainSlateDK', 'MainSlateFD', 'Line_Conf', 'PPR_Conf', 'DKO_raw', 'Own', 'FDO_Raw', 'FD_Own', 'Team_Total',
                      'Player SOG', 'Goals_Baseline', 'Player Goals', 'Player Assists', 'Player TP', 'Player Saves', 'Player Blocks', 'Median', 'FD_Median'], axis=1)
    df = df.dropna(subset='FantasyDataName')
    # Cast all numeric prop columns in one pass
    float_cols = ['Team_Total', 'Player SOG', 'Player Goals', 'Player Assists',
                  'Player TP', 'Player Saves', 'Player Blocks']
    df[float_cols] = df[float_cols].astype(float)
    df.replace(np_nan, 0, inplace=True)
    prop_table = df[['Player', 'Position', 'Team', 'Opp', 'Team_Total', 'Player SOG', 'Player Goals', 'Player Assists', 'Player TP', 'Player Blocks', 'Player Saves']]
    prop_table = prop_table.sort_values(by='Player TP', ascending=False)
    # Normalize player names to the site spellings
    prop_table['Player'] = prop_table['Player'].replace(wrong_names, right_names)
    worksheet = sh.worksheet('Prop_Betting_Table')
    # BUG FIX: the table has 11 columns (A-K); clearing only A:J left stale
    # data in column K ('Player Saves') between runs.
    worksheet.batch_clear(['A:K'])
    worksheet.update([prop_table.columns.values.tolist()] + prop_table.values.tolist())
    # Mirror the table into Mongo: drop and re-insert in chunks with retries
    collection = db['Prop_Betting_Table']
    prop_table.reset_index(inplace=True)
    chunk_size = 100000
    collection.drop()
    for i in range(0, len(prop_table), chunk_size):
        # Best-effort: up to 5 attempts per chunk, then move on
        for _ in range(5):
            try:
                df_chunk = prop_table.iloc[i:i + chunk_size]
                collection.insert_many(df_chunk.to_dict('records'), ordered=False)
                break
            except Exception as e:
                st.write(f"Retry due to error: {e}")
                time_sleep(1)
#------ BUILD PLAYER LEVEL BASIC OUTCOMES ------#
def build_dk_player_level_basic_outcomes(slate_info, dk_player_hold, fd_player_hold, db):
try:
sh = gc.open_by_url(NHL_Master_hold)
worksheet = sh.worksheet('Player_Data_Master')
raw_df = DataFrame(worksheet.get_values())
except:
sh = gc2.open_by_url(NHL_Master_hold)
worksheet = sh.worksheet('Player_Data_Master')
raw_df = DataFrame(worksheet.get_values())
raw_df.columns = raw_df.iloc[0]
raw_df = raw_df[1:]
raw_df = raw_df.reset_index(drop=True)
raw_df = raw_df.replace([" ", " - ", "", " "], np_nan)
dk_df = raw_df.set_axis(['FantasyDataID', 'FantasyDataName', 'Player', 'Health', 'Team Full', 'Team', 'Opp', 'Line', 'PP Unit', 'GameInfo',
'Position', 'FD Position', 'Salary', 'Final DK Projection', 'DK uploadID', 'FD Salary', 'Final FD Projection',
'FD uploadID', 'Goals', 'Assists', 'Saves', 'Goals Against', 'MainSlateDK', 'MainSlateFD', 'Line_Conf', 'PPR_Conf', 'DKO_raw', 'Own', 'FDO_Raw', 'FD_Own', 'Team_Total',
'Player SOG', 'Goals_Baseline', 'Player Goals', 'Player Assists', 'Player TP', 'Player Saves', 'Player Blocks', 'Median', 'FD_Median'], axis=1)
dk_df['DK uploadID'] = dk_df['DK uploadID'].str.replace(',', '').str.split('.').str[0]
dk_df['DK uploadID'] = dk_df['DK uploadID'].str.replace('.0', '')
for wrong, right in zip(wrong_names, right_names):
dk_df['DK uploadID'] = dk_df['DK uploadID'].str.replace(wrong, right)
dk_id_dict = dict(zip(dk_df['Player'].str.strip(), dk_df['DK uploadID'].str.strip()))
line_dict = dict(zip(dk_df['Player'].str.strip(), dk_df['Line'].str.strip()))
pp_dict = dict(zip(dk_df['Player'].str.strip(), dk_df['PP Unit'].str.strip()))
dk_df.dropna(subset='Position', inplace=True)
dk_df.dropna(subset='Median', inplace=True)
dk_df = dk_df[dk_df['Opp'] != '']
dk_df['Median'].replace(' - ', 0, inplace=True)
dk_df['Salary'] = dk_df['Salary'].str.replace(',', '')
dk_df['Salary'] = dk_df['Salary'].str.strip()
dk_df['Salary'].replace('', np_nan, inplace=True)
dk_df['Salary'] = dk_df['Salary'].astype(float)
dk_df['Median'] = dk_df['Median'].astype(float)
dk_df['Median'] = np_where(dk_df['Median'] >= 50, dk_df['Median'] / 5, dk_df['Median'])
dk_df['Own'] = dk_df['Own'].astype(float)
dk_df['FD_Own'] = dk_df['FD_Own'].astype(float)
dk_df['Team_Total'] = dk_df['Team_Total'].astype(float)
dk_df['Player SOG'] = dk_df['Player SOG'].astype(float)
dk_df['Player Goals'] = dk_df['Player Goals'].astype(float)
dk_df['Player Assists'] = dk_df['Player Assists'].astype(float)
dk_df['Player TP'] = dk_df['Player TP'].astype(float)
dk_df['Player Saves'] = dk_df['Player Saves'].astype(float)
dk_df = dk_df.loc[dk_df['Median'] > 0]
dk_df['Own'].replace(nan_value, 0, inplace=True)
dk_df.replace(np_nan, 0, inplace=True)
dk_df['Player'] = dk_df['Player'].replace(wrong_names, right_names)
fd_df = raw_df.set_axis(['FantasyDataID', 'FantasyDataName', 'Player', 'Health', 'Team Full', 'Team', 'Opp', 'Line', 'PP Unit', 'GameInfo',
'DK Position', 'Position', 'DK Salary', 'Final DK Projection', 'DK uploadID', 'Salary', 'Final FD Projection',
'FD uploadID', 'Goals', 'Assists', 'Saves', 'Goals Against', 'MainSlateDK', 'MainSlateFD', 'Line_Conf', 'PPR_Conf', 'DKO_raw', 'DK_Own', 'FDO_Raw', 'Own', 'Team_Total',
'Player SOG', 'Goals_Baseline', 'Player Goals', 'Player Assists', 'Player TP', 'Player Saves', 'Player Blocks', 'DK_Median', 'Median'], axis=1)
fd_df.dropna(subset='Position', inplace=True)
fd_df.dropna(subset='Median', inplace=True)
fd_df = fd_df[fd_df['Opp'] != '']
fd_df['Median'].replace(' - ', 0, inplace=True)
fd_df['Salary'] = fd_df['Salary'].str.replace(',', '')
fd_df['Salary'] = fd_df['Salary'].str.strip()
fd_df['Salary'].replace('', np_nan, inplace=True)
fd_df['Salary'] = fd_df['Salary'].astype(float)
fd_df['Median'] = fd_df['Median'].astype(float)
fd_df['Median'] = np_where(fd_df['Median'] >= 50, fd_df['Median'] / 5, fd_df['Median'])
fd_df['Own'] = fd_df['Own'].astype(float)
fd_df = fd_df.loc[fd_df['Median'] > 0]
fd_df['Own'].replace(nan_value, 0, inplace=True)
fd_df['Player'] = fd_df['Player'].replace(wrong_names, right_names)
fd_id_dict = dict(zip(fd_df['Player'].str.strip(), fd_df['FD uploadID'].str.strip()))
prop_table = dk_df[['Player', 'Position', 'Team', 'Opp', 'Team_Total', 'Player SOG', 'Player Goals', 'Player Assists', 'Player TP', 'Player Saves']]
prop_table = prop_table.sort_values(by='Player TP', ascending=False)
pred_dicts = {}
for slates in slate_options:
if slates == 'Main Slate':
roo_team_list = slate_info['DK_Main_Dumb'].dropna().values.tolist()
elif slates == 'Secondary Slate':
roo_team_list = slate_info['DK_Secondary_Dumb'].dropna().values.tolist()
elif slates == 'Auxiliary Slate':
roo_team_list = slate_info['DK_Third_Dumb'].dropna().values.tolist()
basic_own_df = dk_df.copy()
basic_own_df = basic_own_df[basic_own_df['Team'].isin(roo_team_list)]
basic_own_df['name_team'] = basic_own_df['Player'] + basic_own_df['Position']
basic_own_df['Combo_powered'] = basic_own_df['Own']
own_dict = dict(zip(basic_own_df.Player, basic_own_df.Own))
pred_dict = dict(zip(basic_own_df.Player.str.strip(), basic_own_df.Combo_powered))
pred_dicts[slates] = pred_dict
basic_team_dict = dict(zip(basic_own_df.name_team, basic_own_df.Team))
basic_opp_dict = dict(zip(basic_own_df.Player, basic_own_df.Opp))
flex_file = basic_own_df.copy()
flex_file['Floor_raw'] = flex_file['Median'] * .25
flex_file['Ceiling_raw'] = flex_file['Median'] * 2
flex_file['Floor'] = np_where(flex_file['Position'] == 'G', flex_file['Median'] * .5, flex_file['Floor_raw'])
flex_file['Floor'] = np_where(flex_file['Position'] == 'D', flex_file['Median'] * .1, flex_file['Floor_raw'])
flex_file['Ceiling'] = np_where(flex_file['Position'] == 'G', flex_file['Median'] * 1.75, flex_file['Ceiling_raw'])
flex_file['Ceiling'] = np_where(flex_file['Position'] == 'D', flex_file['Median'] * 1.75, flex_file['Ceiling_raw'])
flex_file['STD'] = flex_file['Median'] / 3
flex_file = flex_file[['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD']]
flex_file = flex_file.reset_index(drop=True)
hold_file = flex_file.copy()
overall_file = flex_file.copy()
salary_file = flex_file.copy()
try:
overall_median_gpu = np_array(overall_file['Median'])
overall_std_gpu = np_array(overall_file['STD'])
overall_salary_gpu = np_array(overall_file['Salary'])
data_shape = (len(overall_file['Player']), total_sims) # Example: 1000 rows
salary_array = np_zeros(data_shape)
sim_array = np_zeros(data_shape)
for x in range(0, total_sims):
result_gpu = overall_salary_gpu
salary_array[:, x] = result_gpu
cupy_array = salary_array
salary_file = salary_file.reset_index(drop=True)
salary_cupy = DataFrame(cupy_array, columns=list(range(0, total_sims)))
salary_check_file = pd_concat([salary_file, salary_cupy], axis=1)
except:
for x in range(0,total_sims):
salary_file[x] = salary_file['Salary']
salary_check_file = salary_file.copy()
salary_file=salary_check_file.drop(['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD'], axis=1)
salary_file = salary_file.div(1000)
try:
for x in range(0, total_sims):
result_gpu = np_random.normal(overall_median_gpu, overall_std_gpu)
sim_array[:, x] = result_gpu
add_array = sim_array
overall_file = overall_file.reset_index(drop=True)
df2 = DataFrame(add_array, columns=list(range(0, total_sims)))
check_file = pd_concat([overall_file, df2], axis=1)
except:
for x in range(0,total_sims):
overall_file[x] = np_random.normal(overall_file['Median'],overall_file['STD'])
check_file = overall_file.copy()
overall_file=check_file.drop(['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD'], axis=1)
players_only = hold_file[['Player']]
raw_lineups_file = players_only
for x in range(0,total_sims):
maps_dict = {'proj_map':dict(zip(hold_file.Player,overall_file[x]))}
raw_lineups_file[x] = sum([raw_lineups_file['Player'].map(maps_dict['proj_map'])])
players_only[x] = raw_lineups_file[x].rank(ascending=False)
players_only=players_only.drop(['Player'], axis=1)
salary_2x_check = (overall_file - (salary_file*2))
salary_3x_check = (overall_file - (salary_file*3))
salary_4x_check = (overall_file - (salary_file*4))
players_only['Average_Rank'] = players_only.mean(axis=1)
players_only['Top_finish'] = players_only[players_only == 1].count(axis=1)/total_sims
players_only['Top_5_finish'] = players_only[players_only <= 5].count(axis=1)/total_sims
players_only['Top_10_finish'] = players_only[players_only <= 10].count(axis=1)/total_sims
players_only['20+%'] = overall_file[overall_file >= 20].count(axis=1)/float(total_sims)
players_only['2x%'] = salary_2x_check[salary_2x_check >= 1].count(axis=1)/float(total_sims)
players_only['3x%'] = salary_3x_check[salary_3x_check >= 1].count(axis=1)/float(total_sims)
players_only['4x%'] = salary_4x_check[salary_4x_check >= 1].count(axis=1)/float(total_sims)
players_only['Player'] = hold_file[['Player']]
final_outcomes = players_only[['Player', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '2x%', '3x%', '4x%']]
final_Proj = merge(hold_file, final_outcomes, on="Player")
final_Proj = final_Proj[['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '2x%', '3x%', '4x%']]
final_Proj['Own'] = final_Proj['Player'].str.strip().map(pred_dict)
final_Proj = final_Proj.replace('', np_nan)
final_Proj = final_Proj.dropna(subset=['Own'])
final_Proj['Own'] = final_Proj['Own'].astype('float')
own_norm = 900 / final_Proj['Own'].sum()
final_Proj['Own'] = final_Proj['Own'] * own_norm
final_Proj['Small Field Own%'] = final_Proj['Own'] + (.2 * (final_Proj['Own'] - final_Proj['Own'].mean()))
own_norm = 900 / final_Proj['Small Field Own%'].sum()
final_Proj['Small Field Own%'] = final_Proj['Small Field Own%'] * own_norm
final_Proj['Large Field Own%'] = final_Proj['Own'] - (.2 * (final_Proj['Own'] - final_Proj['Own'].mean()))
own_norm = 900 / final_Proj['Large Field Own%'].sum()
final_Proj['Large Field Own%'] = final_Proj['Large Field Own%'] * own_norm
final_Proj['Cash Own%'] = final_Proj['Own'] + (.33 * (final_Proj['Own'] - final_Proj['Own'].mean()))
own_norm = 900 / final_Proj['Cash Own%'].sum()
final_Proj['Cash Own%'] = final_Proj['Cash Own%'] * own_norm
final_Proj['Own'] = final_Proj['Own'].clip(upper=85, lower=.01)
final_Proj['Small Field Own%'] = final_Proj['Small Field Own%'].clip(upper=95, lower=.01)
final_Proj['Large Field Own%'] = final_Proj['Large Field Own%'].clip(upper=80, lower=.1)
final_Proj['Cash Own%'] = final_Proj['Cash Own%'].clip(upper=99, lower=0)
final_Proj['name_team'] = final_Proj['Player'] + final_Proj['Position']
final_Proj['Team'] = final_Proj['name_team'].map(basic_team_dict)
final_Proj['Opp'] = final_Proj['Player'].map(basic_opp_dict)
final_Proj['Line'] = final_Proj['Player'].str.strip().map(line_dict)
final_Proj['PP Unit'] = final_Proj['Player'].str.strip().map(pp_dict)
final_Proj = final_Proj[['Player', 'Position', 'Line', 'PP Unit', 'Team', 'Opp', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '2x%', '3x%', '4x%', 'Own', 'Small Field Own%', 'Large Field Own%', 'Cash Own%']]
final_Proj['CPT_Own'] = final_Proj['Own'] / 6
final_Proj['Site'] = 'Draftkings'
final_Proj['Type'] = 'Basic'
final_Proj['Slate'] = slates
final_Proj = final_Proj.drop_duplicates(subset='Player')
final_Proj_basic_dk = final_Proj.sort_values(by='Median', ascending=False)
dk_player_hold = pd_concat([dk_player_hold, final_Proj_basic_dk])
dk_player_hold =dk_player_hold.replace([np_nan, np_inf, -np_inf], '')
for slates in slate_options:
if slates == 'Main Slate':
roo_team_list = slate_info['FD_Main_Dumb'].dropna().values.tolist()
elif slates == 'Secondary Slate':
roo_team_list = slate_info['FD_Secondary_Dumb'].dropna().values.tolist()
elif slates == 'Auxiliary Slate':
roo_team_list = slate_info['FD_Third_Dumb'].dropna().values.tolist()
basic_own_df = fd_df.copy()
basic_own_df = basic_own_df[basic_own_df['Team'].isin(roo_team_list)]
basic_own_df['name_team'] = basic_own_df['Player'] + basic_own_df['Position']
def calculate_ownership(df, position):
    """Derive the contest-size ownership curves for one position group.

    Filters *df* to rows whose Position contains *position*, then adds the
    'Base Own%', 'Small Field Own%', 'Large Field Own%' and 'Cash Own%'
    columns.  Rows at or above the positional mean ownership get boosted by
    a field-size-specific multiplier; rows below the mean keep their raw
    Own.  Every curve is capped at 75.

    Returns the filtered frame with the four new columns added.
    """
    # Work on an explicit copy so we never write into a view of the
    # caller's dataframe (the original relied on pandas' chained-assignment
    # warning being globally disabled).
    frame = df[df['Position'].str.contains(position)].copy()
    mean_own = frame['Own'].mean()

    def _boosted(multiplier, center):
        # Boost only rows at/above the positional mean:
        #   Own * (multiplier * (Own - center) / 100) + mean
        # then cap the whole curve at 75.
        raw = np_where(
            (frame['Own'] - mean_own >= 0),
            frame['Own'] * (multiplier * (frame['Own'] - center) / 100) + mean_own,
            frame['Own']
        )
        return np_clip(raw, None, 75)

    # Base uses a softened center (mean / 1.5); the field-size curves use
    # the plain positional mean with their own multipliers.
    frame['Base Own%'] = _boosted(5, mean_own / 1.5)
    frame['Small Field Own%'] = _boosted(6, mean_own)
    frame['Large Field Own%'] = _boosted(2.5, mean_own)
    frame['Cash Own%'] = _boosted(8, mean_own)
    return frame
# Apply the function to each dataframe
w_frame = calculate_ownership(basic_own_df, 'W')
c_frame = calculate_ownership(basic_own_df, 'C')
d_frame = calculate_ownership(basic_own_df, 'D')
g_frame = calculate_ownership(basic_own_df, 'G')
w_reg_norm_var = 295 / w_frame['Base Own%'].sum()
w_small_norm_var = 295 / w_frame['Small Field Own%'].sum()
w_large_norm_var = 295 / w_frame['Large Field Own%'].sum()
w_cash_norm_var = 295 / w_frame['Cash Own%'].sum()
w_frame['Own'] = w_frame['Base Own%'] * w_reg_norm_var
w_frame['Small Field Own%'] = w_frame['Small Field Own%'] * w_small_norm_var
w_frame['Large Field Own%'] = w_frame['Large Field Own%'] * w_large_norm_var
w_frame['Cash Own%'] = w_frame['Cash Own%'] * w_cash_norm_var
c_reg_norm_var = 295 / c_frame['Base Own%'].sum()
c_small_norm_var = 295 / c_frame['Small Field Own%'].sum()
c_large_norm_var = 295 / c_frame['Large Field Own%'].sum()
c_cash_norm_var = 295 / c_frame['Cash Own%'].sum()
c_frame['Own'] = c_frame['Base Own%'] * c_reg_norm_var
c_frame['Small Field Own%'] = c_frame['Small Field Own%'] * c_small_norm_var
c_frame['Large Field Own%'] = c_frame['Large Field Own%'] * c_large_norm_var
c_frame['Cash Own%'] = c_frame['Cash Own%'] * c_cash_norm_var
d_reg_norm_var = 210 / d_frame['Base Own%'].sum()
d_small_norm_var = 210 / d_frame['Small Field Own%'].sum()
d_large_norm_var = 210 / d_frame['Large Field Own%'].sum()
d_cash_norm_var = 210 / d_frame['Cash Own%'].sum()
d_frame['Own'] = d_frame['Base Own%'] * d_reg_norm_var
d_frame['Small Field Own%'] = d_frame['Small Field Own%'] * d_small_norm_var
d_frame['Large Field Own%'] = d_frame['Large Field Own%'] * d_large_norm_var
d_frame['Cash Own%'] = d_frame['Cash Own%'] * d_cash_norm_var
g_reg_norm_var = 100 / g_frame['Base Own%'].sum()
g_small_norm_var = 100 / g_frame['Small Field Own%'].sum()
g_large_norm_var = 100 / g_frame['Large Field Own%'].sum()
g_cash_norm_var = 100 / g_frame['Cash Own%'].sum()
g_frame['Own'] = g_frame['Base Own%'] * g_reg_norm_var
g_frame['Small Field Own%'] = g_frame['Small Field Own%'] * g_small_norm_var
g_frame['Large Field Own%'] = g_frame['Large Field Own%'] * g_large_norm_var
g_frame['Cash Own%'] = g_frame['Cash Own%'] * g_cash_norm_var
basic_own_df = pd_concat([w_frame, c_frame, d_frame, g_frame])
basic_own_dict = dict(zip(basic_own_df.Player, basic_own_df.Own))
small_own_dict = dict(zip(basic_own_df.Player, basic_own_df['Small Field Own%']))
large_own_dict = dict(zip(basic_own_df.Player, basic_own_df['Large Field Own%']))
cash_own_dict = dict(zip(basic_own_df.Player, basic_own_df['Cash Own%']))
basic_team_dict = dict(zip(basic_own_df.name_team, basic_own_df.Team))
basic_opp_dict = dict(zip(basic_own_df.Player, basic_own_df.Opp))
flex_file = basic_own_df.copy()
flex_file['Floor_raw'] = flex_file['Median'] * .25
flex_file['Ceiling_raw'] = flex_file['Median'] * 2
# Position-specific Floor/Ceiling overrides (FD branch).
# BUG FIX: each second np_where must chain off the column computed on the
# previous line (not the raw default), otherwise the 'D' pass silently
# discards the goalie ('G') adjustment made by the 'G' pass.
flex_file['Floor'] = np_where(flex_file['Position'] == 'G', flex_file['Median'] * .5, flex_file['Floor_raw'])
flex_file['Floor'] = np_where(flex_file['Position'] == 'D', flex_file['Median'] * .1, flex_file['Floor'])
flex_file['Ceiling'] = np_where(flex_file['Position'] == 'G', flex_file['Median'] * 1.75, flex_file['Ceiling_raw'])
flex_file['Ceiling'] = np_where(flex_file['Position'] == 'D', flex_file['Median'] * 1.75, flex_file['Ceiling'])
flex_file['STD'] = flex_file['Median'] / 3
flex_file = flex_file[['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD']]
flex_file = flex_file.reset_index(drop=True)
hold_file = flex_file.copy()
overall_file = flex_file.copy()
salary_file = flex_file.copy()
try:
overall_median_gpu = np_array(overall_file['Median'])
overall_std_gpu = np_array(overall_file['STD'])
overall_salary_gpu = np_array(overall_file['Salary'])
data_shape = (len(overall_file['Player']), total_sims) # Example: 1000 rows
salary_array = np_zeros(data_shape)
sim_array = np_zeros(data_shape)
for x in range(0, total_sims):
result_gpu = overall_salary_gpu
salary_array[:, x] = result_gpu
cupy_array = salary_array
salary_file = salary_file.reset_index(drop=True)
salary_cupy = DataFrame(cupy_array, columns=list(range(0, total_sims)))
salary_check_file = pd_concat([salary_file, salary_cupy], axis=1)
except:
for x in range(0,total_sims):
salary_file[x] = salary_file['Salary']
salary_check_file = salary_file.copy()
salary_file=salary_check_file.drop(['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD'], axis=1)
salary_file = salary_file.div(1000)
try:
for x in range(0, total_sims):
result_gpu = np_random.normal(overall_median_gpu, overall_std_gpu)
sim_array[:, x] = result_gpu
add_array = sim_array
overall_file = overall_file.reset_index(drop=True)
df2 = DataFrame(add_array, columns=list(range(0, total_sims)))
check_file = pd_concat([overall_file, df2], axis=1)
except:
for x in range(0,total_sims):
overall_file[x] = np_random.normal(overall_file['Median'],overall_file['STD'])
check_file = overall_file.copy()
overall_file=check_file.drop(['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD'], axis=1)
players_only = hold_file[['Player']]
raw_lineups_file = players_only
for x in range(0,total_sims):
maps_dict = {'proj_map':dict(zip(hold_file.Player,overall_file[x]))}
raw_lineups_file[x] = sum([raw_lineups_file['Player'].map(maps_dict['proj_map'])])
players_only[x] = raw_lineups_file[x].rank(ascending=False)
players_only=players_only.drop(['Player'], axis=1)
salary_2x_check = (overall_file - (salary_file*2))
salary_3x_check = (overall_file - (salary_file*3))
salary_4x_check = (overall_file - (salary_file*4))
players_only['Average_Rank'] = players_only.mean(axis=1)
players_only['Top_finish'] = players_only[players_only == 1].count(axis=1)/total_sims
players_only['Top_5_finish'] = players_only[players_only <= 5].count(axis=1)/total_sims
players_only['Top_10_finish'] = players_only[players_only <= 10].count(axis=1)/total_sims
players_only['20+%'] = overall_file[overall_file >= 20].count(axis=1)/float(total_sims)
players_only['2x%'] = salary_2x_check[salary_2x_check >= 1].count(axis=1)/float(total_sims)
players_only['3x%'] = salary_3x_check[salary_3x_check >= 1].count(axis=1)/float(total_sims)
players_only['4x%'] = salary_4x_check[salary_4x_check >= 1].count(axis=1)/float(total_sims)
players_only['Player'] = hold_file[['Player']]
final_outcomes = players_only[['Player', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '2x%', '3x%', '4x%']]
final_Proj = merge(hold_file, final_outcomes, on="Player")
final_Proj = final_Proj[['Player', 'Position', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '2x%', '3x%', '4x%']]
final_Proj['Own'] = final_Proj['Player'].map(basic_own_dict).astype(float)
final_Proj['Small Field Own%'] = final_Proj['Player'].map(small_own_dict).astype(float)
final_Proj['Large Field Own%'] = final_Proj['Player'].map(large_own_dict).astype(float)
final_Proj['Cash Own%'] = final_Proj['Player'].map(cash_own_dict).astype(float)
final_Proj['name_team'] = final_Proj['Player'] + final_Proj['Position']
final_Proj['Team'] = final_Proj['name_team'].map(basic_team_dict)
final_Proj['Opp'] = final_Proj['Player'].map(basic_opp_dict)
final_Proj['Line'] = final_Proj['Player'].str.strip().map(line_dict)
final_Proj['PP Unit'] = final_Proj['Player'].str.strip().map(pp_dict)
final_Proj = final_Proj[['Player', 'Position', 'Line', 'PP Unit', 'Team', 'Opp', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '2x%', '3x%', '4x%', 'Own', 'Small Field Own%', 'Large Field Own%', 'Cash Own%']]
final_Proj['CPT_Own'] = final_Proj['Own'] / 6
final_Proj['Site'] = 'Fanduel'
final_Proj['Type'] = 'Basic'
final_Proj['Slate'] = slates
final_Proj = final_Proj.drop_duplicates(subset='Player')
final_Proj_basic_fd = final_Proj.sort_values(by='Median', ascending=False)
fd_player_hold = pd_concat([fd_player_hold, final_Proj_basic_fd])
fd_player_hold = fd_player_hold.replace([np_nan, np_inf, -np_inf], '')
final_Proj = pd_concat([dk_player_hold, fd_player_hold])
tz = pytz_timezone('US/Central')
central_tz = datetime.now(tz)
current_time = central_tz.strftime("%H:%M:%S")
final_Proj['timestamp'] = current_time
final_Proj_basic_dk['Timestamp'] = str(date.today())
final_Proj_basic_fd['Timestamp'] = str(date.today())
solver_dk = final_Proj[final_Proj['Site'] == 'Draftkings']
solver_dk = solver_dk[solver_dk['Slate'] == 'Main Slate']
solver_dk['Player'] = solver_dk['Player'].str.strip()
solver_dk['Position'] = solver_dk['Position'].str.strip()
solver_dk['Team'] = solver_dk['Team'].str.strip()
solver_dk['Opp'] = solver_dk['Opp'].str.strip()
solver_dk['Timestamp'] = str(date.today())
solver_fd = final_Proj[final_Proj['Site'] == 'Fanduel']
solver_fd = solver_fd[solver_fd['Slate'] == 'Main Slate']
solver_fd['Player'] = solver_fd['Player'].str.strip()
solver_fd['Position'] = solver_fd['Position'].str.strip()
solver_fd['Team'] = solver_fd['Team'].str.strip()
solver_fd['Opp'] = solver_fd['Opp'].str.strip()
solver_fd['Timestamp'] = str(date.today())
final_Proj['Player'] = final_Proj['Player'].str.strip()
final_Proj['Position'] = final_Proj['Position'].str.strip()
final_Proj['Team'] = final_Proj['Team'].str.strip()
final_Proj['Opp'] = final_Proj['Opp'].str.strip()
final_Proj['player_id'] = np_where(
final_Proj['Site'] == 'Draftkings',
final_Proj['Player'] + " (" + final_Proj['Player'].map(dk_id_dict) + ")",
final_Proj['Player'].map(fd_id_dict) + ":" + final_Proj['Player']
)
final_Proj = final_Proj.dropna()
final_Proj['Player'] = final_Proj['Player'].replace(wrong_names, right_names)
worksheet = sh.worksheet('Player_Level_ROO')
worksheet.batch_clear(['A:Y'])
worksheet.update([final_Proj.columns.values.tolist()] + final_Proj.values.tolist())
collection = db['Player_Level_ROO']
final_Proj.reset_index(inplace=True)
chunk_size = 100000
collection.drop()
for i in range(0, len(final_Proj), chunk_size):
for _ in range(5):
try:
df_chunk = final_Proj.iloc[i:i + chunk_size]
collection.insert_many(df_chunk.to_dict('records'), ordered=False)
break
except Exception as e:
print(f"Retry due to error: {e}")
time_sleep(1)
print("NHL Player ROO inserted")
time.sleep(1)
sh = gc.open_by_url('https://docs.google.com/spreadsheets/d/1H7kdaxVF7Bv3kb1DSa_3Dq6OaC9ajq9UAQfVyDluXzk/edit#gid=2022043283')
worksheet = sh.worksheet('NHL DK')
worksheet.batch_clear(['A:Z'])
worksheet.update([solver_dk.columns.values.tolist()] + solver_dk.values.tolist())
time.sleep(1)
worksheet = sh.worksheet('NHL FD')
worksheet.batch_clear(['A:Z'])
worksheet.update([solver_fd.columns.values.tolist()] + solver_fd.values.tolist())
time.sleep(1)
return final_Proj, pred_dicts
#------ BUILD STACK MATRIX BASIC OUTCOMES ------#
def build_dk_stack_matrix_basic_outcomes(slate_info, dk_stacks_hold, own_dict):
    """Simulate basic DraftKings line-stack outcomes for each slate.

    Reads the 'DK_Stack_Matrix' sheet, runs `total_sims` normal-draw
    simulations per stack, derives finish/value rates and slate-specific
    ownership, and appends one row-set per slate to *dk_stacks_hold*,
    which is returned.

    NOTE(review): depends on module globals (gc, gc2, NHL_Master_hold,
    slate_options, total_sims, nan_value, wrong_names, right_names, st).
    """
    # Primary sheets client first; fall back to the secondary client on any
    # failure (e.g. rate limit / auth).  Bare except is deliberate best-effort.
    try:
        sh = gc.open_by_url(NHL_Master_hold)
        worksheet = sh.worksheet('DK_Stack_Matrix')
        stacks_df = DataFrame(worksheet.get_values())
        stacks_df.columns = stacks_df.iloc[0]
    except:
        sh = gc2.open_by_url(NHL_Master_hold)
        worksheet = sh.worksheet('DK_Stack_Matrix')
        stacks_df = DataFrame(worksheet.get_values())
        stacks_df.columns = stacks_df.iloc[0]
    # The first row was the header; drop it and coerce numeric columns.
    stacks_df = stacks_df[1:]
    stacks_df = stacks_df.reset_index(drop=True)
    stacks_df = stacks_df[['Line', 'SK1', 'SK2', 'SK3', 'Cost', 'Team Total', 'TP/$', 'Projection', 'Own']]
    stacks_df.replace("", nan_value, inplace=True)
    stacks_df.dropna(subset=['Cost'], inplace=True)
    stacks_df['Cost'] = stacks_df['Cost'].astype(int)
    stacks_df['Team Total'] = stacks_df['Team Total'].astype(float)
    stacks_df['TP/$'] = stacks_df['TP/$'].astype(float)
    stacks_df['Projection'] = stacks_df['Projection'].astype(float)
    # Normalize skater names so the ownership map lookups below can match.
    stacks_df['SK1'] = stacks_df['SK1'].replace(wrong_names, right_names)
    stacks_df['SK2'] = stacks_df['SK2'].replace(wrong_names, right_names)
    stacks_df['SK3'] = stacks_df['SK3'].replace(wrong_names, right_names)
    for slates in slate_options:
        # Teams eligible for this slate.  NOTE(review): slate_options at the
        # top of the file contains 'Late Slate' but no branch here handles it
        # (only 'Auxiliary Slate'), so roo_team_list would carry over from
        # the previous iteration for that value — confirm intended.
        if slates == 'Main Slate':
            roo_team_list = slate_info['DK_Main'].str.strip().dropna().values.tolist()
        elif slates == 'Secondary Slate':
            roo_team_list = slate_info['DK_Secondary'].str.strip().dropna().values.tolist()
        elif slates == 'Auxiliary Slate':
            roo_team_list = slate_info['DK_Third'].str.strip().dropna().values.tolist()
        basic_stack_own_df = stacks_df.copy()
        # Keep only stacks whose Line label mentions a slate-eligible team.
        basic_stack_own_df = basic_stack_own_df[basic_stack_own_df['Line'].str.contains('|'.join(roo_team_list))]
        # Sheet revisions differ in the label column name ('Line' vs 'Player').
        try:
            stacks_flex_file = basic_stack_own_df[['Line', 'SK1', 'SK2', 'SK3', 'Cost', 'Team Total', 'TP/$', 'Projection']]
            stacks_flex_file.rename(columns={"Line": "Player", "Projection": "Median", "Cost": "Salary"}, inplace = True)
        except:
            stacks_flex_file = basic_stack_own_df[['Player', 'SK1', 'SK2', 'SK3', 'Cost', 'Team Total', 'TP/$', 'Projection']]
            stacks_flex_file.rename(columns={"Projection": "Median", "Cost": "Salary"}, inplace = True)
        # Heuristic distribution parameters around the median projection.
        stacks_flex_file['Floor'] = stacks_flex_file['Median'] * .25
        stacks_flex_file['Ceiling'] = stacks_flex_file['Median'] * 2
        stacks_flex_file['STD'] = (stacks_flex_file['Median'] / 3)
        stacks_flex_file = stacks_flex_file[['Player', 'SK1', 'SK2', 'SK3', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD']]
        stacks_flex_file = stacks_flex_file.reset_index(drop=True)
        stacks_hold_file = stacks_flex_file.copy()
        stacks_overall_file = stacks_flex_file.copy()
        stacks_salary_file = stacks_flex_file.copy()
        stacks_total_sims = total_sims
        # Vectorized array path (naming suggests a former GPU/cupy path);
        # falls back to the slower per-column pandas loop on any failure.
        try:
            stacks_overall_median_gpu = np_array(stacks_overall_file['Median'])
            stacks_overall_std_gpu = np_array(stacks_overall_file['STD'])
            stacks_overall_salary_gpu = np_array(stacks_overall_file['Salary'])
            stacks_data_shape = (len(stacks_overall_file['Player']), stacks_total_sims)
            stacks_salary_array = np_zeros(stacks_data_shape)
            stacks_sim_array = np_zeros(stacks_data_shape)
            # Salary is constant across sims; broadcast it into every column.
            for x in range(0, stacks_total_sims):
                stacks_result_gpu = stacks_overall_salary_gpu
                stacks_salary_array[:, x] = stacks_result_gpu
            stacks_cupy_array = stacks_salary_array
            stacks_salary_file = stacks_salary_file.reset_index(drop=True)
            stacks_salary_cupy = DataFrame(stacks_cupy_array, columns=list(range(0, stacks_total_sims)))
            stacks_salary_check_file = pd_concat([stacks_salary_file, stacks_salary_cupy], axis=1)
        except:
            for x in range(0,stacks_total_sims):
                stacks_salary_file[x] = stacks_salary_file['Salary']
            stacks_salary_check_file = stacks_salary_file.copy()
        # Keep only the per-sim salary columns, scaled to $K for the Nx checks.
        stacks_salary_file=stacks_salary_check_file.drop(['Player', 'SK1', 'SK2', 'SK3', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD'], axis=1)
        stacks_salary_file = stacks_salary_file.div(1000)
        # Draw the simulated scores: one Normal(Median, STD) column per sim.
        try:
            for x in range(0, stacks_total_sims):
                stacks_result_gpu = np_random.normal(stacks_overall_median_gpu, stacks_overall_std_gpu)
                stacks_sim_array[:, x] = stacks_result_gpu
            stacks_add_array = stacks_sim_array
            stacks_overall_file = stacks_overall_file.reset_index(drop=True)
            stacks_df2 = DataFrame(stacks_add_array, columns=list(range(0, stacks_total_sims)))
            stacks_check_file = pd_concat([stacks_overall_file, stacks_df2], axis=1)
        except:
            for x in range(0,stacks_total_sims):
                stacks_overall_file[x] = np_random.normal(stacks_overall_file['Median'],stacks_overall_file['STD'])
            stacks_check_file = stacks_overall_file.copy()
        stacks_overall_file=stacks_check_file.drop(['Player', 'SK1', 'SK2', 'SK3', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD'], axis=1)
        stacks_players_only = stacks_hold_file[['Player']]
        # NOTE: stacks_raw_lineups_file aliases stacks_players_only (no copy),
        # so both frames gain the per-sim columns written in the loop below.
        stacks_raw_lineups_file = stacks_players_only
        # Rank every stack within each simulation (rank 1 = best score).
        for x in range(0,stacks_total_sims):
            stacks_maps_dict = {'proj_map':dict(zip(stacks_hold_file.Player,stacks_overall_file[x]))}
            stacks_raw_lineups_file[x] = sum([stacks_raw_lineups_file['Player'].map(stacks_maps_dict['proj_map'])])
            stacks_players_only[x] = stacks_raw_lineups_file[x].rank(ascending=False)
        stacks_players_only=stacks_players_only.drop(['Player'], axis=1)
        # Score minus N-times-salary; >= 1 below counts as an Nx-value hit.
        stacks_salary_2x_check = (stacks_overall_file - (stacks_salary_file*2))
        stacks_salary_3x_check = (stacks_overall_file - (stacks_salary_file*3))
        stacks_salary_4x_check = (stacks_overall_file - (stacks_salary_file*4))
        # Convert the per-sim rank/score columns into per-stack rates.
        stacks_players_only['Average_Rank'] = stacks_players_only.mean(axis=1)
        stacks_players_only['Top_finish'] = stacks_players_only[stacks_players_only == 1].count(axis=1)/stacks_total_sims
        stacks_players_only['Top_5_finish'] = stacks_players_only[stacks_players_only <= 5].count(axis=1)/stacks_total_sims
        stacks_players_only['Top_10_finish'] = stacks_players_only[stacks_players_only <= 10].count(axis=1)/stacks_total_sims
        stacks_players_only['50+%'] = stacks_overall_file[stacks_overall_file >= 50].count(axis=1)/float(stacks_total_sims)
        stacks_players_only['2x%'] = stacks_salary_2x_check[stacks_salary_2x_check >= 1].count(axis=1)/float(stacks_total_sims)
        stacks_players_only['3x%'] = stacks_salary_3x_check[stacks_salary_3x_check >= 1].count(axis=1)/float(stacks_total_sims)
        stacks_players_only['4x%'] = stacks_salary_4x_check[stacks_salary_4x_check >= 1].count(axis=1)/float(stacks_total_sims)
        stacks_players_only['Player'] = stacks_hold_file[['Player']]
        stacks_final_outcomes = stacks_players_only[['Player', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '50+%', '2x%', '3x%', '4x%']]
        stacks_final_Proj = merge(stacks_hold_file, stacks_final_outcomes, on="Player")
        stacks_final_Proj = stacks_final_Proj[['Player', 'SK1', 'SK2', 'SK3', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '50+%', '2x%', '3x%', '4x%']]
        # Stack ownership = sum of this slate's individual skater ownerships
        # (missing skaters contribute 0).
        stacks_final_Proj['Own_SK1'] = stacks_final_Proj['SK1'].str.strip().map(own_dict[slates]).fillna(0).astype(float)
        stacks_final_Proj['Own_SK2'] = stacks_final_Proj['SK2'].str.strip().map(own_dict[slates]).fillna(0).astype(float)
        stacks_final_Proj['Own_SK3'] = stacks_final_Proj['SK3'].str.strip().map(own_dict[slates]).fillna(0).astype(float)
        stacks_final_Proj['Own'] = stacks_final_Proj['Own_SK1'] + stacks_final_Proj['Own_SK2'] + stacks_final_Proj['Own_SK3']
        stacks_final_Proj = stacks_final_Proj.drop(columns=['Own_SK1', 'Own_SK2', 'Own_SK3'])
        stacks_final_Proj['Site'] = 'Draftkings'
        stacks_final_Proj['Type'] = 'Basic'
        stacks_final_Proj['Slate'] = slates
        stacks_final_Proj_line_dk_basic = stacks_final_Proj.sort_values(by='Median', ascending=False)
        # Accumulate this slate's results onto the running hold frame.
        dk_stacks_hold = pd_concat([dk_stacks_hold, stacks_final_Proj_line_dk_basic])
        st.write(f'finished {slates} DK basic stack matrix')
    st.write(f'finished DK basic stack matrix')
    return dk_stacks_hold
#------ BUILD PP MATRIX BASIC OUTCOMES ------#
def build_dk_pp_stack_matrix_basic_outcomes(slate_info, dk_pp_stacks_hold, own_dict):
    """Simulate basic DraftKings power-play (5-skater) stack outcomes.

    Same pipeline as the 3-skater line-stack builder but reads the
    'DK_PP_Matrix' sheet, tracks skaters SK1-SK5, and uses a 75+ point
    threshold.  Appends one row-set per slate to *dk_pp_stacks_hold*,
    which is returned.

    NOTE(review): depends on module globals (gc, gc2, NHL_Master_hold,
    slate_options, total_sims, nan_value, wrong_names, right_names).
    """
    # Primary sheets client first; fall back to the secondary client on any
    # failure.  Bare except is deliberate best-effort.  Unlike the line-stack
    # builder, the header assignment happens after the try/except here.
    try:
        sh = gc.open_by_url(NHL_Master_hold)
        worksheet = sh.worksheet('DK_PP_Matrix')
        stacks_df = DataFrame(worksheet.get_values())
    except:
        sh = gc2.open_by_url(NHL_Master_hold)
        worksheet = sh.worksheet('DK_PP_Matrix')
        stacks_df = DataFrame(worksheet.get_values())
    # First row is the header; promote it, drop it, coerce numeric columns.
    stacks_df.columns = stacks_df.iloc[0]
    stacks_df = stacks_df[1:]
    stacks_df = stacks_df.reset_index(drop=True)
    stacks_df = stacks_df[['Line', 'SK1', 'SK2', 'SK3', 'SK4', 'SK5', 'Cost', 'Team Total', 'TP/$', 'Projection', 'Own']]
    stacks_df.replace("", nan_value, inplace=True)
    stacks_df.dropna(subset=['Cost'], inplace=True)
    stacks_df['Cost'] = stacks_df['Cost'].astype(int)
    stacks_df['Team Total'] = stacks_df['Team Total'].astype(float)
    stacks_df['TP/$'] = stacks_df['TP/$'].astype(float)
    stacks_df['Projection'] = stacks_df['Projection'].astype(float)
    # Normalize skater names so the ownership map lookups below can match.
    stacks_df['SK1'] = stacks_df['SK1'].replace(wrong_names, right_names)
    stacks_df['SK2'] = stacks_df['SK2'].replace(wrong_names, right_names)
    stacks_df['SK3'] = stacks_df['SK3'].replace(wrong_names, right_names)
    stacks_df['SK4'] = stacks_df['SK4'].replace(wrong_names, right_names)
    stacks_df['SK5'] = stacks_df['SK5'].replace(wrong_names, right_names)
    for slates in slate_options:
        # Teams eligible for this slate.  NOTE(review): slate_options contains
        # 'Late Slate' but only 'Auxiliary Slate' is handled here, so
        # roo_team_list would carry over from the previous iteration for that
        # value — confirm intended.
        if slates == 'Main Slate':
            roo_team_list = slate_info['DK_Main'].str.strip().dropna().values.tolist()
        elif slates == 'Secondary Slate':
            roo_team_list = slate_info['DK_Secondary'].str.strip().dropna().values.tolist()
        elif slates == 'Auxiliary Slate':
            roo_team_list = slate_info['DK_Third'].str.strip().dropna().values.tolist()
        basic_PP_stack_own_df = stacks_df.copy()
        # Keep only PP stacks whose Line label mentions a slate-eligible team.
        basic_PP_stack_own_df = basic_PP_stack_own_df[basic_PP_stack_own_df['Line'].str.contains('|'.join(roo_team_list))]
        # Sheet revisions differ in the label column name ('Line' vs 'Player').
        try:
            stacks_flex_file = basic_PP_stack_own_df[['Line', 'SK1', 'SK2', 'SK3', 'SK4', 'SK5', 'Cost', 'Team Total', 'TP/$', 'Projection']]
            stacks_flex_file.rename(columns={"Line": "Player", "Projection": "Median", "Cost": "Salary"}, inplace = True)
        except:
            stacks_flex_file = basic_PP_stack_own_df[['Player', 'SK1', 'SK2', 'SK3', 'SK4', 'SK5', 'Cost', 'Team Total', 'TP/$', 'Projection']]
            stacks_flex_file.rename(columns={"Projection": "Median", "Cost": "Salary"}, inplace = True)
        # Heuristic distribution parameters around the median projection.
        stacks_flex_file['Floor'] = stacks_flex_file['Median'] * .25
        stacks_flex_file['Ceiling'] = stacks_flex_file['Median'] * 2
        stacks_flex_file['STD'] = (stacks_flex_file['Median'] / 3)
        stacks_flex_file = stacks_flex_file[['Player', 'SK1', 'SK2', 'SK3', 'SK4', 'SK5', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD']]
        stacks_flex_file = stacks_flex_file.reset_index(drop=True)
        stacks_hold_file = stacks_flex_file.copy()
        stacks_overall_file = stacks_flex_file.copy()
        stacks_salary_file = stacks_flex_file.copy()
        stacks_total_sims = total_sims
        # Vectorized array path (naming suggests a former GPU/cupy path);
        # falls back to the slower per-column pandas loop on any failure.
        try:
            stacks_overall_median_gpu = np_array(stacks_overall_file['Median'])
            stacks_overall_std_gpu = np_array(stacks_overall_file['STD'])
            stacks_overall_salary_gpu = np_array(stacks_overall_file['Salary'])
            stacks_data_shape = (len(stacks_overall_file['Player']), stacks_total_sims)
            stacks_salary_array = np_zeros(stacks_data_shape)
            stacks_sim_array = np_zeros(stacks_data_shape)
            # Salary is constant across sims; broadcast it into every column.
            for x in range(0, stacks_total_sims):
                stacks_result_gpu = stacks_overall_salary_gpu
                stacks_salary_array[:, x] = stacks_result_gpu
            stacks_cupy_array = stacks_salary_array
            stacks_salary_file = stacks_salary_file.reset_index(drop=True)
            stacks_salary_cupy = DataFrame(stacks_cupy_array, columns=list(range(0, stacks_total_sims)))
            stacks_salary_check_file = pd_concat([stacks_salary_file, stacks_salary_cupy], axis=1)
        except:
            for x in range(0,stacks_total_sims):
                stacks_salary_file[x] = stacks_salary_file['Salary']
            stacks_salary_check_file = stacks_salary_file.copy()
        # Keep only the per-sim salary columns, scaled to $K for the Nx checks.
        stacks_salary_file=stacks_salary_check_file.drop(['Player', 'SK1', 'SK2', 'SK3', 'SK4', 'SK5', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD'], axis=1)
        stacks_salary_file = stacks_salary_file.div(1000)
        # Draw the simulated scores: one Normal(Median, STD) column per sim.
        try:
            for x in range(0, stacks_total_sims):
                stacks_result_gpu = np_random.normal(stacks_overall_median_gpu, stacks_overall_std_gpu)
                stacks_sim_array[:, x] = stacks_result_gpu
            stacks_add_array = stacks_sim_array
            stacks_overall_file = stacks_overall_file.reset_index(drop=True)
            stacks_df2 = DataFrame(stacks_add_array, columns=list(range(0, stacks_total_sims)))
            stacks_check_file = pd_concat([stacks_overall_file, stacks_df2], axis=1)
        except:
            for x in range(0,stacks_total_sims):
                stacks_overall_file[x] = np_random.normal(stacks_overall_file['Median'],stacks_overall_file['STD'])
            stacks_check_file = stacks_overall_file.copy()
        stacks_overall_file=stacks_check_file.drop(['Player', 'SK1', 'SK2', 'SK3', 'SK4', 'SK5', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD'], axis=1)
        stacks_players_only = stacks_hold_file[['Player']]
        # NOTE: stacks_raw_lineups_file aliases stacks_players_only (no copy),
        # so both frames gain the per-sim columns written in the loop below.
        stacks_raw_lineups_file = stacks_players_only
        # Rank every PP stack within each simulation (rank 1 = best score).
        for x in range(0,stacks_total_sims):
            stacks_maps_dict = {'proj_map':dict(zip(stacks_hold_file.Player,stacks_overall_file[x]))}
            stacks_raw_lineups_file[x] = sum([stacks_raw_lineups_file['Player'].map(stacks_maps_dict['proj_map'])])
            stacks_players_only[x] = stacks_raw_lineups_file[x].rank(ascending=False)
        stacks_players_only=stacks_players_only.drop(['Player'], axis=1)
        # Score minus N-times-salary; >= 1 below counts as an Nx-value hit.
        stacks_salary_2x_check = (stacks_overall_file - (stacks_salary_file*2))
        stacks_salary_3x_check = (stacks_overall_file - (stacks_salary_file*3))
        stacks_salary_4x_check = (stacks_overall_file - (stacks_salary_file*4))
        # Convert the per-sim rank/score columns into per-stack rates.
        stacks_players_only['Average_Rank'] = stacks_players_only.mean(axis=1)
        stacks_players_only['Top_finish'] = stacks_players_only[stacks_players_only == 1].count(axis=1)/stacks_total_sims
        stacks_players_only['Top_5_finish'] = stacks_players_only[stacks_players_only <= 5].count(axis=1)/stacks_total_sims
        stacks_players_only['Top_10_finish'] = stacks_players_only[stacks_players_only <= 10].count(axis=1)/stacks_total_sims
        stacks_players_only['75+%'] = stacks_overall_file[stacks_overall_file >= 75].count(axis=1)/float(stacks_total_sims)
        stacks_players_only['2x%'] = stacks_salary_2x_check[stacks_salary_2x_check >= 1].count(axis=1)/float(stacks_total_sims)
        stacks_players_only['3x%'] = stacks_salary_3x_check[stacks_salary_3x_check >= 1].count(axis=1)/float(stacks_total_sims)
        stacks_players_only['4x%'] = stacks_salary_4x_check[stacks_salary_4x_check >= 1].count(axis=1)/float(stacks_total_sims)
        stacks_players_only['Player'] = stacks_hold_file[['Player']]
        stacks_final_outcomes = stacks_players_only[['Player', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '75+%', '2x%', '3x%', '4x%']]
        stacks_final_Proj = merge(stacks_hold_file, stacks_final_outcomes, on="Player")
        stacks_final_Proj = stacks_final_Proj[['Player', 'SK1', 'SK2', 'SK3', 'SK4', 'SK5', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '75+%', '2x%', '3x%', '4x%']]
        # Stack ownership = sum of this slate's individual skater ownerships
        # (missing skaters contribute 0).
        stacks_final_Proj['Own_SK1'] = stacks_final_Proj['SK1'].str.strip().map(own_dict[slates]).fillna(0).astype(float)
        stacks_final_Proj['Own_SK2'] = stacks_final_Proj['SK2'].str.strip().map(own_dict[slates]).fillna(0).astype(float)
        stacks_final_Proj['Own_SK3'] = stacks_final_Proj['SK3'].str.strip().map(own_dict[slates]).fillna(0).astype(float)
        stacks_final_Proj['Own_SK4'] = stacks_final_Proj['SK4'].str.strip().map(own_dict[slates]).fillna(0).astype(float)
        stacks_final_Proj['Own_SK5'] = stacks_final_Proj['SK5'].str.strip().map(own_dict[slates]).fillna(0).astype(float)
        stacks_final_Proj['Own'] = stacks_final_Proj['Own_SK1'] + stacks_final_Proj['Own_SK2'] + stacks_final_Proj['Own_SK3'] + stacks_final_Proj['Own_SK4'] + stacks_final_Proj['Own_SK5']
        stacks_final_Proj = stacks_final_Proj.drop(columns=['Own_SK1', 'Own_SK2', 'Own_SK3', 'Own_SK4', 'Own_SK5'])
        stacks_final_Proj['Site'] = 'Draftkings'
        stacks_final_Proj['Type'] = 'Basic'
        stacks_final_Proj['Slate'] = slates
        stacks_final_Proj_pp_dk_basic = stacks_final_Proj.sort_values(by='Median', ascending=False)
        # Accumulate this slate's results onto the running hold frame.
        dk_pp_stacks_hold = pd_concat([dk_pp_stacks_hold, stacks_final_Proj_pp_dk_basic])
    return dk_pp_stacks_hold
#------ BUILD STACK MATRIX BASIC OUTCOMES ------#
def build_fd_stack_matrix_basic_outcomes(slate_info, fd_stacks_hold):
    """Simulate Fanduel 3-skater line stacks and append per-slate outcome tables.

    Reads the 'FD_Stack_Matrix' sheet from the master workbook, then for every
    slate in ``slate_options`` runs ``total_sims`` normal-draw simulations of
    each stack's fantasy total to estimate finish-rank probabilities
    (Top 1/5/10), a 50+ point rate, and 2x/3x/4x salary-multiple rates.

    Parameters:
        slate_info (DataFrame): slate configuration; the FD_Main / FD_Secondary /
            FD_Third columns hold the team lists for each slate.
        fd_stacks_hold (DataFrame): accumulator each slate's finished projection
            table is concatenated onto.

    Returns:
        DataFrame: ``fd_stacks_hold`` with this run's rows appended.
    """
    try:
        sh = gc.open_by_url(NHL_Master_hold)
        worksheet = sh.worksheet('FD_Stack_Matrix')
        stacks_df = DataFrame(worksheet.get_values())
    except:
        # Primary gspread client failed (quota/auth); retry on the backup client.
        sh = gc2.open_by_url(NHL_Master_hold)
        worksheet = sh.worksheet('FD_Stack_Matrix')
        stacks_df = DataFrame(worksheet.get_values())
    # Promote the first sheet row to column headers.
    stacks_df.columns = stacks_df.iloc[0]
    stacks_df = stacks_df[1:]
    stacks_df = stacks_df.reset_index(drop=True)
    stacks_df = stacks_df[['Line', 'SK1', 'SK2', 'SK3', 'Cost', 'Team Total', 'TP/$', 'Projection', 'Own']]
    stacks_df.replace("", nan_value, inplace=True)
    stacks_df.dropna(subset=['Cost'], inplace=True)
    stacks_df['Cost'] = stacks_df['Cost'].astype(int)
    stacks_df['Team Total'] = stacks_df['Team Total'].astype(float)
    stacks_df['TP/$'] = stacks_df['TP/$'].astype(float)
    stacks_df['Projection'] = stacks_df['Projection'].astype(float)
    # Normalize skater-name spellings against the master alias lists.
    stacks_df['SK1'] = stacks_df['SK1'].replace(wrong_names, right_names)
    stacks_df['SK2'] = stacks_df['SK2'].replace(wrong_names, right_names)
    stacks_df['SK3'] = stacks_df['SK3'].replace(wrong_names, right_names)
    for slates in slate_options:
        if slates == 'Main Slate':
            roo_team_list = slate_info['FD_Main'].str.strip().dropna().values.tolist()
        elif slates == 'Secondary Slate':
            roo_team_list = slate_info['FD_Secondary'].str.strip().dropna().values.tolist()
        elif slates == 'Late Slate':
            # BUGFIX: this branch previously tested 'Auxiliary Slate', which is
            # not in slate_options, so the Late Slate pass silently reused the
            # Secondary Slate team list instead of FD_Third.
            roo_team_list = slate_info['FD_Third'].str.strip().dropna().values.tolist()
        basic_stack_own_df = stacks_df.copy()
        # Keep only stacks whose Line label mentions a team on this slate.
        basic_stack_own_df = basic_stack_own_df[basic_stack_own_df['Line'].str.contains('|'.join(roo_team_list))]
        basic_stacks_own_dict = dict(zip(basic_stack_own_df.Line, basic_stack_own_df.Own))
        try:
            stacks_flex_file = basic_stack_own_df[['Line', 'SK1', 'SK2', 'SK3', 'Cost', 'Team Total', 'TP/$', 'Projection']]
            stacks_flex_file.rename(columns={"Line": "Player", "Projection": "Median", "Cost": "Salary"}, inplace = True)
        except:
            # Fallback for sheets that already label the stack column 'Player'.
            stacks_flex_file = basic_stack_own_df[['Player', 'SK1', 'SK2', 'SK3', 'Cost', 'Team Total', 'TP/$', 'Projection']]
            stacks_flex_file.rename(columns={"Projection": "Median", "Cost": "Salary"}, inplace = True)
        # Heuristic distribution parameters derived from the median projection.
        stacks_flex_file['Floor'] = stacks_flex_file['Median'] * .25
        stacks_flex_file['Ceiling'] = stacks_flex_file['Median'] * 2
        stacks_flex_file['STD'] = (stacks_flex_file['Median'] / 3)
        stacks_flex_file = stacks_flex_file[['Player', 'SK1', 'SK2', 'SK3', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD']]
        stacks_flex_file = stacks_flex_file.reset_index(drop=True)
        stacks_hold_file = stacks_flex_file.copy()
        stacks_overall_file = stacks_flex_file.copy()
        stacks_salary_file = stacks_flex_file.copy()
        stacks_total_sims = total_sims
        try:
            # Vectorized path: broadcast salaries into a (stacks x sims) array.
            stacks_overall_median_gpu = np_array(stacks_overall_file['Median'])
            stacks_overall_std_gpu = np_array(stacks_overall_file['STD'])
            stacks_overall_salary_gpu = np_array(stacks_overall_file['Salary'])
            stacks_data_shape = (len(stacks_overall_file['Player']), stacks_total_sims)
            stacks_salary_array = np_zeros(stacks_data_shape)
            stacks_sim_array = np_zeros(stacks_data_shape)
            for x in range(0, stacks_total_sims):
                stacks_result_gpu = stacks_overall_salary_gpu
                stacks_salary_array[:, x] = stacks_result_gpu
            stacks_cupy_array = stacks_salary_array
            stacks_salary_file = stacks_salary_file.reset_index(drop=True)
            stacks_salary_cupy = DataFrame(stacks_cupy_array, columns=list(range(0, stacks_total_sims)))
            stacks_salary_check_file = pd_concat([stacks_salary_file, stacks_salary_cupy], axis=1)
        except:
            # Fallback: column-by-column pandas assignment.
            for x in range(0,stacks_total_sims):
                stacks_salary_file[x] = stacks_salary_file['Salary']
            stacks_salary_check_file = stacks_salary_file.copy()
        stacks_salary_file=stacks_salary_check_file.drop(['Player', 'SK1', 'SK2', 'SK3', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD'], axis=1)
        # Salary in thousands, so Nx-salary fantasy-point checks line up below.
        stacks_salary_file = stacks_salary_file.div(1000)
        try:
            # One normal draw per stack per simulation column.
            for x in range(0, stacks_total_sims):
                stacks_result_gpu = np_random.normal(stacks_overall_median_gpu, stacks_overall_std_gpu)
                stacks_sim_array[:, x] = stacks_result_gpu
            stacks_add_array = stacks_sim_array
            stacks_overall_file = stacks_overall_file.reset_index(drop=True)
            stacks_df2 = DataFrame(stacks_add_array, columns=list(range(0, stacks_total_sims)))
            stacks_check_file = pd_concat([stacks_overall_file, stacks_df2], axis=1)
        except:
            for x in range(0,stacks_total_sims):
                stacks_overall_file[x] = np_random.normal(stacks_overall_file['Median'],stacks_overall_file['STD'])
            stacks_check_file = stacks_overall_file.copy()
        stacks_overall_file=stacks_check_file.drop(['Player', 'SK1', 'SK2', 'SK3', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD'], axis=1)
        stacks_players_only = stacks_hold_file[['Player']]
        # NOTE: intentional alias -- the rank assignment below overwrites the
        # per-sim totals in the same frame.
        stacks_raw_lineups_file = stacks_players_only
        for x in range(0,stacks_total_sims):
            stacks_maps_dict = {'proj_map':dict(zip(stacks_hold_file.Player,stacks_overall_file[x]))}
            stacks_raw_lineups_file[x] = sum([stacks_raw_lineups_file['Player'].map(stacks_maps_dict['proj_map'])])
            stacks_players_only[x] = stacks_raw_lineups_file[x].rank(ascending=False)
        stacks_players_only=stacks_players_only.drop(['Player'], axis=1)
        stacks_salary_2x_check = (stacks_overall_file - (stacks_salary_file*2))
        stacks_salary_3x_check = (stacks_overall_file - (stacks_salary_file*3))
        stacks_salary_4x_check = (stacks_overall_file - (stacks_salary_file*4))
        # NOTE(review): Average_Rank is added before the row-wise counts below,
        # so it participates in them; at 1000 sims the effect is at most 0.001.
        stacks_players_only['Average_Rank'] = stacks_players_only.mean(axis=1)
        stacks_players_only['Top_finish'] = stacks_players_only[stacks_players_only == 1].count(axis=1)/stacks_total_sims
        stacks_players_only['Top_5_finish'] = stacks_players_only[stacks_players_only <= 5].count(axis=1)/stacks_total_sims
        stacks_players_only['Top_10_finish'] = stacks_players_only[stacks_players_only <= 10].count(axis=1)/stacks_total_sims
        stacks_players_only['50+%'] = stacks_overall_file[stacks_overall_file >= 50].count(axis=1)/float(stacks_total_sims)
        stacks_players_only['2x%'] = stacks_salary_2x_check[stacks_salary_2x_check >= 1].count(axis=1)/float(stacks_total_sims)
        stacks_players_only['3x%'] = stacks_salary_3x_check[stacks_salary_3x_check >= 1].count(axis=1)/float(stacks_total_sims)
        stacks_players_only['4x%'] = stacks_salary_4x_check[stacks_salary_4x_check >= 1].count(axis=1)/float(stacks_total_sims)
        stacks_players_only['Player'] = stacks_hold_file[['Player']]
        stacks_final_outcomes = stacks_players_only[['Player', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '50+%', '2x%', '3x%', '4x%']]
        stacks_final_Proj = merge(stacks_hold_file, stacks_final_outcomes, on="Player")
        stacks_final_Proj = stacks_final_Proj[['Player', 'SK1', 'SK2', 'SK3', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '50+%', '2x%', '3x%', '4x%']]
        stacks_final_Proj['Own'] = stacks_final_Proj['Player'].map(basic_stacks_own_dict).astype(float)
        stacks_final_Proj['Site'] = 'Fanduel'
        stacks_final_Proj['Type'] = 'Basic'
        stacks_final_Proj['Slate'] = slates
        stacks_final_Proj_line_fd_basic = stacks_final_Proj.sort_values(by='Median', ascending=False)
        fd_stacks_hold = pd_concat([fd_stacks_hold, stacks_final_Proj_line_fd_basic])
        st.write(f'finished {slates} FD basic stack matrix')
    st.write(f'finished FD basic stack matrix')
    return fd_stacks_hold
#------ BUILD PP MATRIX BASIC OUTCOMES ------#
def build_fd_pp_stack_matrix_basic_outcomes(slate_info, fd_pp_stacks_hold):
    """Simulate Fanduel 5-skater power-play stacks and append per-slate tables.

    Reads the 'FD_PP_Matrix' sheet, then for every slate in ``slate_options``
    runs ``total_sims`` normal-draw simulations of each PP unit's fantasy total
    to estimate finish-rank probabilities (Top 1/5/10), a 75+ point rate, and
    2x/3x/4x salary-multiple rates.

    Parameters:
        slate_info (DataFrame): slate configuration; the FD_Main / FD_Secondary /
            FD_Third columns hold the team lists for each slate.
        fd_pp_stacks_hold (DataFrame): accumulator each slate's finished
            projection table is concatenated onto.

    Returns:
        DataFrame: ``fd_pp_stacks_hold`` with this run's rows appended.
    """
    try:
        sh = gc.open_by_url(NHL_Master_hold)
        worksheet = sh.worksheet('FD_PP_Matrix')
        stacks_df = DataFrame(worksheet.get_values())
    except:
        # Primary gspread client failed (quota/auth); retry on the backup client.
        sh = gc2.open_by_url(NHL_Master_hold)
        worksheet = sh.worksheet('FD_PP_Matrix')
        stacks_df = DataFrame(worksheet.get_values())
    # Promote the first sheet row to column headers.
    stacks_df.columns = stacks_df.iloc[0]
    stacks_df = stacks_df[1:]
    stacks_df = stacks_df.reset_index(drop=True)
    stacks_df = stacks_df[['Line', 'SK1', 'SK2', 'SK3', 'SK4', 'SK5', 'Cost', 'Team Total', 'TP/$', 'Projection', 'Own']]
    stacks_df.replace("", nan_value, inplace=True)
    stacks_df.dropna(subset=['Cost'], inplace=True)
    stacks_df['Cost'] = stacks_df['Cost'].astype(int)
    stacks_df['Team Total'] = stacks_df['Team Total'].astype(float)
    stacks_df['TP/$'] = stacks_df['TP/$'].astype(float)
    stacks_df['Projection'] = stacks_df['Projection'].astype(float)
    # Normalize skater-name spellings against the master alias lists.
    stacks_df['SK1'] = stacks_df['SK1'].replace(wrong_names, right_names)
    stacks_df['SK2'] = stacks_df['SK2'].replace(wrong_names, right_names)
    stacks_df['SK3'] = stacks_df['SK3'].replace(wrong_names, right_names)
    stacks_df['SK4'] = stacks_df['SK4'].replace(wrong_names, right_names)
    stacks_df['SK5'] = stacks_df['SK5'].replace(wrong_names, right_names)
    for slates in slate_options:
        if slates == 'Main Slate':
            roo_team_list = slate_info['FD_Main'].str.strip().dropna().values.tolist()
        elif slates == 'Secondary Slate':
            roo_team_list = slate_info['FD_Secondary'].str.strip().dropna().values.tolist()
        elif slates == 'Late Slate':
            # BUGFIX: this branch previously tested 'Auxiliary Slate', which is
            # not in slate_options, so the Late Slate pass silently reused the
            # Secondary Slate team list instead of FD_Third.
            roo_team_list = slate_info['FD_Third'].str.strip().dropna().values.tolist()
        basic_PP_stack_own_df = stacks_df.copy()
        # Keep only PP units whose Line label mentions a team on this slate.
        basic_PP_stack_own_df = basic_PP_stack_own_df[basic_PP_stack_own_df['Line'].str.contains('|'.join(roo_team_list))]
        basic_PP_stacks_own_dict = dict(zip(basic_PP_stack_own_df.Line, basic_PP_stack_own_df.Own))
        try:
            stacks_flex_file = basic_PP_stack_own_df[['Line', 'SK1', 'SK2', 'SK3', 'SK4', 'SK5', 'Cost', 'Team Total', 'TP/$', 'Projection']]
            stacks_flex_file.rename(columns={"Line": "Player", "Projection": "Median", "Cost": "Salary"}, inplace = True)
        except:
            # Fallback for sheets that already label the stack column 'Player'.
            stacks_flex_file = basic_PP_stack_own_df[['Player', 'SK1', 'SK2', 'SK3', 'SK4', 'SK5', 'Cost', 'Team Total', 'TP/$', 'Projection']]
            stacks_flex_file.rename(columns={"Projection": "Median", "Cost": "Salary"}, inplace = True)
        # Heuristic distribution parameters derived from the median projection.
        stacks_flex_file['Floor'] = stacks_flex_file['Median'] * .25
        stacks_flex_file['Ceiling'] = stacks_flex_file['Median'] * 2
        stacks_flex_file['STD'] = (stacks_flex_file['Median'] / 3)
        stacks_flex_file = stacks_flex_file[['Player', 'SK1', 'SK2', 'SK3', 'SK4', 'SK5', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD']]
        stacks_flex_file = stacks_flex_file.reset_index(drop=True)
        stacks_hold_file = stacks_flex_file.copy()
        stacks_overall_file = stacks_flex_file.copy()
        stacks_salary_file = stacks_flex_file.copy()
        stacks_total_sims = total_sims
        try:
            # Vectorized path: broadcast salaries into a (stacks x sims) array.
            stacks_overall_median_gpu = np_array(stacks_overall_file['Median'])
            stacks_overall_std_gpu = np_array(stacks_overall_file['STD'])
            stacks_overall_salary_gpu = np_array(stacks_overall_file['Salary'])
            stacks_data_shape = (len(stacks_overall_file['Player']), stacks_total_sims)
            stacks_salary_array = np_zeros(stacks_data_shape)
            stacks_sim_array = np_zeros(stacks_data_shape)
            for x in range(0, stacks_total_sims):
                stacks_result_gpu = stacks_overall_salary_gpu
                stacks_salary_array[:, x] = stacks_result_gpu
            stacks_cupy_array = stacks_salary_array
            stacks_salary_file = stacks_salary_file.reset_index(drop=True)
            stacks_salary_cupy = DataFrame(stacks_cupy_array, columns=list(range(0, stacks_total_sims)))
            stacks_salary_check_file = pd_concat([stacks_salary_file, stacks_salary_cupy], axis=1)
        except:
            # Fallback: column-by-column pandas assignment.
            for x in range(0,stacks_total_sims):
                stacks_salary_file[x] = stacks_salary_file['Salary']
            stacks_salary_check_file = stacks_salary_file.copy()
        stacks_salary_file=stacks_salary_check_file.drop(['Player', 'SK1', 'SK2', 'SK3', 'SK4', 'SK5', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD'], axis=1)
        # Salary in thousands, so Nx-salary fantasy-point checks line up below.
        stacks_salary_file = stacks_salary_file.div(1000)
        try:
            # One normal draw per stack per simulation column.
            for x in range(0, stacks_total_sims):
                stacks_result_gpu = np_random.normal(stacks_overall_median_gpu, stacks_overall_std_gpu)
                stacks_sim_array[:, x] = stacks_result_gpu
            stacks_add_array = stacks_sim_array
            stacks_overall_file = stacks_overall_file.reset_index(drop=True)
            stacks_df2 = DataFrame(stacks_add_array, columns=list(range(0, stacks_total_sims)))
            stacks_check_file = pd_concat([stacks_overall_file, stacks_df2], axis=1)
        except:
            for x in range(0,stacks_total_sims):
                stacks_overall_file[x] = np_random.normal(stacks_overall_file['Median'],stacks_overall_file['STD'])
            stacks_check_file = stacks_overall_file.copy()
        stacks_overall_file=stacks_check_file.drop(['Player', 'SK1', 'SK2', 'SK3', 'SK4', 'SK5', 'Salary', 'Floor', 'Median', 'Ceiling', 'STD'], axis=1)
        stacks_players_only = stacks_hold_file[['Player']]
        # NOTE: intentional alias -- the rank assignment below overwrites the
        # per-sim totals in the same frame.
        stacks_raw_lineups_file = stacks_players_only
        for x in range(0,stacks_total_sims):
            stacks_maps_dict = {'proj_map':dict(zip(stacks_hold_file.Player,stacks_overall_file[x]))}
            stacks_raw_lineups_file[x] = sum([stacks_raw_lineups_file['Player'].map(stacks_maps_dict['proj_map'])])
            stacks_players_only[x] = stacks_raw_lineups_file[x].rank(ascending=False)
        stacks_players_only=stacks_players_only.drop(['Player'], axis=1)
        stacks_salary_2x_check = (stacks_overall_file - (stacks_salary_file*2))
        stacks_salary_3x_check = (stacks_overall_file - (stacks_salary_file*3))
        stacks_salary_4x_check = (stacks_overall_file - (stacks_salary_file*4))
        # NOTE(review): Average_Rank is added before the row-wise counts below,
        # so it participates in them; at 1000 sims the effect is at most 0.001.
        stacks_players_only['Average_Rank'] = stacks_players_only.mean(axis=1)
        stacks_players_only['Top_finish'] = stacks_players_only[stacks_players_only == 1].count(axis=1)/stacks_total_sims
        stacks_players_only['Top_5_finish'] = stacks_players_only[stacks_players_only <= 5].count(axis=1)/stacks_total_sims
        stacks_players_only['Top_10_finish'] = stacks_players_only[stacks_players_only <= 10].count(axis=1)/stacks_total_sims
        stacks_players_only['75+%'] = stacks_overall_file[stacks_overall_file >= 75].count(axis=1)/float(stacks_total_sims)
        stacks_players_only['2x%'] = stacks_salary_2x_check[stacks_salary_2x_check >= 1].count(axis=1)/float(stacks_total_sims)
        stacks_players_only['3x%'] = stacks_salary_3x_check[stacks_salary_3x_check >= 1].count(axis=1)/float(stacks_total_sims)
        stacks_players_only['4x%'] = stacks_salary_4x_check[stacks_salary_4x_check >= 1].count(axis=1)/float(stacks_total_sims)
        stacks_players_only['Player'] = stacks_hold_file[['Player']]
        stacks_final_outcomes = stacks_players_only[['Player', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '75+%', '2x%', '3x%', '4x%']]
        stacks_final_Proj = merge(stacks_hold_file, stacks_final_outcomes, on="Player")
        stacks_final_Proj = stacks_final_Proj[['Player', 'SK1', 'SK2', 'SK3', 'SK4', 'SK5', 'Salary', 'Floor', 'Median', 'Ceiling', 'Top_finish', 'Top_5_finish', 'Top_10_finish', '75+%', '2x%', '3x%', '4x%']]
        stacks_final_Proj['Own'] = stacks_final_Proj['Player'].map(basic_PP_stacks_own_dict).astype(float)
        stacks_final_Proj['Site'] = 'Fanduel'
        stacks_final_Proj['Type'] = 'Basic'
        stacks_final_Proj['Slate'] = slates
        stacks_final_Proj_pp_fd_basic = stacks_final_Proj.sort_values(by='Median', ascending=False)
        fd_pp_stacks_hold = pd_concat([fd_pp_stacks_hold, stacks_final_Proj_pp_fd_basic])
    return fd_pp_stacks_hold
############----------FUNCTION FOR DRAFTKINGS NHL SEED FRAME CREATION----------############
def DK_NHL_seed_frame(db, roo_file):
    """Build Draftkings NHL optimal-lineup seed data per slate and hand off to Go.

    For each slate in ``slate_options``: filters the ROO projections to DK rows,
    derives value/ownership percentile metrics, exports the player pool and
    ortools-optimized team stacks to JSON under ``dk_nhl_go/``, stores the
    player name map in the slate's MongoDB collection, and invokes the Go
    classic lineup generator.

    Parameters:
        db: MongoDB database handle holding the per-slate name-map collections.
        roo_file (DataFrame): combined ROO projection frame (all sites/slates).

    Returns:
        None. Side effects only: JSON files, MongoDB writes, Go subprocess.
    """
    wrong_team_names = ['TB', 'NJ', 'SJ', 'LA']
    right_team_names = ['TBL', 'NJD', 'SJS', 'LAK']
    source_frame = roo_file.copy()
    # Treat zero/blank cells as missing before the salary/median filters.
    source_frame.replace(['0', ''], [np_nan, np_nan], inplace=True)
    source_frame = source_frame.dropna(subset='Salary')
    source_frame = source_frame.dropna(subset='Median')
    source_frame = source_frame[source_frame['Site'] == 'Draftkings']
    baseline_proj = source_frame.copy()
    for slates in slate_options:
        optimal_lineups = []
        Overall_Proj = baseline_proj[baseline_proj['Slate'] == slates]
        # Lower salary_Value = cheaper per projected point; ranks are percentiles.
        Overall_Proj['salary_Value'] = (Overall_Proj['Salary'] / 1000) / Overall_Proj['Median']
        Overall_Proj['proj_Value'] = Overall_Proj['Median'].rank(pct = True)
        Overall_Proj['own_Value'] = Overall_Proj['Own'].rank(pct = True)
        Overall_Proj['sort_Value'] = Overall_Proj[['own_Value', 'salary_Value']].mean(axis=1)
        # Goalies have no Line/PP Unit; zero-fill so dropna() keeps them.
        Overall_Proj['Line'] = Overall_Proj['Line'].fillna(0)
        Overall_Proj['PP Unit'] = Overall_Proj['PP Unit'].fillna(0)
        Overall_Proj.rename(columns={"Player": "Name"}, inplace = True)
        Overall_Proj = Overall_Proj.dropna()
        Overall_Proj['Team'] = Overall_Proj['Team'].replace(wrong_team_names, right_team_names)
        Overall_Proj['Opp'] = Overall_Proj['Opp'].replace(wrong_team_names, right_team_names)
        Overall_Proj['Name'] = Overall_Proj['Name'].replace(wrong_names, right_names)
        Overall_Proj = Overall_Proj.reset_index(drop=True)
        Team_list = DataFrame(Overall_Proj['Team'].unique(), columns=['Team'])
        Team_list['team_var'] = Team_list.index
        # Keep the highest-owned row per player name.
        players_full = Overall_Proj.sort_values(by='own_Value', ascending=False)
        players_median = players_full.drop_duplicates(subset ='Name', keep ='first')
        players_median = pd_merge(players_median, Team_list, how='left', on='Team')
        players_median['Var'] = players_median.index
        # Add slate identifier and collect data for JSON export
        players_median_copy = players_median.copy()
        players_median_copy['Slate'] = slates
        # Create maps for Go processing (string player id -> attribute).
        players_name_map = {str(int(idx)): str(name) for idx, name in players_median_copy.set_index('Var')['Name'].items()}
        players_salary_map = {str(int(idx)): int(salary) for idx, salary in players_median_copy.set_index('Var')['Salary'].items()}
        players_projection_map = {str(int(idx)): float(proj) for idx, proj in players_median_copy.set_index('Var')['Median'].items()}
        players_ownership_map = {str(int(idx)): float(own) for idx, own in players_median_copy.set_index('Var')['Own'].items()}
        players_team_map = {str(int(idx)): str(team) for idx, team in players_median_copy.set_index('Var')['Team'].items()}
        players_opp_map = {str(int(idx)): str(opp) for idx, opp in players_median_copy.set_index('Var')['Opp'].items()}
        # Create output data structure for Go
        output_data = {
            "players_median": {
                "players": [],
                "maps": {
                    "name_map": players_name_map,
                    "salary_map": players_salary_map,
                    "projection_map": players_projection_map,
                    "ownership_map": players_ownership_map,
                    "team_map": players_team_map,
                    "opp_map": players_opp_map
                }
            }
        }
        # Convert players to Go struct format
        for idx, row in players_median_copy.iterrows():
            player = {
                "id": int(row['Var']),
                "name": str(row['Name']),
                "position": str(row['Position']),
                "salary": int(row['Salary']),
                "projection": float(row['Median']),
                "ownership": float(row['Own']),
                "salary_value": float(row['salary_Value']),
                "proj_value": float(row['proj_Value']),
                "own_value": float(row['own_Value']),
                "sort_value": float(row['sort_Value']),
                "slate": str(row['Slate'])
            }
            output_data["players_median"]["players"].append(player)
        # Calculate project root for file paths (this file lives in src/sports/).
        current_dir = os.path.dirname(os.path.abspath(__file__))
        project_root = os.path.dirname(os.path.dirname(current_dir))
        dk_nhl_dir = os.path.join(project_root, 'dk_nhl_go')
        os.makedirs(dk_nhl_dir, exist_ok=True)
        # Write JSON data for Go processing
        player_data_path = os.path.join(dk_nhl_dir, 'player_data.json')
        with open(player_data_path, 'w') as f:
            json.dump(output_data, f)
        if slates == 'Main Slate':
            collection = db['DK_NHL_name_map']
        elif slates == 'Secondary Slate':
            collection = db['DK_NHL_Secondary_name_map']
        elif slates == 'Late Slate':
            collection = db['DK_NHL_Late_name_map']
        # Only the name map (id -> name) and its inverse are consumed below;
        # the salary/projection/team/own equivalents were dead locals, removed.
        master_name_map = pd_Series(players_median.Name.values,index=players_median.Var).to_dict()
        master_name_index = pd_Series(players_median.Var.values, index=players_median.Name).to_dict()
        # DK classic NHL roster shape and constraints.
        position_requirements = {
            'C': 2,
            'W': 3,
            'D': 2,
            'G': 1,
            'FLEX': 1
        }
        salary_cap = 50000
        max_team_skaters = 5
        # Teams to loop through
        teams_to_optimize = players_median['Team'].unique().tolist()
        required_positions = ['C1', 'C2', 'W1', 'W2', 'W3', 'D1', 'D2', 'G', 'FLEX']
        # Replace this slate's stored name map wholesale.
        collection.drop()
        try:
            # Convert dictionary to format suitable for MongoDB
            mongo_docs = [{"key": k, "value": v} for k, v in master_name_map.items()]
            collection.insert_many(mongo_docs, ordered=False)
        except Exception as e:
            st.write(f"Error inserting name map: {e}")
        time_sleep(1)
        optimals = init_team_results(players_median, position_requirements, salary_cap, max_team_skaters, teams_to_optimize, 1000, players_team_map)
        formatted_optimals = format_optimals(optimals, players_median, required_positions, players_team_map)
        # Map player names back to their numeric Var ids for the Go payload.
        for col in required_positions:
            if col in formatted_optimals.columns:
                formatted_optimals[col] = formatted_optimals[col].map(master_name_index).fillna(formatted_optimals[col])
        formatted_optimals['proj'] = formatted_optimals['proj'].astype(float)
        # Convert this slate's optimals to JSON format and add slate info
        for idx, row in formatted_optimals.iterrows():
            optimal_lineup = {
                "slate": slates,  # Add slate identifier
                "salary": int(row['salary']),
                "projection": float(row['proj']),
                "team": str(row['Team']),
                "team_count": int(row['Team_count']),
                "secondary": str(row['Secondary']),
                "secondary_count": int(row['Secondary_count']),
                "ownership": float(row['Own']),
                "players": [int(row['C1']), int(row['C2']), int(row['W1']),
                            int(row['W2']), int(row['W3']), int(row['D1']),
                            int(row['D2']), int(row['G']), int(row['FLEX'])]
            }
            optimal_lineups.append(optimal_lineup)
        st.write(f"Generated {len(formatted_optimals)} optimal lineups for slate {slates}")
        # NOTE(review): both JSON files are rewritten and the Go generator re-run
        # once per slate (optimal_lineups is reset each iteration) -- confirm the
        # Go side expects per-slate invocation rather than one combined file.
        optimal_lineups_path = os.path.join(dk_nhl_dir, 'optimal_lineups.json')
        with open(optimal_lineups_path, 'w') as f:
            json.dump(optimal_lineups, f)
        run_go_classic_lineup_generator("DK", "NHL")
    st.write("NHL lineup generation for DK completed successfully!")
############----------FUNCTION FOR FANDUEL NHL SEED FRAME CREATION----------############
def FD_NHL_seed_frame(db, roo_file):
    """Build Fanduel NHL optimal-lineup seed data per slate and hand off to Go.

    For each slate in ``slate_options``: filters the ROO projections to FD rows,
    derives value/ownership percentile metrics, exports the player pool and
    ortools-optimized team stacks to JSON under ``fd_nhl_go/``, stores the
    player name map in the slate's MongoDB collection, and invokes the Go
    classic lineup generator.

    Parameters:
        db: MongoDB database handle holding the per-slate name-map collections.
        roo_file (DataFrame): combined ROO projection frame (all sites/slates).

    Returns:
        None. Side effects only: JSON files, MongoDB writes, Go subprocess.
    """
    # (A local NHL_Master_hold URL that shadowed the module-level constant was
    # removed here -- nothing in this function reads it.)
    wrong_team_names = ['TB', 'NJ', 'SJ', 'LA']
    right_team_names = ['TBL', 'NJD', 'SJS', 'LAK']
    source_frame = roo_file.copy()
    # Treat zero/blank cells as missing before the salary/median filters.
    source_frame.replace(['0', ''], [np_nan, np_nan], inplace=True)
    source_frame = source_frame.dropna(subset='Salary')
    source_frame = source_frame.dropna(subset='Median')
    source_frame = source_frame[source_frame['Site'] == 'Fanduel']
    baseline_proj = source_frame.copy()
    for slates in slate_options:
        optimal_lineups = []
        Overall_Proj = baseline_proj[baseline_proj['Slate'] == slates]
        # Lower salary_Value = cheaper per projected point; ranks are percentiles.
        Overall_Proj['salary_Value'] = (Overall_Proj['Salary'] / 1000) / Overall_Proj['Median']
        Overall_Proj['proj_Value'] = Overall_Proj['Median'].rank(pct = True)
        Overall_Proj['own_Value'] = Overall_Proj['Own'].rank(pct = True)
        Overall_Proj['sort_Value'] = Overall_Proj[['own_Value', 'salary_Value']].mean(axis=1)
        # Goalies have no Line/PP Unit; zero-fill so dropna() keeps them.
        Overall_Proj['Line'] = Overall_Proj['Line'].fillna(0)
        Overall_Proj['PP Unit'] = Overall_Proj['PP Unit'].fillna(0)
        Overall_Proj.rename(columns={"Player": "Name"}, inplace = True)
        Overall_Proj = Overall_Proj.dropna()
        Overall_Proj['Name'] = Overall_Proj['Name'].replace(wrong_names, right_names)
        Overall_Proj['Team'] = Overall_Proj['Team'].replace(wrong_team_names, right_team_names)
        Overall_Proj = Overall_Proj.reset_index(drop=True)
        Team_list = DataFrame(Overall_Proj['Team'].unique(), columns=['Team'])
        Team_list['team_var'] = Team_list.index
        # Keep the highest-owned row per player name.
        players_full = Overall_Proj.sort_values(by='own_Value', ascending=False)
        players_median = players_full.drop_duplicates(subset ='Name', keep ='first')
        players_median = pd_merge(players_median, Team_list, how='left', on='Team')
        players_median['Var'] = players_median.index
        # Add slate identifier and collect data for JSON export
        players_median_copy = players_median.copy()
        players_median_copy['Slate'] = slates
        # Create maps for Go processing (string player id -> attribute).
        players_name_map = {str(int(idx)): str(name) for idx, name in players_median_copy.set_index('Var')['Name'].items()}
        players_salary_map = {str(int(idx)): int(salary) for idx, salary in players_median_copy.set_index('Var')['Salary'].items()}
        players_projection_map = {str(int(idx)): float(proj) for idx, proj in players_median_copy.set_index('Var')['Median'].items()}
        players_ownership_map = {str(int(idx)): float(own) for idx, own in players_median_copy.set_index('Var')['Own'].items()}
        players_team_map = {str(int(idx)): str(team) for idx, team in players_median_copy.set_index('Var')['Team'].items()}
        players_opp_map = {str(int(idx)): str(opp) for idx, opp in players_median_copy.set_index('Var')['Opp'].items()}
        # Create output data structure for Go
        output_data = {
            "players_median": {
                "players": [],
                "maps": {
                    "name_map": players_name_map,
                    "salary_map": players_salary_map,
                    "projection_map": players_projection_map,
                    "ownership_map": players_ownership_map,
                    "team_map": players_team_map,
                    "opp_map": players_opp_map
                }
            }
        }
        # Convert players to Go struct format
        for idx, row in players_median_copy.iterrows():
            player = {
                "id": int(row['Var']),
                "name": str(row['Name']),
                "position": str(row['Position']),
                "salary": int(row['Salary']),
                "projection": float(row['Median']),
                "ownership": float(row['Own']),
                "salary_value": float(row['salary_Value']),
                "proj_value": float(row['proj_Value']),
                "own_value": float(row['own_Value']),
                "sort_value": float(row['sort_Value']),
                "slate": str(row['Slate'])
            }
            output_data["players_median"]["players"].append(player)
        # Calculate project root for file paths (this file lives in src/sports/).
        current_dir = os.path.dirname(os.path.abspath(__file__))
        project_root = os.path.dirname(os.path.dirname(current_dir))
        fd_nhl_dir = os.path.join(project_root, 'fd_nhl_go')
        os.makedirs(fd_nhl_dir, exist_ok=True)
        player_data_path = os.path.join(fd_nhl_dir, 'player_data.json')
        with open(player_data_path, 'w') as f:
            json.dump(output_data, f)
        if slates == 'Main Slate':
            collection = db['FD_NHL_name_map']
        elif slates == 'Secondary Slate':
            collection = db['FD_NHL_Secondary_name_map']
        elif slates == 'Late Slate':
            collection = db['FD_NHL_Late_name_map']
        # Only the name map (id -> name) and its inverse are consumed below;
        # the salary/projection/team/own equivalents were dead locals, removed.
        master_name_map = pd_Series(players_median.Name.values,index=players_median.Var).to_dict()
        master_name_index = pd_Series(players_median.Var.values, index=players_median.Name).to_dict()
        # FD classic NHL roster shape and constraints.
        position_requirements = {
            'C': 2,
            'W': 2,
            'D': 2,
            'FLEX': 2,
            'G': 1,
        }
        salary_cap = 55000
        max_team_skaters = 4
        # Teams to loop through
        teams_to_optimize = players_median['Team'].unique().tolist()
        required_positions = ['C1', 'C2', 'W1', 'W2', 'D1', 'D2', 'FLEX1', 'FLEX2', 'G']
        # Replace this slate's stored name map wholesale.
        collection.drop()
        try:
            # Convert dictionary to format suitable for MongoDB
            mongo_docs = [{"key": k, "value": v} for k, v in master_name_map.items()]
            collection.insert_many(mongo_docs, ordered=False)
        except Exception as e:
            st.write(f"Error inserting name map: {e}")
        time_sleep(1)
        optimals = init_team_results(players_median, position_requirements, salary_cap, max_team_skaters, teams_to_optimize, 1000, players_team_map)
        formatted_optimals = format_optimals(optimals, players_median, required_positions, players_team_map)
        # Map player names back to their numeric Var ids for the Go payload.
        for col in required_positions:
            if col in formatted_optimals.columns:
                formatted_optimals[col] = formatted_optimals[col].map(master_name_index).fillna(formatted_optimals[col])
        formatted_optimals['proj'] = formatted_optimals['proj'].astype(float)
        # Convert this slate's optimals to JSON format and add slate info
        for idx, row in formatted_optimals.iterrows():
            optimal_lineup = {
                "slate": slates,  # Add slate identifier
                "salary": int(row['salary']),
                "projection": float(row['proj']),
                "team": str(row['Team']),
                "team_count": int(row['Team_count']),
                "secondary": str(row['Secondary']),
                "secondary_count": int(row['Secondary_count']),
                "ownership": float(row['Own']),
                "players": [int(row['C1']), int(row['C2']), int(row['W1']),
                            int(row['W2']), int(row['D1']), int(row['D2']),
                            int(row['FLEX1']), int(row['FLEX2']), int(row['G'])]
            }
            optimal_lineups.append(optimal_lineup)
        st.write(f"Generated {len(formatted_optimals)} optimal lineups for slate {slates}")
        # NOTE(review): both JSON files are rewritten and the Go generator re-run
        # once per slate (optimal_lineups is reset each iteration) -- confirm the
        # Go side expects per-slate invocation rather than one combined file.
        optimal_lineups_path = os.path.join(fd_nhl_dir, 'optimal_lineups.json')
        with open(optimal_lineups_path, 'w') as f:
            json.dump(optimal_lineups, f)
        run_go_classic_lineup_generator("FD", "NHL")
    st.write("NHL lineup generation for FD completed successfully!")