NFL_DFS_Contest_Sims / src /sim_func_hold /regular_functions.py
James McCool
Refactor seed frame initialization functions to handle multiple slate types for DraftKings and FanDuel, improving data retrieval and organization based on slate selection.
043582a
raw
history blame
6.27 kB
import streamlit as st
import pandas as pd
import numpy as np
from pymongo import MongoClient
from database import db
@st.cache_data(ttl = 600)
def init_DK_seed_frames(slate_var, sharp_split):
    """Load the DraftKings seed lineups for a slate as a NumPy array.

    Args:
        slate_var: One of 'Main Slate', 'Secondary Slate' or
            'Auxiliary Slate'; selects which pair of Mongo collections
            (name map + seed frame) is read.
        sharp_split: Passed to ``cursor.limit`` to cap how many seed-frame
            documents are fetched.

    Returns:
        A 2-D array with one row per seed lineup and the columns
        QB, RB1, RB2, WR1, WR2, WR3, TE, FLEX, DST, salary, proj, Team,
        Team_count, Secondary, Secondary_count, Own (in that order). The
        nine roster columns are translated through the slate's name map.

    Raises:
        ValueError: If ``slate_var`` is not a recognised slate label.
    """
    # Collection-name prefix for each supported slate label.
    slate_prefixes = {
        'Main Slate': 'DK_NFL',
        'Secondary Slate': 'DK_NFL_Secondary',
        'Auxiliary Slate': 'DK_NFL_Auxiliary',
    }
    if slate_var not in slate_prefixes:
        # Previously an unknown label surfaced as an UnboundLocalError on
        # `collection`; fail with an explicit, actionable message instead.
        raise ValueError(
            f"Unknown slate {slate_var!r}; expected one of {list(slate_prefixes)}"
        )
    prefix = slate_prefixes[slate_var]

    # key -> value lookup used to translate the stored roster entries.
    name_map = pd.DataFrame(list(db[f'{prefix}_name_map'].find()))
    names_dict = dict(zip(name_map['key'], name_map['value']))

    cursor = db[f'{prefix}_seed_frame'].find().limit(sharp_split)
    raw_display = pd.DataFrame(list(cursor))

    dict_columns = ['QB', 'RB1', 'RB2', 'WR1', 'WR2', 'WR3', 'TE', 'FLEX', 'DST']
    stat_columns = ['salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']
    # Explicit copy so the column assignments below act on an independent
    # frame rather than on a selection of the fetched one.
    raw_display = raw_display[dict_columns + stat_columns].copy()
    for col in dict_columns:
        raw_display[col] = raw_display[col].map(names_dict)

    return raw_display.to_numpy()
@st.cache_data(ttl = 599)
def init_FD_seed_frames(slate_var, sharp_split):
    """Load the FanDuel seed lineups for a slate as a NumPy array.

    Args:
        slate_var: One of 'Main Slate', 'Secondary Slate' or
            'Auxiliary Slate'; selects which pair of Mongo collections
            (name map + seed frame) is read.
        sharp_split: Passed to ``cursor.limit`` to cap how many seed-frame
            documents are fetched.

    Returns:
        A 2-D array with one row per seed lineup and the columns
        QB, RB1, RB2, WR1, WR2, WR3, TE, FLEX, DST, salary, proj, Team,
        Team_count, Secondary, Secondary_count, Own (in that order). The
        nine roster columns are translated through the slate's name map.

    Raises:
        ValueError: If ``slate_var`` is not a recognised slate label.
    """
    # Collection-name prefix for each supported slate label.
    slate_prefixes = {
        'Main Slate': 'FD_NFL',
        'Secondary Slate': 'FD_NFL_Secondary',
        'Auxiliary Slate': 'FD_NFL_Auxiliary',
    }
    if slate_var not in slate_prefixes:
        # Previously an unknown label surfaced as an UnboundLocalError on
        # `cursor`; fail with an explicit, actionable message instead.
        raise ValueError(
            f"Unknown slate {slate_var!r}; expected one of {list(slate_prefixes)}"
        )
    prefix = slate_prefixes[slate_var]

    # key -> value lookup used to translate the stored roster entries.
    name_map = pd.DataFrame(list(db[f'{prefix}_name_map'].find()))
    names_dict = dict(zip(name_map['key'], name_map['value']))

    cursor = db[f'{prefix}_seed_frame'].find().limit(sharp_split)
    raw_display = pd.DataFrame(list(cursor))

    dict_columns = ['QB', 'RB1', 'RB2', 'WR1', 'WR2', 'WR3', 'TE', 'FLEX', 'DST']
    stat_columns = ['salary', 'proj', 'Team', 'Team_count', 'Secondary', 'Secondary_count', 'Own']
    # Explicit copy so the column assignments below act on an independent
    # frame rather than on a selection of the fetched one.
    raw_display = raw_display[dict_columns + stat_columns].copy()
    for col in dict_columns:
        raw_display[col] = raw_display[col].map(names_dict)

    return raw_display.to_numpy()
def _load_overall_roo(collection_name, slate_var):
    """Fetch one ROO collection and return its 'overall' rows for a slate."""
    roo_columns = [
        'Player', 'Position', 'Team', 'Opp', 'Salary', 'Floor', 'Median', 'Ceiling',
        'Top_finish', 'Top_5_finish', 'Top_10_finish', '20+%', '2x%', '3x%', '4x%',
        'Own', 'Small_Field_Own', 'Large_Field_Own', 'Cash_Field_Own', 'CPT_Own',
        'LevX', 'version', 'slate', 'timestamp', 'player_ID', 'site',
    ]
    frame = pd.DataFrame(list(db[collection_name].find()))
    keep = (frame['slate'] == slate_var) & (frame['version'] == 'overall')
    # Copy before adding a column: assigning into a filtered selection is
    # what triggers pandas' SettingWithCopyWarning.
    selected = frame.loc[keep, roo_columns].copy()
    selected['STDev'] = (selected['Ceiling'] - selected['Floor']) / 4
    return selected


@st.cache_data(ttl = 599)
def init_baselines(slate_var):
    """Load the DraftKings and FanDuel baseline tables for one slate.

    Reads the ``DK_NFL_ROO`` and ``FD_NFL_ROO`` collections, keeps the rows
    whose ``slate`` equals ``slate_var`` and whose ``version`` is
    ``'overall'``, and appends an ``STDev`` column computed as
    ``(Ceiling - Floor) / 4``.

    Args:
        slate_var: Value matched against the ``slate`` field of each row.

    Returns:
        A ``(dk_raw, fd_raw)`` tuple of DataFrames.
    """
    dk_raw = _load_overall_roo("DK_NFL_ROO", slate_var)
    fd_raw = _load_overall_roo("FD_NFL_ROO", slate_var)
    return dk_raw, fd_raw
@st.cache_data
def convert_df(array, column_names=None):
    """Serialise an array-like to UTF-8 encoded CSV bytes.

    Args:
        array: Data accepted by ``pd.DataFrame`` (e.g. a 2-D NumPy array).
        column_names: Optional column labels for the CSV header. When
            omitted, a module-level ``column_names`` is used if one exists;
            otherwise pandas' default integer labels are used.

    Returns:
        The CSV text (including the index column written by ``to_csv``)
        encoded as UTF-8 bytes.
    """
    if column_names is None:
        # The original body read a free variable ``column_names`` that this
        # module never defines (NameError on every call). Honour such a
        # global if something has injected it, else fall back to defaults.
        column_names = globals().get('column_names')
    frame = pd.DataFrame(array, columns=column_names)
    return frame.to_csv().encode('utf-8')
@st.cache_data
def calculate_DK_value_frequencies(np_array):
    """Return each distinct value in the first nine columns with its rate.

    The rate is the number of occurrences across those nine columns
    divided by the number of rows in ``np_array``. The result is a
    two-column array: distinct value, then rate.
    """
    leading_columns = np_array[:, :9]
    distinct_values, occurrences = np.unique(leading_columns, return_counts=True)
    # Occurrences are scaled by row count, not by the number of cells.
    per_row_rate = occurrences / len(np_array)
    return np.column_stack((distinct_values, per_row_rate))
@st.cache_data
def calculate_FD_value_frequencies(np_array):
    """Return each distinct value in the first nine columns with its rate.

    The rate is the number of occurrences across those nine columns
    divided by the number of rows in ``np_array``. The result is a
    two-column array: distinct value, then rate.
    """
    leading_columns = np_array[:, :9]
    distinct_values, occurrences = np.unique(leading_columns, return_counts=True)
    # Occurrences are scaled by row count, not by the number of cells.
    per_row_rate = occurrences / len(np_array)
    return np.column_stack((distinct_values, per_row_rate))
@st.cache_data
def sim_contest(Sim_size, seed_frame, maps_dict, Contest_Size):
    """Simulate contests and collect the top-scoring row of each one.

    For every simulation, ``Contest_Size`` rows are drawn (with
    replacement) from a copy of ``seed_frame``. Every column except the
    last seven is looked up in ``maps_dict['Projection_map']`` and
    ``maps_dict['STDev_map']`` to parameterise a normal draw per cell; the
    per-row sum of those draws is appended as a new final column.

    Args:
        Sim_size: Number of simulations; the loop runs while the 1-based
            counter is <= this value.
        seed_frame: 2-D array of candidate rows.
        maps_dict: Mapping holding 'Projection_map' and 'STDev_map' lookups.
        Contest_Size: Number of rows sampled per simulation.

    Returns:
        A list with one single-row 2-D array per simulation: the sampled
        row with the highest appended score.
    """
    lineup_pool = seed_frame.copy()

    # Element-wise dictionary lookups over whole arrays.
    lookup_projection = np.vectorize(maps_dict['Projection_map'].__getitem__)
    lookup_stdev = np.vectorize(maps_dict['STDev_map'].__getitem__)

    st.write('Simulating contest on frames')

    winners = []
    run_number = 1
    while run_number <= Sim_size:
        drawn_rows = np.random.choice(lineup_pool.shape[0], Contest_Size)
        field = lineup_pool[drawn_rows]

        roster = field[:, :-7]
        cell_scores = np.random.normal(
            loc=lookup_projection(roster),
            scale=lookup_stdev(roster),
        )
        scored_field = np.c_[field, np.sum(cell_scores, axis=1)]

        # Pre-order by column 10 (descending) so the stable sort on the
        # appended score column resolves ties in a consistent order.
        # NOTE(review): column 10 is presumably 'proj' when seed_frame comes
        # from the seed-frame loaders — confirm against the caller.
        pre_ordered = scored_field[scored_field[:, 10].argsort()[::-1]]
        top_index = pre_ordered[:, -1].argsort(kind='stable')[::-1][:1]
        winners.append(pre_ordered[top_index])

        run_number += 1

    return winners