Spaces:

Hatman
/

NBA-Fantasy-Game

Sleeping

Hatmanstack Claude Opus 4.5 committed on Feb 3

Commit

6424951

1 Parent(s): a63f84a

Refactor app with security fixes, error handling, and type safety

- Fix SQL injection: use parameterized queries in src/database/queries.py
- Fix XSS: add HTML escaping in src/utils/html.py
- Re-enable CORS/XSRF protection in devcontainer.json
- Add Pydantic models for validation in src/models/player.py
- Add session state management with safe defaults
- Cache ML model loading with @st.cache_resource
- Fix N+1 query with batch IN clause
- Add loop guards to prevent infinite loops
- Add comprehensive test suite
- Add pyproject.toml with mypy strict mode and ruff config
- Move compile_model.py to scripts/ with main() guard

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Files changed (30) hide show

.devcontainer/devcontainer.json +1 -1
app.py +15 -5
compile_model.py +0 -73
pages/1_home_team.py +128 -84
pages/2_play_game.py +179 -99
pyproject.toml +104 -0
requirements-dev.txt +6 -0
requirements.txt +6 -4
scripts/compile_model.py +243 -0
src/__init__.py +1 -0
src/config.py +93 -0
src/database/__init__.py +23 -0
src/database/connection.py +111 -0
src/database/queries.py +127 -0
src/ml/__init__.py +15 -0
src/ml/model.py +114 -0
src/models/__init__.py +5 -0
src/models/player.py +144 -0
src/state/__init__.py +5 -0
src/state/session.py +160 -0
src/utils/__init__.py +5 -0
src/utils/html.py +108 -0
src/validation/__init__.py +9 -0
src/validation/inputs.py +111 -0
tests/__init__.py +1 -0
tests/conftest.py +123 -0
tests/test_database.py +220 -0
tests/test_ml.py +147 -0
tests/test_models.py +140 -0
tests/test_validation.py +130 -0

.devcontainer/devcontainer.json CHANGED Viewed

@@ -19,7 +19,7 @@
   },
   "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
   "postAttachCommand": {
-    "server": "streamlit run app.py --server.enableCORS false --server.enableXsrfProtection false"
   },
   "portsAttributes": {
     "8501": {

   },
   "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
   "postAttachCommand": {
+    "server": "streamlit run app.py"
   },
   "portsAttributes": {
     "8501": {

app.py CHANGED Viewed

@@ -1,13 +1,23 @@
 import streamlit as st
-import pandas as pd
-import snowflake.connector
-def on_page_load():
     st.set_page_config(layout="wide")
 on_page_load()
-st.markdown("<h1 style='text-align: center; color: steelblue;'>NBA</h1>", unsafe_allow_html=True)
-st.markdown("<h5 style='text-align: center; color: white;'>A Simple app to test your skill in building a Team based on career stats to compete with a Computer</h5>", unsafe_allow_html=True)

+"""NBA Team Builder Application - Entry Point."""
 import streamlit as st
+from src.utils.html import safe_heading, safe_paragraph
+def on_page_load() -> None:
+    """Configure page settings."""
     st.set_page_config(layout="wide")
 on_page_load()
+safe_heading("NBA", level=1, color="steelblue")
+safe_paragraph(
+    "A Simple app to test your skill in building a Team based on "
+    "career stats to compete with a Computer",
+    color="white",
+)

compile_model.py DELETED Viewed

@@ -1,73 +0,0 @@
-import pandas as pd
-import numpy as np
-from tensorflow import keras
-from tensorflow.keras import layers
-from tensorflow.keras.losses import BinaryCrossentropy
-from sklearn.model_selection import train_test_split
-from sklearn.model_selection import RandomizedSearchCV
-from scikeras.wrappers import KerasClassifier
-def create_stats(roster, schedule):
-    home_stats = []
-    away_stats = []
-    S = []
-    # Loading Relavent Columns from f-test
-    cols = ['TEAM','PTS/G', 'ORB', 'DRB', 'AST', 'STL', 'BLK', 'TOV', '3P%', 'FT%','2P']
-    new_roster = roster[cols]
-    for i in schedule['Home/Neutral']:
-        home_stats.append((new_roster[new_roster['TEAM'] == i]).values.tolist())
-    for i in schedule['Visitor/Neutral']:
-        away_stats.append((new_roster.loc[new_roster['TEAM'] == i]).values.tolist())
-    for i in range(len(home_stats)):
-        arr = []
-        for j in range(len(home_stats[i])):
-            del home_stats[i][j][0]
-            arr += home_stats[i][j]
-        for j in range(len(away_stats[i])):
-            del away_stats[i][j][0]
-            arr += away_stats[i][j]
-        # Create numpy array with all the players on the Home Team's Stats followed by the Away Team's stats
-        S.append(np.nan_to_num(np.array(arr), copy=False))
-    return S
-roster = pd.read_csv('player_stats.txt', delimiter=',')
-schedule = pd.read_csv('schedule.txt', delimiter=',')
-# Create winning condition to train on
-schedule['winner'] = schedule.apply(lambda x: 0 if x['PTS'] > x['PTS.1'] else 1, axis=1)
-X = np.array(create_stats(roster, schedule))
-y = np.array(schedule['winner'])
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-def create_model(optimizer='rmsprop', init='glorot_uniform'):
-    inputs = keras.Input(shape=(100,))
-    dense = layers.Dense(50, activation="relu")
-    x = dense(inputs)
-    x = layers.Dense(64, activation="relu")(x)
-    outputs = layers.Dense(1, activation='sigmoid')(x)
-    model = keras.Model(inputs=inputs, outputs=outputs, name="nba_model")
-    model.compile(loss=BinaryCrossentropy(from_logits=False), optimizer=optimizer, metrics=["accuracy"])
-    return model
-model = KerasClassifier(model=create_model, verbose=0, init='glorot_uniform')
-optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
-init = ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']
-epochs = [500, 1000, 1500]
-batches = [50, 100, 200]
-param_grid = dict(optimizer=optimizer, epochs=epochs, batch_size=batches, init=init)
-random_search = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=100, verbose=3)
-random_search_result = random_search.fit(X_train, y_train)
-best_model = random_search_result.best_estimator_
-best_model.model_.save('winner.keras')
-best_parameters = random_search_result.best_params_
-print("Best parameters: ", best_parameters)
-test_accuracy = random_search_result.best_estimator_.score(X_test, y_test)
-print("Test accuracy: ", test_accuracy)

pages/1_home_team.py CHANGED Viewed

@@ -1,111 +1,155 @@
-import streamlit as st
 import pandas as pd
-import snowflake.connector
-def on_page_load():
     st.set_page_config(layout="wide")
 on_page_load()
 col1, col2, col3 = st.columns(3)
 with col2:
-    st.markdown("<h1 style='text-align: center; color: steelblue;'>Build Your Team</h1>", unsafe_allow_html=True)
-    player_add = st.text_input('Who\'re you picking?', 'James')
-    player = player_add.lower()
-st.markdown("<p style='text-align: center; color: steelblue;'>Search for a player to populate the dropdown menu then pick and save your team before searching for another player.</p>", unsafe_allow_html=True)
-search_string = 'select full_name from NBA where full_name_lower=\'{}\' or first_name_lower=\'{}\' or last_name_lower=\'{}\';'.format(player, player, player)
-if 'home_team' not in st.session_state:
-        st.session_state['home_team'] = []
-if 'away_team' not in st.session_state:
-        st.session_state['away_team'] = []
-if 'away_stats' not in st.session_state:
-    st.session_state['away_stats'] = []
-if 'home_team_df' not in st.session_state:
-    st.session_state['home_team_df'] = pd.DataFrame()
-if 'radio_index' not in st.session_state:
-    st.session_state['radio_index'] = 0
-def find_player():
-  cnx = snowflake.connector.connect(**st.secrets["snowflake"])
-  data = get_player(cnx)
-  cnx.close()
-  return data
-def get_player(cnx):
-  with cnx.cursor() as cur:
-    cur.execute(search_string)
-    return cur.fetchall()
-player_search = find_player()
-def find_home_team():
-    test =[]
-    cnx = snowflake.connector.connect(**st.secrets["snowflake"])
-    for i in st.session_state.home_team:
-        with cnx.cursor() as cur:
-            cur.execute('SELECT * FROM NBA WHERE FULL_NAME=\'{}\''.format(i))
-            test.append(cur.fetchall()[0])
-    cnx.close()
-    df = pd.DataFrame(test, columns=['FULL_NAME', 'AST', 'BLK', 'DREB', 'FG3A', 'FG3M', 'FG3_PCT', 'FGA', 'FGM', 'FG_PCT', 'FTA', 'FTM', 'FT_PCT','GP', 'GS', 'MIN', 'OREB', 'PF', 'PTS', 'REB', 'STL', 'TOV', 'FIRST_NAME', 'LAST_NAME', 'FULL_NAME_LOWER', 'FIRST_NAME_LOWER', 'LAST_NAME_LOWER', 'IS_ACTIVE'])
-    st.session_state.home_team_df = df
-    return df
 home_team_df = find_home_team()
-player_search = [player[0] for player in player_search]
 if not home_team_df.empty:
-    name_list = home_team_df['FULL_NAME'].tolist()
-    player_search += name_list
-def save_state():
-    saved_players = home_team_df['FULL_NAME'].tolist()
     holder = saved_players + player_selected
     if len(player_selected) > len(saved_players):
-        for i in holder:
-            if i not in st.session_state.home_team:
-                st.session_state.home_team.append(i)
     elif len(player_selected) < len(saved_players):
-        for i in saved_players:
-            if i not in player_selected:
-                st.session_state.home_team.remove(i)
     st.rerun()
-col1, col2 = st.columns([7,1])
 with col1:
-    player_selected = st.multiselect("Search Results:", player_search, home_team_df['FULL_NAME'].tolist(), label_visibility="collapsed")
 with col2:
-    if st.button('Save Team'):
         save_state()
-st.markdown("<h1 style='text-align: center; color: steelblue;'>Preview</h1>", unsafe_allow_html=True)
 st.dataframe(home_team_df)
-radio_index = st.session_state.radio_index
 col1, col2, col3, col4, col5 = st.columns(5)
 with col3:
-    st.markdown("<h3 style='text-align: center; color: steelblue;'>Away Team</h3>", unsafe_allow_html=True)
     difficulty = st.radio(
-        label="Difficulty", index=radio_index, options=['Regular','93\' Bulls', 'All-Stars', 'Dream Team'],
-        label_visibility="collapsed", )
-    if difficulty == 'Regular':
-        st.session_state.away_stats = [850, 400, 200, 60]
-        st.session_state.radio_index = 0
-    elif difficulty == '93\' Bulls':
-        st.session_state.away_stats = [1050, 500, 300, 80]
-        st.session_state.radio_index = 1
-    elif difficulty == 'All-Stars':
-        st.session_state.away_stats = [1250, 600, 400, 100]
-        st.session_state.radio_index = 2
-    elif difficulty == 'Dream Team':
-        st.session_state.away_stats = [1450, 700, 500, 120]
-        st.session_state.radio_index = 3
     else:
         st.write("You didn't select a difficulty.")

+"""Home team builder page."""
+import logging
 import pandas as pd
+import streamlit as st
+from src.config import DIFFICULTY_PRESETS, PLAYER_COLUMNS
+from src.database.connection import DatabaseConnectionError, QueryExecutionError, get_connection
+from src.database.queries import get_players_by_full_names, search_player_by_name
+from src.state.session import init_session_state
+from src.utils.html import safe_heading, safe_paragraph
+from src.validation.inputs import validate_search_term
+logger = logging.getLogger("streamlit_nba")
+def on_page_load() -> None:
+    """Configure page settings."""
     st.set_page_config(layout="wide")
 on_page_load()
+# Initialize session state before any access
+init_session_state()
 col1, col2, col3 = st.columns(3)
 with col2:
+    safe_heading("Build Your Team", level=1, color="steelblue")
+    player_add = st.text_input("Who're you picking?", "James")
+safe_paragraph(
+    "Search for a player to populate the dropdown menu then pick and "
+    "save your team before searching for another player.",
+    color="steelblue",
+)
+def find_player(search_term: str) -> list[str]:
+    """Search for players by name with validation and error handling.
+    Args:
+        search_term: User-provided search term
+    Returns:
+        List of matching player full names
+    """
+    # Validate input
+    validated_term = validate_search_term(search_term)
+    if validated_term is None:
+        st.warning("Invalid search term. Please use only letters, numbers, and basic punctuation.")
+        return []
+    try:
+        with get_connection() as conn:
+            results = search_player_by_name(conn, validated_term)
+            return [player[0] for player in results]
+    except DatabaseConnectionError as e:
+        st.error(f"Could not connect to database. Please try again later.")
+        logger.error(f"Database connection error: {e}")
+        return []
+    except QueryExecutionError as e:
+        st.error("Error searching for players. Please try again.")
+        logger.error(f"Query error: {e}")
+        return []
+def find_home_team() -> pd.DataFrame:
+    """Load home team data from database using batch query.
+    Returns:
+        DataFrame with home team player data
+    """
+    team_names: list[str] = st.session_state.get("home_team", [])
+    if not team_names:
+        return pd.DataFrame(columns=PLAYER_COLUMNS)
+    try:
+        with get_connection() as conn:
+            # Single batch query instead of N+1 queries
+            df = get_players_by_full_names(conn, team_names)
+            st.session_state.home_team_df = df
+            return df
+    except DatabaseConnectionError as e:
+        st.error("Could not connect to database. Please try again later.")
+        logger.error(f"Database connection error: {e}")
+        return pd.DataFrame(columns=PLAYER_COLUMNS)
+    except QueryExecutionError as e:
+        st.error("Error loading team data. Please try again.")
+        logger.error(f"Query error: {e}")
+        return pd.DataFrame(columns=PLAYER_COLUMNS)
+# Load data
+player_search = find_player(player_add)
 home_team_df = find_home_team()
+# Combine search results with current team
 if not home_team_df.empty:
+    name_list = home_team_df["FULL_NAME"].tolist()
+    player_search = player_search + [n for n in name_list if n not in player_search]
+def save_state() -> None:
+    """Save the selected players to session state."""
+    saved_players = home_team_df["FULL_NAME"].tolist() if not home_team_df.empty else []
     holder = saved_players + player_selected
     if len(player_selected) > len(saved_players):
+        for player in holder:
+            if player not in st.session_state.home_team:
+                st.session_state.home_team.append(player)
     elif len(player_selected) < len(saved_players):
+        for player in saved_players:
+            if player not in player_selected:
+                st.session_state.home_team.remove(player)
     st.rerun()
+col1, col2 = st.columns([7, 1])
 with col1:
+    default_selection = home_team_df["FULL_NAME"].tolist() if not home_team_df.empty else []
+    player_selected = st.multiselect(
+        "Search Results:",
+        player_search,
+        default_selection,
+        label_visibility="collapsed",
+    )
 with col2:
+    if st.button("Save Team"):
         save_state()
+safe_heading("Preview", level=1, color="steelblue")
 st.dataframe(home_team_df)
+radio_index: int = st.session_state.get("radio_index", 0)
 col1, col2, col3, col4, col5 = st.columns(5)
 with col3:
+    safe_heading("Away Team", level=3, color="steelblue")
     difficulty = st.radio(
+        label="Difficulty",
+        index=radio_index,
+        options=list(DIFFICULTY_PRESETS.keys()),
+        label_visibility="collapsed",
+    )
+    if difficulty and difficulty in DIFFICULTY_PRESETS:
+        st.session_state.away_stats = list(DIFFICULTY_PRESETS[difficulty])
+        st.session_state.radio_index = list(DIFFICULTY_PRESETS.keys()).index(difficulty)
     else:
         st.write("You didn't select a difficulty.")

pages/2_play_game.py CHANGED Viewed

@@ -1,114 +1,194 @@
-import streamlit as st
-import pandas as pd
-import snowflake.connector
-import numpy as np
-import tensorflow as tf
 import random
-from tensorflow.keras.models import load_model
-def on_page_load():
     st.set_page_config(layout="wide")
 on_page_load()
-stats = st.session_state.away_stats
 teams_good = True
-winner_prediction = 0
-away_point_prediction = 0
-home_point_prediction = 0
-query_string = ('SELECT * FROM (select * from NBA where PTS > {}) sample (2 rows) UNION '.format(stats[0]))
-query_string += ('SELECT * FROM (select * from NBA where REB > {}) sample (1 rows) UNION '.format(stats[1]))
-query_string += ('SELECT * FROM (select * from NBA where AST > {}) sample (1 rows) UNION '.format(stats[2]))
-query_string += ('SELECT * FROM (select * from NBA where STL > {}) sample (1 rows);'.format(stats[3]))
-def get_away_team(cnx, query_string):
-    with cnx.cursor() as cur:
-        cur.execute(query_string)
-        players = cur.fetchall()
-        while len(players) != 5:
-            cur.execute(query_string)
-            players = cur.fetchall()
-    return players
-def find_away_team():
-    cnx = snowflake.connector.connect(**st.secrets["snowflake"])
-    data = get_away_team(cnx, query_string)
-    cnx.close()
-    df = pd.DataFrame(data, columns=['FULL_NAME', 'AST', 'BLK', 'DREB', 'FG3A', 'FG3M', 'FG3_PCT', 'FGA', 'FGM', 'FG_PCT', 'FTA', 'FTM', 'FT_PCT','GP', 'GS', 'MIN', 'OREB', 'PF', 'PTS', 'REB', 'STL', 'TOV', 'FIRST_NAME', 'LAST_NAME', 'FULL_NAME_LOWER', 'FIRST_NAME_LOWER', 'LAST_NAME_LOWER', 'IS_ACTIVE'])
-    return df
-if not st.session_state.home_team_df.shape[0] == 5:
-    st.markdown("<h3 style='text-align: center; color: red;'>Your Team Doesn't Have 5</h3>", unsafe_allow_html=True)
     away_data = pd.DataFrame()
     teams_good = False
-    winner = ''
 else:
-    away_data = find_away_team()
-def analyze_stats(home_stats, away_stats):
-    home=[]
-    away=[]
-    for j in range(len(home_stats)):
-        home += home_stats[j]
-    for j in range(len(away_stats)):
-        away += away_stats[j]
-    return np.array(home).reshape(1,-1), np.array(away).reshape(1,-1), np.array(home + away).reshape(1, -1)
-def get_score_board(p_pred, w_score):
-    score = []
-    quarter_score = int(w_score/4)
-    score.append(quarter_score + random.randint(-7, 7))
-    score.append(quarter_score + random.randint(-3, 3))
-    score.append(quarter_score + random.randint(-8, 8))
-    score.append(w_score - (score[0] + score[1] + score[2]))
-    score.append(w_score)
-    return score
-if teams_good:
-    #first pass algo to determine winner
-    cols = ['PTS', 'OREB', 'DREB', 'AST', 'STL', 'BLK', 'TOV', 'FG3_PCT', 'FT_PCT', 'FGM']
-    home_stats = st.session_state.home_team_df[cols].values.tolist()
-    away_stats = away_data[cols].values.tolist()
-    home, away, winner = analyze_stats(home_stats, away_stats)
-    winner_model = load_model('winner.keras')
-    winner_sigmoid= winner_model.predict(winner)
-    winner_prediction = np.round(winner_sigmoid[0][0])
-    score = []
-    winner_score = random.randint(90, 130)
-    loser_score = random.randint(80, 120)
-    while winner_score <= loser_score:
-        winner_score = random.randint(90, 130)
-        loser_score = random.randint(80, 120)
-    if winner_prediction == 1:
-        score.append(get_score_board(winner_prediction, winner_score))
-        score.append(get_score_board(away_point_prediction, loser_score))
-        winner = 'Winner'
-    else:
-        score.append(get_score_board(winner_prediction, loser_score))
-        score.append(get_score_board(away_point_prediction, winner_score))
-        winner = 'Loser'
-    box_score = pd.DataFrame(score , columns=['1', '2', '3', '4', 'Final'], index=['Home Team', 'Away Team'] )
-    print(f"Prediction: {winner_sigmoid[0][0]}")
-st.markdown("<h1 style='text-align: center; color: steelblue;'>Home Team</h1>", unsafe_allow_html=True)
-st.dataframe(st.session_state.home_team_df)
-if teams_good:
-    print(f"Teams Good")
-    st.markdown(f"<h3 style='text-align: center; color: steelblue;'>{winner}</h3>", unsafe_allow_html=True)
     col1, col2, col3 = st.columns(3)
     with col2:
-      st.dataframe(box_score)
-st.markdown("<h1 style='text-align: center; color: steelblue;'>Away Team</h1>", unsafe_allow_html=True)
 st.dataframe(away_data)
 if st.button("Play New Team"):
-    print("New Team")

+"""Game play page with prediction and scoring."""
+import logging
 import random
+import pandas as pd
+import streamlit as st
+from src.config import (
+    DEFAULT_LOSER_SCORE,
+    DEFAULT_WINNER_SCORE,
+    LOSER_SCORE_RANGE,
+    MAX_QUERY_ATTEMPTS,
+    STAT_COLUMNS,
+    TEAM_SIZE,
+    WINNER_SCORE_RANGE,
+)
+from src.database.connection import DatabaseConnectionError, QueryExecutionError, get_connection
+from src.database.queries import get_away_team_by_stats
+from src.ml.model import ModelLoadError, analyze_team_stats, predict_winner
+from src.state.session import get_away_stats, get_home_team_df, init_session_state
+from src.utils.html import safe_heading
+logger = logging.getLogger("streamlit_nba")
+def on_page_load() -> None:
+    """Configure page settings."""
     st.set_page_config(layout="wide")
 on_page_load()
+# Initialize session state BEFORE any access
+init_session_state()
+# Get stats safely with fallback
+stats = get_away_stats()
 teams_good = True
+def find_away_team(stat_thresholds: list[int]) -> pd.DataFrame:
+    """Generate away team based on difficulty stats.
+    Args:
+        stat_thresholds: List of [pts, reb, ast, stl] thresholds
+    Returns:
+        DataFrame with away team data, or empty DataFrame on error
+    """
+    try:
+        with get_connection() as conn:
+            return get_away_team_by_stats(
+                conn,
+                pts_threshold=stat_thresholds[0],
+                reb_threshold=stat_thresholds[1],
+                ast_threshold=stat_thresholds[2],
+                stl_threshold=stat_thresholds[3],
+                max_attempts=MAX_QUERY_ATTEMPTS,
+            )
+    except DatabaseConnectionError as e:
+        st.error("Could not connect to database. Please try again later.")
+        logger.error(f"Database connection error: {e}")
+        return pd.DataFrame()
+    except QueryExecutionError as e:
+        st.error("Could not generate away team. Please try again.")
+        logger.error(f"Query error: {e}")
+        return pd.DataFrame()
+def get_score_board(final_score: int) -> list[int]:
+    """Generate quarter-by-quarter scores that sum to final score.
+    Args:
+        final_score: Total game score
+    Returns:
+        List of [Q1, Q2, Q3, Q4, Final] scores
+    """
+    quarter_score = final_score // 4
+    scores = [
+        quarter_score + random.randint(-7, 7),
+        quarter_score + random.randint(-3, 3),
+        quarter_score + random.randint(-8, 8),
+    ]
+    # Q4 makes up the difference to hit exact final
+    scores.append(final_score - sum(scores))
+    scores.append(final_score)
+    return scores
+def generate_game_scores() -> tuple[int, int]:
+    """Generate winner and loser scores with loop guard.
+    Returns:
+        Tuple of (winner_score, loser_score)
+    """
+    for _ in range(MAX_QUERY_ATTEMPTS):
+        winner_score = random.randint(*WINNER_SCORE_RANGE)
+        loser_score = random.randint(*LOSER_SCORE_RANGE)
+        if winner_score > loser_score:
+            return winner_score, loser_score
+    # Fallback to guaranteed valid scores
+    logger.warning("Score generation fell back to defaults")
+    return DEFAULT_WINNER_SCORE, DEFAULT_LOSER_SCORE
+# Check if home team is valid
+home_team_df = get_home_team_df()
+if home_team_df.empty or home_team_df.shape[0] != TEAM_SIZE:
+    safe_heading(
+        f"Your Team Doesn't Have {TEAM_SIZE} Players",
+        level=3,
+        color="red",
+    )
     away_data = pd.DataFrame()
     teams_good = False
+    winner_label = ""
+    box_score = pd.DataFrame()
 else:
+    away_data = find_away_team(stats)
+    if away_data.empty:
+        teams_good = False
+        winner_label = ""
+        box_score = pd.DataFrame()
+# Run prediction if both teams are valid
+if teams_good and not away_data.empty:
+    try:
+        # Extract stats for ML model
+        home_stats = home_team_df[STAT_COLUMNS].values.tolist()
+        away_stats_data = away_data[STAT_COLUMNS].values.tolist()
+        # Prepare data and predict
+        _, _, combined = analyze_team_stats(home_stats, away_stats_data)
+        probability, prediction = predict_winner(combined)
+        # Generate scores
+        winner_score, loser_score = generate_game_scores()
+        # Build scoreboard based on prediction
+        if prediction == 1:
+            score_data = [
+                get_score_board(winner_score),
+                get_score_board(loser_score),
+            ]
+            winner_label = "Winner"
+        else:
+            score_data = [
+                get_score_board(loser_score),
+                get_score_board(winner_score),
+            ]
+            winner_label = "Loser"
+        box_score = pd.DataFrame(
+            score_data,
+            columns=["1", "2", "3", "4", "Final"],
+            index=["Home Team", "Away Team"],
+        )
+        logger.info(f"Prediction: {probability:.4f}")
+    except ModelLoadError as e:
+        st.error("Could not load prediction model. Please contact support.")
+        logger.error(f"Model load error: {e}")
+        teams_good = False
+        winner_label = ""
+        box_score = pd.DataFrame()
+    except ValueError as e:
+        st.error("Error processing team stats. Please try again.")
+        logger.error(f"Stats processing error: {e}")
+        teams_good = False
+        winner_label = ""
+        box_score = pd.DataFrame()
+# Display results
+safe_heading("Home Team", level=1, color="steelblue")
+st.dataframe(home_team_df)
+if teams_good and winner_label:
+    logger.info("Teams Good")
+    safe_heading(winner_label, level=3, color="steelblue")
     col1, col2, col3 = st.columns(3)
     with col2:
+        st.dataframe(box_score)
+safe_heading("Away Team", level=1, color="steelblue")
 st.dataframe(away_data)
 if st.button("Play New Team"):
+    logger.info("New Team requested")
+    st.rerun()

pyproject.toml ADDED Viewed

	@@ -0,0 +1,104 @@

+[project]
+name = "streamlit-nba"
+version = "1.0.0"
+description = "NBA team builder and game prediction Streamlit application"
+requires-python = ">=3.11"
+dependencies = [
+    "streamlit>=1.28.0",
+    "snowflake-connector-python>=3.5.0",
+    "tensorflow>=2.15.0",
+    "numpy>=1.24.0",
+    "pandas>=2.0.0",
+    "pydantic>=2.5.0",
+]
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.4.0",
+    "pytest-cov>=4.1.0",
+    "mypy>=1.7.0",
+    "ruff>=0.1.6",
+    "pandas-stubs>=2.0.0",
+]
+[tool.mypy]
+python_version = "3.11"
+strict = true
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = true
+disallow_incomplete_defs = true
+check_untyped_defs = true
+disallow_untyped_decorators = true
+no_implicit_optional = true
+warn_redundant_casts = true
+warn_unused_ignores = true
+warn_no_return = true
+warn_unreachable = true
+# Third-party library ignores
+[[tool.mypy.overrides]]
+module = [
+    "streamlit.*",
+    "snowflake.*",
+    "tensorflow.*",
+    "keras.*",
+    "sklearn.*",
+    "scikeras.*",
+]
+ignore_missing_imports = true
+[tool.ruff]
+target-version = "py311"
+line-length = 88
+[tool.ruff.lint]
+select = [
+    "E",      # pycodestyle errors
+    "W",      # pycodestyle warnings
+    "F",      # pyflakes
+    "I",      # isort
+    "B",      # flake8-bugbear
+    "C4",     # flake8-comprehensions
+    "UP",     # pyupgrade
+    "ARG",    # flake8-unused-arguments
+    "SIM",    # flake8-simplify
+    "TCH",    # flake8-type-checking
+    "PTH",    # flake8-use-pathlib
+    "PL",     # pylint
+    "RUF",    # ruff-specific
+    "S",      # flake8-bandit (security)
+]
+ignore = [
+    "S101",   # assert used (ok in tests)
+    "PLR0913", # too many arguments
+]
+[tool.ruff.lint.per-file-ignores]
+"tests/*" = ["S101", "ARG001", "PLR2004"]
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = ["test_*.py"]
+python_functions = ["test_*"]
+addopts = [
+    "-v",
+    "--tb=short",
+    "--strict-markers",
+]
+[tool.coverage.run]
+source = ["src"]
+branch = true
+omit = [
+    "tests/*",
+    "scripts/*",
+]
+[tool.coverage.report]
+exclude_lines = [
+    "pragma: no cover",
+    "if TYPE_CHECKING:",
+    "raise NotImplementedError",
+]
+fail_under = 80

requirements-dev.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+-r requirements.txt
+pytest>=7.4.0
+pytest-cov>=4.1.0
+mypy>=1.7.0
+ruff>=0.1.6
+pandas-stubs>=2.0.0

requirements.txt CHANGED Viewed

@@ -1,4 +1,6 @@
-snowflake-connector-python
-tensorflow
-numpy
-pandas

+streamlit>=1.28.0
+snowflake-connector-python>=3.5.0
+tensorflow>=2.15.0
+numpy>=1.24.0
+pandas>=2.0.0
+pydantic>=2.5.0

scripts/compile_model.py ADDED Viewed

	@@ -0,0 +1,243 @@

+#!/usr/bin/env python3
+"""NBA game winner prediction model training script.
+This script trains a neural network to predict game winners based on
+team statistics. It uses RandomizedSearchCV to find optimal hyperparameters.
+Usage:
+    python scripts/compile_model.py
+"""
+import logging
+from pathlib import Path
+import numpy as np
+import pandas as pd
+from scikeras.wrappers import KerasClassifier
+from sklearn.model_selection import RandomizedSearchCV, train_test_split
+from tensorflow import keras
+from tensorflow.keras import layers
+from tensorflow.keras.losses import BinaryCrossentropy
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)
+# Data file paths
+ROSTER_FILE = Path("player_stats.txt")
+SCHEDULE_FILE = Path("schedule.txt")
+OUTPUT_MODEL = Path("winner.keras")
+# Feature columns from roster data
+FEATURE_COLS: list[str] = [
+    "TEAM",
+    "PTS/G",
+    "ORB",
+    "DRB",
+    "AST",
+    "STL",
+    "BLK",
+    "TOV",
+    "3P%",
+    "FT%",
+    "2P",
+]
+# Hyperparameter search space
+OPTIMIZERS: list[str] = [
+    "SGD",
+    "RMSprop",
+    "Adagrad",
+    "Adadelta",
+    "Adam",
+    "Adamax",
+    "Nadam",
+]
+INITIALIZERS: list[str] = [
+    "uniform",
+    "lecun_uniform",
+    "normal",
+    "zero",
+    "glorot_normal",
+    "glorot_uniform",
+    "he_normal",
+    "he_uniform",
+]
+EPOCHS: list[int] = [500, 1000, 1500]
+BATCH_SIZES: list[int] = [50, 100, 200]
+def create_stats(
+    roster: pd.DataFrame, schedule: pd.DataFrame
+) -> list[np.ndarray]:
+    """Create feature arrays from roster and schedule data.
+    Args:
+        roster: DataFrame with player statistics
+        schedule: DataFrame with game schedule and scores
+    Returns:
+        List of numpy arrays, one per game with combined team stats
+    """
+    home_stats: list[list] = []
+    away_stats: list[list] = []
+    features: list[np.ndarray] = []
+    new_roster = roster[FEATURE_COLS]
+    # Get stats for each team in each game
+    for team in schedule["Home/Neutral"]:
+        home_stats.append(new_roster[new_roster["TEAM"] == team].values.tolist())
+    for team in schedule["Visitor/Neutral"]:
+        away_stats.append(new_roster[new_roster["TEAM"] == team].values.tolist())
+    # Combine home and away stats for each game
+    for i in range(len(home_stats)):
+        arr: list[float] = []
+        for j in range(len(home_stats[i])):
+            del home_stats[i][j][0]  # Remove team name
+            arr.extend(home_stats[i][j])
+        for j in range(len(away_stats[i])):
+            del away_stats[i][j][0]  # Remove team name
+            arr.extend(away_stats[i][j])
+        # Handle NaN values
+        features.append(np.nan_to_num(np.array(arr), copy=False))
+    return features
+def create_model(
+    optimizer: str = "rmsprop", init: str = "glorot_uniform"
+) -> keras.Model:
+    """Create the neural network model architecture.
+    Args:
+        optimizer: Optimizer name
+        init: Weight initializer name
+    Returns:
+        Compiled Keras model
+    """
+    inputs = keras.Input(shape=(100,))
+    x = layers.Dense(50, activation="relu", kernel_initializer=init)(inputs)
+    x = layers.Dense(64, activation="relu", kernel_initializer=init)(x)
+    outputs = layers.Dense(1, activation="sigmoid")(x)
+    model = keras.Model(inputs=inputs, outputs=outputs, name="nba_model")
+    model.compile(
+        loss=BinaryCrossentropy(from_logits=False),
+        optimizer=optimizer,
+        metrics=["accuracy"],
+    )
+    return model
+def train_model(
+    x_train: np.ndarray,
+    y_train: np.ndarray,
+    x_test: np.ndarray,
+    y_test: np.ndarray,
+    n_iterations: int = 100,
+) -> tuple[keras.Model, dict, float]:
+    """Train model with hyperparameter search.
+    Args:
+        x_train: Training features
+        y_train: Training labels
+        x_test: Test features
+        y_test: Test labels
+        n_iterations: Number of random search iterations
+    Returns:
+        Tuple of (best_model, best_params, test_accuracy)
+    """
+    model = KerasClassifier(
+        model=create_model,
+        verbose=0,
+        init="glorot_uniform",
+    )
+    param_grid = {
+        "optimizer": OPTIMIZERS,
+        "epochs": EPOCHS,
+        "batch_size": BATCH_SIZES,
+        "init": INITIALIZERS,
+    }
+    logger.info(f"Starting randomized search with {n_iterations} iterations")
+    random_search = RandomizedSearchCV(
+        estimator=model,
+        param_distributions=param_grid,
+        n_iter=n_iterations,
+        verbose=3,
+    )
+    random_search_result = random_search.fit(x_train, y_train)
+    best_model = random_search_result.best_estimator_
+    best_params = random_search_result.best_params_
+    test_accuracy = best_model.score(x_test, y_test)
+    return best_model.model_, best_params, test_accuracy
+def main() -> None:
+    """Main training pipeline."""
+    logger.info("Loading data files")
+    if not ROSTER_FILE.exists():
+        logger.error(f"Roster file not found: {ROSTER_FILE}")
+        raise FileNotFoundError(f"Missing {ROSTER_FILE}")
+    if not SCHEDULE_FILE.exists():
+        logger.error(f"Schedule file not found: {SCHEDULE_FILE}")
+        raise FileNotFoundError(f"Missing {SCHEDULE_FILE}")
+    roster = pd.read_csv(ROSTER_FILE, delimiter=",")
+    schedule = pd.read_csv(SCHEDULE_FILE, delimiter=",")
+    logger.info(f"Loaded {len(roster)} players and {len(schedule)} games")
+    # Create target variable: 0 = home wins, 1 = away wins
+    schedule["winner"] = schedule.apply(
+        lambda x: 0 if x["PTS"] > x["PTS.1"] else 1, axis=1
+    )
+    # Create feature arrays
+    logger.info("Creating feature arrays")
+    X = np.array(create_stats(roster, schedule))
+    y = np.array(schedule["winner"])
+    logger.info(f"Feature shape: {X.shape}, Target shape: {y.shape}")
+    # Split data
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
+    logger.info(f"Train size: {len(X_train)}, Test size: {len(X_test)}")
+    # Train model
+    best_model, best_params, test_accuracy = train_model(
+        X_train, y_train, X_test, y_test
+    )
+    # Save model
+    logger.info(f"Saving model to {OUTPUT_MODEL}")
+    best_model.save(OUTPUT_MODEL)
+    logger.info(f"Best parameters: {best_params}")
+    logger.info(f"Test accuracy: {test_accuracy:.4f}")
+if __name__ == "__main__":
+    main()

src/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


+"""NBA Streamlit application source package."""

src/config.py ADDED Viewed

	@@ -0,0 +1,93 @@

+"""Application configuration, constants, and logging setup."""
+import logging
+from typing import Final
+# Database column names for player data
+PLAYER_COLUMNS: Final[list[str]] = [
+    "FULL_NAME",
+    "AST",
+    "BLK",
+    "DREB",
+    "FG3A",
+    "FG3M",
+    "FG3_PCT",
+    "FGA",
+    "FGM",
+    "FG_PCT",
+    "FTA",
+    "FTM",
+    "FT_PCT",
+    "GP",
+    "GS",
+    "MIN",
+    "OREB",
+    "PF",
+    "PTS",
+    "REB",
+    "STL",
+    "TOV",
+    "FIRST_NAME",
+    "LAST_NAME",
+    "FULL_NAME_LOWER",
+    "FIRST_NAME_LOWER",
+    "LAST_NAME_LOWER",
+    "IS_ACTIVE",
+]
+# Columns used for ML model features
+STAT_COLUMNS: Final[list[str]] = [
+    "PTS",
+    "OREB",
+    "DREB",
+    "AST",
+    "STL",
+    "BLK",
+    "TOV",
+    "FG3_PCT",
+    "FT_PCT",
+    "FGM",
+]
+# Game configuration
+TEAM_SIZE: Final[int] = 5
+MAX_QUERY_ATTEMPTS: Final[int] = 10
+# Difficulty presets: (PTS, REB, AST, STL)
+DIFFICULTY_PRESETS: Final[dict[str, tuple[int, int, int, int]]] = {
+    "Regular": (850, 400, 200, 60),
+    "93' Bulls": (1050, 500, 300, 80),
+    "All-Stars": (1250, 600, 400, 100),
+    "Dream Team": (1450, 700, 500, 120),
+}
+# Score ranges for game simulation
+WINNER_SCORE_RANGE: Final[tuple[int, int]] = (90, 130)
+LOSER_SCORE_RANGE: Final[tuple[int, int]] = (80, 120)
+# Default fallback scores when generation fails
+DEFAULT_WINNER_SCORE: Final[int] = 100
+DEFAULT_LOSER_SCORE: Final[int] = 90
+def setup_logging(level: int = logging.INFO) -> logging.Logger:
+    """Configure and return the application logger.
+    Args:
+        level: Logging level (default: INFO)
+    Returns:
+        Configured logger instance
+    """
+    logging.basicConfig(
+        level=level,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+    logger = logging.getLogger("streamlit_nba")
+    logger.setLevel(level)
+    return logger
+# Module-level logger instance
+logger: Final[logging.Logger] = setup_logging()

src/database/__init__.py ADDED Viewed

	@@ -0,0 +1,23 @@

+"""Database module for connection management and queries."""
+from src.database.connection import (
+    get_connection,
+    DatabaseConnectionError,
+    QueryExecutionError,
+)
+from src.database.queries import (
+    search_player_by_name,
+    get_player_by_full_name,
+    get_players_by_full_names,
+    get_away_team_by_stats,
+)
+__all__ = [
+    "get_connection",
+    "DatabaseConnectionError",
+    "QueryExecutionError",
+    "search_player_by_name",
+    "get_player_by_full_name",
+    "get_players_by_full_names",
+    "get_away_team_by_stats",
+]

src/database/connection.py ADDED Viewed

	@@ -0,0 +1,111 @@

+"""Database connection management with error handling."""
+import logging
+from contextlib import contextmanager
+from typing import Generator
+import snowflake.connector
+import streamlit as st
+from snowflake.connector import SnowflakeConnection
+from snowflake.connector.errors import DatabaseError, ProgrammingError
+logger = logging.getLogger("streamlit_nba")
+class DatabaseConnectionError(Exception):
+    """Raised when database connection fails."""
+    pass
+class QueryExecutionError(Exception):
+    """Raised when query execution fails."""
+    pass
+@st.cache_resource
+def _get_connection_pool() -> SnowflakeConnection:
+    """Create and cache a Snowflake connection.
+    Returns:
+        Cached Snowflake connection
+    Raises:
+        DatabaseConnectionError: If connection cannot be established
+    """
+    try:
+        return snowflake.connector.connect(**st.secrets["snowflake"])
+    except DatabaseError as e:
+        logger.error(f"Failed to connect to database: {e}")
+        raise DatabaseConnectionError(f"Could not connect to database: {e}") from e
+    except KeyError as e:
+        logger.error("Snowflake credentials not found in secrets")
+        raise DatabaseConnectionError(
+            "Database credentials not configured. Please check st.secrets."
+        ) from e
+@contextmanager
+def get_connection() -> Generator[SnowflakeConnection, None, None]:
+    """Context manager for database connections with error handling.
+    Yields:
+        Active Snowflake connection
+    Raises:
+        DatabaseConnectionError: If connection fails
+    Example:
+        with get_connection() as conn:
+            # use connection
+    """
+    try:
+        conn = snowflake.connector.connect(**st.secrets["snowflake"])
+        yield conn
+    except DatabaseError as e:
+        logger.error(f"Database connection error: {e}")
+        raise DatabaseConnectionError(f"Database connection failed: {e}") from e
+    except KeyError as e:
+        logger.error("Snowflake credentials not found in secrets")
+        raise DatabaseConnectionError(
+            "Database credentials not configured. Please check st.secrets."
+        ) from e
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass  # Connection may already be closed
+def execute_query(
+    conn: SnowflakeConnection,
+    query: str,
+    params: tuple | list | None = None,
+) -> list[tuple]:
+    """Execute a parameterized query safely.
+    Args:
+        conn: Active database connection
+        query: SQL query with %s placeholders
+        params: Query parameters (optional)
+    Returns:
+        List of result tuples
+    Raises:
+        QueryExecutionError: If query execution fails
+    """
+    try:
+        with conn.cursor() as cur:
+            if params:
+                cur.execute(query, params)
+            else:
+                cur.execute(query)
+            return cur.fetchall()
+    except ProgrammingError as e:
+        logger.error(f"Query execution error: {e}")
+        raise QueryExecutionError(f"Query failed: {e}") from e
+    except DatabaseError as e:
+        logger.error(f"Database error during query: {e}")
+        raise QueryExecutionError(f"Database error: {e}") from e

src/database/queries.py ADDED Viewed

	@@ -0,0 +1,127 @@

+"""Parameterized database queries for player data."""
+import logging
+from typing import Any
+import pandas as pd
+from snowflake.connector import SnowflakeConnection
+from src.config import MAX_QUERY_ATTEMPTS, PLAYER_COLUMNS
+from src.database.connection import QueryExecutionError, execute_query
+logger = logging.getLogger("streamlit_nba")
+def search_player_by_name(conn: SnowflakeConnection, name: str) -> list[tuple[str]]:
+    """Search for players by name (first, last, or full name).
+    Args:
+        conn: Active database connection
+        name: Search term (case-insensitive)
+    Returns:
+        List of tuples containing matching full names
+    """
+    name_lower = name.lower().strip()
+    query = """
+        SELECT full_name FROM NBA
+        WHERE full_name_lower = %s
+           OR first_name_lower = %s
+           OR last_name_lower = %s
+    """
+    return execute_query(conn, query, (name_lower, name_lower, name_lower))
+def get_player_by_full_name(
+    conn: SnowflakeConnection, full_name: str
+) -> tuple[Any, ...] | None:
+    """Get a single player's full record by exact name match.
+    Args:
+        conn: Active database connection
+        full_name: Exact full name of player
+    Returns:
+        Player data tuple or None if not found
+    """
+    query = "SELECT * FROM NBA WHERE FULL_NAME = %s"
+    results = execute_query(conn, query, (full_name,))
+    return results[0] if results else None
+def get_players_by_full_names(
+    conn: SnowflakeConnection, names: list[str]
+) -> pd.DataFrame:
+    """Get multiple players' records in a single batch query.
+    This fixes the N+1 query problem by using a single IN clause
+    instead of multiple individual queries.
+    Args:
+        conn: Active database connection
+        names: List of exact full names
+    Returns:
+        DataFrame with player data
+    """
+    if not names:
+        return pd.DataFrame(columns=PLAYER_COLUMNS)
+    # Build parameterized IN clause
+    placeholders = ", ".join(["%s"] * len(names))
+    query = f"SELECT * FROM NBA WHERE FULL_NAME IN ({placeholders})"
+    results = execute_query(conn, query, tuple(names))
+    return pd.DataFrame(results, columns=PLAYER_COLUMNS)
+def get_away_team_by_stats(
+    conn: SnowflakeConnection,
+    pts_threshold: int,
+    reb_threshold: int,
+    ast_threshold: int,
+    stl_threshold: int,
+    max_attempts: int = MAX_QUERY_ATTEMPTS,
+) -> pd.DataFrame:
+    """Get a random away team based on stat thresholds.
+    Uses UNION with SAMPLE to get diverse players meeting stat criteria.
+    Includes a max_attempts guard to prevent infinite loops.
+    Args:
+        conn: Active database connection
+        pts_threshold: Minimum career points
+        reb_threshold: Minimum career rebounds
+        ast_threshold: Minimum career assists
+        stl_threshold: Minimum career steals
+        max_attempts: Maximum query attempts before raising error
+    Returns:
+        DataFrame with 5 players
+    Raises:
+        QueryExecutionError: If unable to get 5 players within max_attempts
+    """
+    query = """
+        SELECT * FROM (SELECT * FROM NBA WHERE PTS > %s) SAMPLE (2 ROWS)
+        UNION
+        SELECT * FROM (SELECT * FROM NBA WHERE REB > %s) SAMPLE (1 ROWS)
+        UNION
+        SELECT * FROM (SELECT * FROM NBA WHERE AST > %s) SAMPLE (1 ROWS)
+        UNION
+        SELECT * FROM (SELECT * FROM NBA WHERE STL > %s) SAMPLE (1 ROWS)
+    """
+    params = (pts_threshold, reb_threshold, ast_threshold, stl_threshold)
+    for attempt in range(max_attempts):
+        results = execute_query(conn, query, params)
+        if len(results) == 5:
+            logger.info(f"Got away team on attempt {attempt + 1}")
+            return pd.DataFrame(results, columns=PLAYER_COLUMNS)
+        logger.debug(f"Attempt {attempt + 1}: got {len(results)} players, need 5")
+    # Fallback: if we can't get exactly 5, raise an error
+    raise QueryExecutionError(
+        f"Could not generate away team with 5 players after {max_attempts} attempts. "
+        f"Last attempt returned {len(results)} players."
+    )

src/ml/__init__.py ADDED Viewed

	@@ -0,0 +1,15 @@

+"""Machine learning module for game prediction."""
+from src.ml.model import (
+    ModelLoadError,
+    analyze_team_stats,
+    get_winner_model,
+    predict_winner,
+)
+__all__ = [
+    "ModelLoadError",
+    "analyze_team_stats",
+    "get_winner_model",
+    "predict_winner",
+]

src/ml/model.py ADDED Viewed

	@@ -0,0 +1,114 @@

+"""Machine learning model loading and prediction with caching."""
+import logging
+from pathlib import Path
+import numpy as np
+import streamlit as st
+from tensorflow.keras.models import Model, load_model
+logger = logging.getLogger("streamlit_nba")
+# Default model path
+DEFAULT_MODEL_PATH = Path("winner.keras")
+class ModelLoadError(Exception):
+    """Raised when model loading fails."""
+    pass
+@st.cache_resource
+def get_winner_model(model_path: str | Path = DEFAULT_MODEL_PATH) -> Model:
+    """Load and cache the winner prediction model.
+    Uses Streamlit's cache_resource to ensure model is only loaded once
+    per session, improving performance significantly.
+    Args:
+        model_path: Path to the Keras model file
+    Returns:
+        Loaded Keras model
+    Raises:
+        ModelLoadError: If model cannot be loaded
+    """
+    path = Path(model_path)
+    if not path.exists():
+        logger.error(f"Model file not found: {path}")
+        raise ModelLoadError(f"Model file not found: {path}")
+    try:
+        logger.info(f"Loading model from {path}")
+        model = load_model(str(path))
+        logger.info("Model loaded successfully")
+        return model
+    except Exception as e:
+        logger.error(f"Failed to load model: {e}")
+        raise ModelLoadError(f"Failed to load model: {e}") from e
+def predict_winner(combined_stats: np.ndarray) -> tuple[float, int]:
+    """Predict game winner from combined team stats.
+    Args:
+        combined_stats: Numpy array of shape (1, 100) containing
+            home team stats followed by away team stats
+    Returns:
+        Tuple of (probability, prediction) where:
+            - probability: Float between 0-1 (sigmoid output)
+            - prediction: 0 (away wins) or 1 (home wins)
+    Raises:
+        ModelLoadError: If model cannot be loaded
+        ValueError: If input shape is invalid
+    """
+    if combined_stats.shape != (1, 100):
+        raise ValueError(
+            f"Expected input shape (1, 100), got {combined_stats.shape}"
+        )
+    model = get_winner_model()
+    sigmoid_output = model.predict(combined_stats, verbose=0)
+    probability = float(sigmoid_output[0][0])
+    prediction = int(np.round(probability))
+    logger.info(f"Prediction: probability={probability:.4f}, winner={prediction}")
+    return probability, prediction
+def analyze_team_stats(
+    home_stats: list[list[float]], away_stats: list[list[float]]
+) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """Prepare team stats for model prediction.
+    Flattens per-player stats into team-level arrays suitable for
+    the prediction model.
+    Args:
+        home_stats: List of stat lists for each home player
+        away_stats: List of stat lists for each away player
+    Returns:
+        Tuple of (home_array, away_array, combined_array) where:
+            - home_array: Shape (1, 50) - home team flattened stats
+            - away_array: Shape (1, 50) - away team flattened stats
+            - combined_array: Shape (1, 100) - both teams for prediction
+    """
+    home_flat: list[float] = []
+    away_flat: list[float] = []
+    for player_stats in home_stats:
+        home_flat.extend(player_stats)
+    for player_stats in away_stats:
+        away_flat.extend(player_stats)
+    home_array = np.array(home_flat).reshape(1, -1)
+    away_array = np.array(away_flat).reshape(1, -1)
+    combined_array = np.array(home_flat + away_flat).reshape(1, -1)
+    return home_array, away_array, combined_array

src/models/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+"""Pydantic models for data validation."""
+from src.models.player import PlayerStats, DifficultySettings
+__all__ = ["PlayerStats", "DifficultySettings"]

src/models/player.py ADDED Viewed

	@@ -0,0 +1,144 @@

+"""Pydantic models for player and game data."""
+from typing import ClassVar
+from pydantic import BaseModel, Field, field_validator
+from src.config import DIFFICULTY_PRESETS
+class PlayerStats(BaseModel):
+    """Model representing a player's career statistics.
+    Field order mirrors the positional order read by from_db_row.
+    """
+    full_name: str = Field(..., min_length=1, max_length=100)
+    ast: int = Field(..., ge=0, description="Career assists")
+    blk: int = Field(..., ge=0, description="Career blocks")
+    dreb: int = Field(..., ge=0, description="Career defensive rebounds")
+    fg3a: int = Field(..., ge=0, description="Career 3-point attempts")
+    fg3m: int = Field(..., ge=0, description="Career 3-pointers made")
+    # Percentages are validated as fractions in the range 0.0-1.0.
+    fg3_pct: float = Field(..., ge=0.0, le=1.0, description="3-point percentage")
+    fga: int = Field(..., ge=0, description="Career field goal attempts")
+    fgm: int = Field(..., ge=0, description="Career field goals made")
+    fg_pct: float = Field(..., ge=0.0, le=1.0, description="Field goal percentage")
+    fta: int = Field(..., ge=0, description="Career free throw attempts")
+    ftm: int = Field(..., ge=0, description="Career free throws made")
+    ft_pct: float = Field(..., ge=0.0, le=1.0, description="Free throw percentage")
+    gp: int = Field(..., ge=0, description="Games played")
+    gs: int = Field(..., ge=0, description="Games started")
+    min: int = Field(..., ge=0, description="Career minutes")
+    oreb: int = Field(..., ge=0, description="Career offensive rebounds")
+    pf: int = Field(..., ge=0, description="Career personal fouls")
+    pts: int = Field(..., ge=0, description="Career points")
+    reb: int = Field(..., ge=0, description="Career rebounds")
+    stl: int = Field(..., ge=0, description="Career steals")
+    tov: int = Field(..., ge=0, description="Career turnovers")
+    first_name: str = Field(..., max_length=50)
+    last_name: str = Field(..., max_length=50)
+    full_name_lower: str = Field(..., max_length=100)
+    first_name_lower: str = Field(..., max_length=50)
+    last_name_lower: str = Field(..., max_length=50)
+    is_active: bool = Field(default=False)
+    @classmethod
+    def from_db_row(cls, row: tuple) -> "PlayerStats":
+        """Create PlayerStats from a database row tuple.
+        Values are read strictly by position (indices 0-27), so the row
+        must have at least 28 entries in the expected column order.
+        Args:
+            row: Database row tuple in PLAYER_COLUMNS order
+        Returns:
+            PlayerStats instance
+        """
+        return cls(
+            full_name=row[0],
+            ast=row[1],
+            blk=row[2],
+            dreb=row[3],
+            fg3a=row[4],
+            fg3m=row[5],
+            # Falsy percentage values (None or 0) are stored as 0.0.
+            fg3_pct=row[6] or 0.0,
+            fga=row[7],
+            fgm=row[8],
+            fg_pct=row[9] or 0.0,
+            fta=row[10],
+            ftm=row[11],
+            ft_pct=row[12] or 0.0,
+            gp=row[13],
+            gs=row[14],
+            min=row[15],
+            oreb=row[16],
+            pf=row[17],
+            pts=row[18],
+            reb=row[19],
+            stl=row[20],
+            tov=row[21],
+            first_name=row[22],
+            last_name=row[23],
+            full_name_lower=row[24],
+            first_name_lower=row[25],
+            last_name_lower=row[26],
+            # A missing activity flag is treated as inactive.
+            is_active=bool(row[27]) if row[27] is not None else False,
+        )
+class DifficultySettings(BaseModel):
+    """Model for game difficulty settings."""
+    VALID_PRESETS: ClassVar[set[str]] = set(DIFFICULTY_PRESETS.keys())
+    name: str = Field(..., min_length=1)
+    pts_threshold: int = Field(..., ge=0, description="Minimum career points")
+    reb_threshold: int = Field(..., ge=0, description="Minimum career rebounds")
+    ast_threshold: int = Field(..., ge=0, description="Minimum career assists")
+    stl_threshold: int = Field(..., ge=0, description="Minimum career steals")
+    @field_validator("name")
+    @classmethod
+    def validate_preset_name(cls, v: str) -> str:
+        """Validate that preset name is recognized."""
+        if v not in cls.VALID_PRESETS:
+            raise ValueError(
+                f"Unknown difficulty preset: {v}. "
+                f"Valid options: {', '.join(sorted(cls.VALID_PRESETS))}"
+            )
+        return v
+    @classmethod
+    def from_preset(cls, preset_name: str) -> "DifficultySettings":
+        """Create DifficultySettings from a named preset.
+        Args:
+            preset_name: Name of difficulty preset (e.g., "Regular", "Dream Team")
+        Returns:
+            DifficultySettings instance
+        Raises:
+            ValueError: If preset_name is not valid
+        """
+        if preset_name not in DIFFICULTY_PRESETS:
+            raise ValueError(
+                f"Unknown difficulty preset: {preset_name}. "
+                f"Valid options: {', '.join(sorted(DIFFICULTY_PRESETS.keys()))}"
+            )
+        pts, reb, ast, stl = DIFFICULTY_PRESETS[preset_name]
+        return cls(
+            name=preset_name,
+            pts_threshold=pts,
+            reb_threshold=reb,
+            ast_threshold=ast,
+            stl_threshold=stl,
+        )
+    def as_tuple(self) -> tuple[int, int, int, int]:
+        """Return thresholds as tuple for backward compatibility.
+        Returns:
+            Tuple of (pts, reb, ast, stl) thresholds
+        """
+        return (
+            self.pts_threshold,
+            self.reb_threshold,
+            self.ast_threshold,
+            self.stl_threshold,
+        )

src/state/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+"""Session state management module."""
+from src.state.session import GameState, init_session_state, get_away_stats
+__all__ = ["GameState", "init_session_state", "get_away_stats"]

src/state/session.py ADDED Viewed

	@@ -0,0 +1,160 @@

+"""Session state management for the Streamlit application."""
+import logging
+from dataclasses import dataclass, field
+import pandas as pd
+import streamlit as st
+from src.config import DIFFICULTY_PRESETS
+logger = logging.getLogger("streamlit_nba")
+# Default difficulty preset
+DEFAULT_DIFFICULTY = "Regular"
+@dataclass
+class GameState:
+    """Dataclass representing the game session state.
+    The fields and defaults match the keys that init_session_state seeds
+    into ``st.session_state``.
+    """
+    # Mutable defaults use default_factory so instances never share them.
+    home_team: list[str] = field(default_factory=list)
+    away_team: list[str] = field(default_factory=list)
+    # Defaults to a fresh copy of the DEFAULT_DIFFICULTY preset thresholds.
+    away_stats: list[int] = field(
+        default_factory=lambda: list(DIFFICULTY_PRESETS[DEFAULT_DIFFICULTY])
+    )
+    home_team_df: pd.DataFrame = field(default_factory=pd.DataFrame)
+    # set_difficulty stores the selected preset's position here.
+    radio_index: int = 0
+def init_session_state() -> None:
+    """Initialize all session state keys with safe defaults.
+    This should be called at the start of each page to ensure
+    all required state keys exist before access.
+    """
+    defaults = {
+        "home_team": [],
+        "away_team": [],
+        "away_stats": list(DIFFICULTY_PRESETS[DEFAULT_DIFFICULTY]),
+        "home_team_df": pd.DataFrame(),
+        "radio_index": 0,
+    }
+    for key, default_value in defaults.items():
+        if key not in st.session_state:
+            st.session_state[key] = default_value
+            logger.debug(f"Initialized session state: {key}")
+def get_away_stats() -> list[int]:
+    """Safely get away team stats from session state.
+    Returns:
+        List of stat thresholds [pts, reb, ast, stl], or defaults if not set
+    """
+    init_session_state()  # Ensure state is initialized
+    stats = st.session_state.get("away_stats")
+    if stats is None or not isinstance(stats, list) or len(stats) != 4:
+        logger.warning("Invalid away_stats in session, using defaults")
+        default_stats = list(DIFFICULTY_PRESETS[DEFAULT_DIFFICULTY])
+        st.session_state["away_stats"] = default_stats
+        return default_stats
+    return stats
+def get_home_team_df() -> pd.DataFrame:
+    """Safely get home team DataFrame from session state.
+    Returns:
+        DataFrame with home team player data, or empty DataFrame if not set
+    """
+    init_session_state()
+    df = st.session_state.get("home_team_df")
+    if df is None or not isinstance(df, pd.DataFrame):
+        logger.warning("Invalid home_team_df in session, using empty DataFrame")
+        return pd.DataFrame()
+    return df
+def get_home_team_names() -> list[str]:
+    """Safely get home team player names from session state.
+    Returns:
+        List of player names on home team
+    """
+    init_session_state()
+    team = st.session_state.get("home_team")
+    if team is None or not isinstance(team, list):
+        return []
+    return team
+def set_difficulty(preset_name: str) -> None:
+    """Set the difficulty level by preset name.
+    Args:
+        preset_name: Name of difficulty preset
+    """
+    if preset_name not in DIFFICULTY_PRESETS:
+        logger.error(f"Invalid difficulty preset: {preset_name}")
+        return
+    index = list(DIFFICULTY_PRESETS.keys()).index(preset_name)
+    st.session_state["away_stats"] = list(DIFFICULTY_PRESETS[preset_name])
+    st.session_state["radio_index"] = index
+    logger.info(f"Set difficulty to {preset_name}")
+def add_player_to_team(player_name: str) -> bool:
+    """Add a player to the home team.
+    Args:
+        player_name: Full name of player to add
+    Returns:
+        True if added, False if already on team or team is full
+    """
+    init_session_state()
+    team = st.session_state.get("home_team", [])
+    if len(team) >= 5:
+        logger.warning("Cannot add player: team is full")
+        return False
+    if player_name in team:
+        logger.debug(f"Player {player_name} already on team")
+        return False
+    team.append(player_name)
+    st.session_state["home_team"] = team
+    logger.info(f"Added {player_name} to team")
+    return True
+def remove_player_from_team(player_name: str) -> bool:
+    """Remove a player from the home team.
+    Args:
+        player_name: Full name of player to remove
+    Returns:
+        True if removed, False if not on team
+    """
+    init_session_state()
+    team = st.session_state.get("home_team", [])
+    if player_name not in team:
+        logger.debug(f"Player {player_name} not on team")
+        return False
+    team.remove(player_name)
+    st.session_state["home_team"] = team
+    logger.info(f"Removed {player_name} from team")
+    return True

src/utils/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+"""Utility functions for HTML sanitization and other helpers."""
+from src.utils.html import safe_heading, safe_paragraph
+__all__ = ["safe_heading", "safe_paragraph"]

src/utils/html.py ADDED Viewed

	@@ -0,0 +1,108 @@

+"""HTML sanitization utilities for XSS protection."""
+import html
+from typing import Literal
+import streamlit as st
+# Valid heading levels
+HeadingLevel = Literal[1, 2, 3, 4, 5, 6]
+def escape_html(text: str) -> str:
+    """Return *text* with HTML-significant characters replaced by entities.
+    Args:
+        text: Raw value that may contain markup; coerced with str() first
+    Returns:
+        Entity-escaped string (quotes included) for insertion into HTML
+    """
+    as_text = str(text)
+    return html.escape(as_text, quote=True)
+def safe_heading(
+    text: str,
+    level: HeadingLevel = 1,
+    color: str = "steelblue",
+    align: str = "center",
+) -> None:
+    """Render a heading with escaped text to prevent XSS.
+    Args:
+        text: Heading text (will be escaped)
+        level: Heading level 1-6
+        color: CSS color value (will be escaped)
+        align: CSS text-align value (will be escaped)
+    Raises:
+        ValueError: If level is not an integer from 1 to 6
+    """
+    # The Literal annotation is not enforced at runtime and the level becomes
+    # part of the tag name, so anything outside 1-6 is rejected before it can
+    # reach the markup unescaped.
+    if level not in range(1, 7):
+        raise ValueError(f"Heading level must be 1-6, got {level!r}")
+    tag = f"h{int(level)}"
+    safe_text = escape_html(text)
+    safe_color = escape_html(color)
+    safe_align = escape_html(align)
+    st.markdown(
+        f"<{tag} style='text-align: {safe_align}; color: {safe_color};'>"
+        f"{safe_text}</{tag}>",
+        unsafe_allow_html=True,
+    )
+def safe_paragraph(
+    text: str,
+    color: str = "white",
+    align: str = "center",
+) -> None:
+    """Emit a styled <p> element through st.markdown with escaped values.
+    Args:
+        text: Paragraph body (escaped before insertion)
+        color: CSS color value (escaped before insertion)
+        align: CSS text-align value (escaped before insertion)
+    """
+    body = escape_html(text)
+    css_color = escape_html(color)
+    css_align = escape_html(align)
+    opening = f"<p style='text-align: {css_align}; color: {css_color};'>"
+    markup = opening + f"{body}</p>"
+    st.markdown(markup, unsafe_allow_html=True)
+def safe_styled_text(
+    text: str,
+    tag: str = "span",
+    color: str | None = None,
+    align: str | None = None,
+    **styles: str,
+) -> str:
+    """Generate HTML string with escaped text and validated tag and styles.
+    Args:
+        text: Text content (will be escaped)
+        tag: HTML tag to use; must be a plain ASCII element name
+        color: Optional CSS color (will be escaped)
+        align: Optional CSS text-align (will be escaped)
+        **styles: Additional CSS properties; underscores in the keyword
+            become hyphens, values are escaped
+    Returns:
+        Safe HTML string
+    Raises:
+        ValueError: If tag is not a plain element name, is script/style,
+            or a CSS property name contains unexpected characters
+    """
+    # html.escape() leaves spaces and "=" alone, so an unchecked tag such as
+    # "img src=x onerror=..." would inject attributes. Accept only a bare
+    # ASCII element name.
+    if not (tag.isascii() and tag.isalnum() and tag[0].isalpha()):
+        raise ValueError(f"Invalid HTML tag name: {tag!r}")
+    # Entities are not decoded inside raw-text elements, so escaping the
+    # content offers no protection there.
+    if tag.lower() in ("script", "style"):
+        raise ValueError(f"Unsupported HTML tag: {tag!r}")
+    safe_text = escape_html(text)
+    style_parts: list[str] = []
+    if color:
+        style_parts.append(f"color: {escape_html(color)}")
+    if align:
+        style_parts.append(f"text-align: {escape_html(align)}")
+    for prop, value in styles.items():
+        # Convert underscores to hyphens for CSS properties
+        css_prop = prop.replace("_", "-")
+        # Property names land inside the style attribute; restrict them to
+        # letters, digits and hyphens.
+        bare = css_prop.replace("-", "")
+        if not (bare.isascii() and bare.isalnum()):
+            raise ValueError(f"Invalid CSS property name: {css_prop!r}")
+        style_parts.append(f"{css_prop}: {escape_html(value)}")
+    style_str = "; ".join(style_parts)
+    if style_str:
+        return f"<{tag} style='{style_str}'>{safe_text}</{tag}>"
+    return f"<{tag}>{safe_text}</{tag}>"

src/validation/__init__.py ADDED Viewed

	@@ -0,0 +1,9 @@

+"""Input validation module."""
+from src.validation.inputs import (
+    PlayerSearchInput,
+    is_valid_search_term,
+    validate_search_term,
+)
+__all__ = ["PlayerSearchInput", "is_valid_search_term", "validate_search_term"]

src/validation/inputs.py ADDED Viewed

	@@ -0,0 +1,111 @@

+"""Input validation for user-provided data."""
+import re
+from pydantic import BaseModel, Field, field_validator
+# Patterns that indicate SQL injection attempts.
+# The apostrophe is deliberately NOT listed: it appears in legitimate names
+# such as "O'Neal", and the character whitelist applied to search terms
+# accepts it. Rejecting it here made every such name fail validation.
+SQL_INJECTION_PATTERNS: list[str] = [
+    r"[\";]",  # Double quotes and semicolons
+    r"--",  # SQL comment
+    r"/\*",  # Block comment start
+    r"\*/",  # Block comment end
+    r"\bUNION\b",  # UNION keyword
+    r"\bSELECT\b",  # SELECT keyword
+    r"\bINSERT\b",  # INSERT keyword
+    r"\bUPDATE\b",  # UPDATE keyword
+    r"\bDELETE\b",  # DELETE keyword
+    r"\bDROP\b",  # DROP keyword
+    r"\bEXEC\b",  # EXEC keyword
+    r"\bOR\s+\d+=\d+",  # OR 1=1 pattern
+    r"\bAND\s+\d+=\d+",  # AND 1=1 pattern
+]
+# Single case-insensitive alternation, compiled once at import time
+SQL_INJECTION_REGEX = re.compile(
+    "|".join(SQL_INJECTION_PATTERNS), re.IGNORECASE
+)
+class PlayerSearchInput(BaseModel):
+    """Pydantic model holding a single, vetted player search term."""
+    search_term: str = Field(
+        ...,
+        min_length=1,
+        max_length=100,
+        description="Player name search term",
+    )
+    @field_validator("search_term")
+    @classmethod
+    def validate_no_sql_injection(cls, v: str) -> str:
+        """Refuse terms matching SQL_INJECTION_REGEX, then trim whitespace.
+        Args:
+            v: Input search term
+        Returns:
+            The term with surrounding whitespace stripped
+        Raises:
+            ValueError: If any injection pattern is found in the term
+        """
+        suspicious = SQL_INJECTION_REGEX.search(v)
+        if suspicious is None:
+            return v.strip()
+        raise ValueError(
+            "Invalid characters in search term. "
+            "Please use only letters, numbers, spaces, and hyphens."
+        )
+    @field_validator("search_term")
+    @classmethod
+    def validate_reasonable_characters(cls, v: str) -> str:
+        """Accept only terms built from the whitelisted character set.
+        Args:
+            v: Input search term
+        Returns:
+            The term unchanged
+        Raises:
+            ValueError: If a character outside the whitelist is present
+        """
+        # Whitelist: letters, digits, whitespace, hyphens, periods and
+        # apostrophes (covers names like "O'Neal" and "J.R. Smith")
+        allowed = re.match(r"^[a-zA-Z0-9\s\-.']+$", v)
+        if allowed:
+            return v
+        raise ValueError(
+            "Search term contains invalid characters. "
+            "Please use only letters, numbers, spaces, hyphens, "
+            "periods, and apostrophes."
+        )
+def validate_search_term(term: str) -> str | None:
+    """Run a raw term through PlayerSearchInput and return the cleaned value.
+    Args:
+        term: Raw search input
+    Returns:
+        The validated search term, or None when validation raises ValueError
+    """
+    try:
+        checked = PlayerSearchInput(search_term=term)
+        cleaned = checked.search_term
+    except ValueError:
+        return None
+    else:
+        return cleaned
+def is_valid_search_term(term: str) -> bool:
+    """Report whether validate_search_term accepts the given term.
+    Args:
+        term: Raw search input
+    Returns:
+        True if validate_search_term returned a value, False if it gave None
+    """
+    cleaned = validate_search_term(term)
+    return cleaned is not None

tests/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


+"""Test suite for NBA Streamlit application."""

tests/conftest.py ADDED Viewed

	@@ -0,0 +1,123 @@

+"""Pytest fixtures for NBA Streamlit application tests."""
+from typing import Any
+from unittest.mock import MagicMock
+import pandas as pd
+import pytest
+@pytest.fixture
+def mock_snowflake_connection() -> MagicMock:
+    """Build a MagicMock connection whose cursor() works as a context manager.
+    Returns:
+        Mock connection; entering `connection.cursor()` in a with-block
+        yields a MagicMock cursor, and exiting does not suppress exceptions
+    """
+    cursor = MagicMock()
+    connection = MagicMock()
+    cursor_context = connection.cursor.return_value
+    cursor_context.__enter__ = MagicMock(return_value=cursor)
+    cursor_context.__exit__ = MagicMock(return_value=False)
+    return connection
+@pytest.fixture
+def sample_player_data() -> list[tuple[Any, ...]]:
+    """Provide two hand-written player rows in database column order.
+    Returns:
+        List with one tuple for LeBron James and one for Michael Jordan
+    """
+    lebron: tuple[Any, ...] = (
+        "LeBron James",  # FULL_NAME
+        10141, 1107, 5972,  # AST, BLK, DREB
+        2891, 1043, 0.361,  # FG3A, FG3M, FG3_PCT
+        24856, 12621, 0.508,  # FGA, FGM, FG_PCT
+        11067, 7938, 0.717,  # FTA, FTM, FT_PCT
+        1421, 1421, 54218,  # GP, GS, MIN
+        1663, 2159, 39223,  # OREB, PF, PTS
+        10988, 2219, 5015,  # REB, STL, TOV
+        "LeBron", "James",  # FIRST_NAME, LAST_NAME
+        "lebron james",  # FULL_NAME_LOWER
+        "lebron", "james",  # FIRST_NAME_LOWER, LAST_NAME_LOWER
+        True,  # IS_ACTIVE
+    )
+    # Same column layout as the row above.
+    jordan: tuple[Any, ...] = (
+        "Michael Jordan",
+        5633, 893, 4578,
+        1778, 581, 0.327,
+        24537, 12192, 0.497,
+        8772, 7327, 0.835,
+        1072, 1039, 41011,
+        1463, 2783, 32292,
+        6672, 2514, 2924,
+        "Michael", "Jordan",
+        "michael jordan",
+        "michael", "jordan",
+        False,
+    )
+    return [lebron, jordan]
+@pytest.fixture
+def sample_player_df(sample_player_data: list[tuple]) -> pd.DataFrame:
+    """Wrap the sample player rows in a DataFrame labelled with PLAYER_COLUMNS.
+    Args:
+        sample_player_data: Player row tuples from the sibling fixture
+    Returns:
+        DataFrame with one row per sample player
+    """
+    # Imported here rather than at module level, as in the original fixture.
+    from src.config import PLAYER_COLUMNS
+    frame = pd.DataFrame(data=sample_player_data, columns=PLAYER_COLUMNS)
+    return frame
+@pytest.fixture
+def sample_team_stats() -> list[list[float]]:
+    """Provide a five-player stat matrix used as ML model input in tests.
+    Returns:
+        Five rows (one per player), each holding ten float stats
+    """
+    player_one = [1500.0, 100.0, 200.0, 300.0, 50.0, 30.0, 100.0, 0.35, 0.80, 500.0]
+    player_two = [1200.0, 80.0, 180.0, 250.0, 40.0, 25.0, 90.0, 0.38, 0.75, 450.0]
+    player_three = [1000.0, 60.0, 150.0, 200.0, 35.0, 20.0, 80.0, 0.40, 0.82, 400.0]
+    player_four = [800.0, 50.0, 120.0, 150.0, 30.0, 15.0, 70.0, 0.33, 0.78, 350.0]
+    player_five = [600.0, 40.0, 100.0, 100.0, 25.0, 10.0, 60.0, 0.36, 0.85, 300.0]
+    return [player_one, player_two, player_three, player_four, player_five]

tests/test_database.py ADDED Viewed

	@@ -0,0 +1,220 @@

+"""Tests for database module."""
+from unittest.mock import MagicMock, patch
+import pandas as pd
+import pytest
+from src.config import MAX_QUERY_ATTEMPTS, PLAYER_COLUMNS
+from src.database.connection import QueryExecutionError
+from src.database.queries import (
+    get_away_team_by_stats,
+    get_player_by_full_name,
+    get_players_by_full_names,
+    search_player_by_name,
+)
+class TestSearchPlayerByName:
+    """Behavioural checks for search_player_by_name."""
+    def test_uses_parameterized_query(
+        self, mock_snowflake_connection: MagicMock
+    ) -> None:
+        """The search term must travel as a bind parameter, never inline."""
+        cursor = MagicMock()
+        cursor.fetchall.return_value = [("LeBron James",)]
+        enter = mock_snowflake_connection.cursor.return_value.__enter__
+        enter.return_value = cursor
+        search_player_by_name(mock_snowflake_connection, "james")
+        # One execute() call, inspected through its positional arguments
+        cursor.execute.assert_called_once()
+        positional = cursor.execute.call_args[0]
+        sql_text = positional[0]
+        bound = positional[1]
+        # Placeholders present, raw term absent from the SQL text
+        assert "%s" in sql_text
+        assert "james" not in sql_text.lower()
+        # The term is bound three times
+        assert bound == ("james", "james", "james")
+    def test_returns_list_of_tuples(
+        self, mock_snowflake_connection: MagicMock
+    ) -> None:
+        """Rows from fetchall() come back unchanged as a list of tuples."""
+        rows = [
+            ("LeBron James",),
+            ("James Harden",),
+        ]
+        cursor = MagicMock()
+        cursor.fetchall.return_value = rows
+        enter = mock_snowflake_connection.cursor.return_value.__enter__
+        enter.return_value = cursor
+        outcome = search_player_by_name(mock_snowflake_connection, "james")
+        assert outcome == [("LeBron James",), ("James Harden",)]
+class TestGetPlayersByFullNames:
+    """Behavioural checks for the get_players_by_full_names batch query."""
+    def test_single_query_for_multiple_names(
+        self, mock_snowflake_connection: MagicMock, sample_player_data: list
+    ) -> None:
+        """Several names must be fetched with one IN-clause query."""
+        cursor = MagicMock()
+        cursor.fetchall.return_value = sample_player_data
+        enter = mock_snowflake_connection.cursor.return_value.__enter__
+        enter.return_value = cursor
+        wanted = ["LeBron James", "Michael Jordan"]
+        get_players_by_full_names(mock_snowflake_connection, wanted)
+        # A single round trip, not one per name
+        assert cursor.execute.call_count == 1
+        positional = cursor.execute.call_args[0]
+        sql_text = positional[0]
+        bound = positional[1]
+        # IN clause built from placeholders
+        assert "IN" in sql_text.upper()
+        assert "%s" in sql_text
+        # Names are bound in the order given
+        assert bound == ("LeBron James", "Michael Jordan")
+    def test_returns_dataframe(
+        self, mock_snowflake_connection: MagicMock, sample_player_data: list
+    ) -> None:
+        """The result is a DataFrame with PLAYER_COLUMNS and one row per player."""
+        cursor = MagicMock()
+        cursor.fetchall.return_value = sample_player_data
+        enter = mock_snowflake_connection.cursor.return_value.__enter__
+        enter.return_value = cursor
+        frame = get_players_by_full_names(
+            mock_snowflake_connection, ["LeBron James", "Michael Jordan"]
+        )
+        assert isinstance(frame, pd.DataFrame)
+        assert list(frame.columns) == PLAYER_COLUMNS
+        assert len(frame) == 2
+    def test_empty_names_returns_empty_dataframe(
+        self, mock_snowflake_connection: MagicMock
+    ) -> None:
+        """An empty name list yields an empty DataFrame and no query."""
+        cursor = MagicMock()
+        enter = mock_snowflake_connection.cursor.return_value.__enter__
+        enter.return_value = cursor
+        frame = get_players_by_full_names(mock_snowflake_connection, [])
+        assert isinstance(frame, pd.DataFrame)
+        assert frame.empty
+        # The database must not be touched at all
+        cursor.execute.assert_not_called()
+class TestGetAwayTeamByStats:
+    """Tests for get_away_team_by_stats with max_attempts guard."""
+    def test_max_attempts_raises_error(
+        self, mock_snowflake_connection: MagicMock
+    ) -> None:
+        """Test that max_attempts limit prevents infinite loop."""
+        mock_cursor = MagicMock()
+        # Always return wrong number of players
+        mock_cursor.fetchall.return_value = [("Player1",), ("Player2",)]
+        mock_snowflake_connection.cursor.return_value.__enter__.return_value = (
+            mock_cursor
+        )
+        with pytest.raises(QueryExecutionError) as exc_info:
+            get_away_team_by_stats(
+                mock_snowflake_connection,
+                pts_threshold=1000,
+                reb_threshold=500,
+                ast_threshold=300,
+                stl_threshold=100,
+                max_attempts=3,
+            )
+        # The error names the attempt count and one query ran per attempt
+        assert "3 attempts" in str(exc_info.value)
+        assert mock_cursor.execute.call_count == 3
+    def test_success_on_first_try(
+        self, mock_snowflake_connection: MagicMock, sample_player_data: list
+    ) -> None:
+        """Test successful query on first attempt."""
+        mock_cursor = MagicMock()
+        # Return exactly 5 players (the two sample rows, alternated)
+        mock_cursor.fetchall.return_value = [
+            sample_player_data[0],
+            sample_player_data[1],
+            sample_player_data[0],
+            sample_player_data[1],
+            sample_player_data[0],
+        ]
+        mock_snowflake_connection.cursor.return_value.__enter__.return_value = (
+            mock_cursor
+        )
+        result = get_away_team_by_stats(
+            mock_snowflake_connection,
+            pts_threshold=1000,
+            reb_threshold=500,
+            ast_threshold=300,
+            stl_threshold=100,
+        )
+        assert isinstance(result, pd.DataFrame)
+        assert len(result) == 5
+        # Should only need one query
+        assert mock_cursor.execute.call_count == 1
+    def test_uses_parameterized_query(
+        self, mock_snowflake_connection: MagicMock, sample_player_data: list
+    ) -> None:
+        """Verify parameterized queries are used for stat thresholds."""
+        mock_cursor = MagicMock()
+        # Five rows so the call succeeds on its first query
+        mock_cursor.fetchall.return_value = [
+            sample_player_data[0],
+            sample_player_data[1],
+            sample_player_data[0],
+            sample_player_data[1],
+            sample_player_data[0],
+        ]
+        mock_snowflake_connection.cursor.return_value.__enter__.return_value = (
+            mock_cursor
+        )
+        get_away_team_by_stats(
+            mock_snowflake_connection,
+            pts_threshold=1000,
+            reb_threshold=500,
+            ast_threshold=300,
+            stl_threshold=100,
+        )
+        call_args = mock_cursor.execute.call_args
+        query = call_args[0][0]
+        params = call_args[0][1]
+        # Query should use %s placeholders
+        assert "%s" in query
+        # Should not contain actual numbers in query
+        assert "1000" not in query
+        assert "500" not in query
+        # Params should be tuple of thresholds
+        assert params == (1000, 500, 300, 100)

tests/test_ml.py ADDED Viewed

	@@ -0,0 +1,147 @@

+"""Tests for ML model module."""
+from unittest.mock import MagicMock, patch
+import numpy as np
+import pytest
+from src.ml.model import ModelLoadError, analyze_team_stats, predict_winner
+class TestAnalyzeTeamStats:
+    """Behavioural checks for analyze_team_stats."""
+    def test_flattens_stats_correctly(
+        self, sample_team_stats: list[list[float]]
+    ) -> None:
+        """Each team flattens to one row; the combined row holds both."""
+        flattened = analyze_team_stats(sample_team_stats, sample_team_stats)
+        home_array, away_array, combined = flattened
+        # 5 players x 10 stats = 50 values per team
+        per_team_shape = (1, 50)
+        assert home_array.shape == per_team_shape
+        assert away_array.shape == per_team_shape
+        # Both teams side by side = 100 values
+        assert combined.shape == (1, 100)
+    def test_combined_contains_both_teams(
+        self, sample_team_stats: list[list[float]]
+    ) -> None:
+        """The combined row lists home values first, then away values."""
+        # Two players with two stats each, per team
+        home_stats = [[1.0, 2.0], [3.0, 4.0]]
+        away_stats = [[5.0, 6.0], [7.0, 8.0]]
+        _home, _away, combined = analyze_team_stats(home_stats, away_stats)
+        expected_row = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
+        np.testing.assert_array_equal(combined[0], expected_row)
+class TestPredictWinner:
+    """Behavioural checks for predict_winner with the model patched out."""
+    @patch("src.ml.model.get_winner_model")
+    def test_returns_probability_and_prediction(
+        self, mock_get_model: MagicMock
+    ) -> None:
+        """The call yields a (float probability, int prediction) pair."""
+        fake_model = MagicMock()
+        fake_model.predict.return_value = np.array([[0.75]])
+        mock_get_model.return_value = fake_model
+        features = np.random.rand(1, 100)
+        outcome = predict_winner(features)
+        probability, prediction = outcome
+        assert isinstance(probability, float)
+        assert isinstance(prediction, int)
+        assert 0.0 <= probability <= 1.0
+        assert prediction in (0, 1)
+    @patch("src.ml.model.get_winner_model")
+    def test_high_probability_predicts_win(
+        self, mock_get_model: MagicMock
+    ) -> None:
+        """A model output of 0.8 maps to prediction 1 (home win)."""
+        fake_model = MagicMock()
+        fake_model.predict.return_value = np.array([[0.8]])
+        mock_get_model.return_value = fake_model
+        features = np.random.rand(1, 100)
+        probability, prediction = predict_winner(features)
+        assert probability == 0.8
+        assert prediction == 1
+    @patch("src.ml.model.get_winner_model")
+    def test_low_probability_predicts_loss(
+        self, mock_get_model: MagicMock
+    ) -> None:
+        """A model output of 0.3 maps to prediction 0 (home loss)."""
+        fake_model = MagicMock()
+        fake_model.predict.return_value = np.array([[0.3]])
+        mock_get_model.return_value = fake_model
+        features = np.random.rand(1, 100)
+        probability, prediction = predict_winner(features)
+        assert probability == 0.3
+        assert prediction == 0
+    @patch("src.ml.model.get_winner_model")
+    def test_invalid_shape_raises_error(
+        self, mock_get_model: MagicMock
+    ) -> None:
+        """A (1, 50) input is rejected with a ValueError naming (1, 100)."""
+        fake_model = MagicMock()
+        mock_get_model.return_value = fake_model
+        # Half the expected width
+        features = np.random.rand(1, 50)
+        with pytest.raises(ValueError) as exc_info:
+            predict_winner(features)
+        message = str(exc_info.value)
+        assert "Expected input shape (1, 100)" in message
+    @patch("src.ml.model.get_winner_model")
+    def test_model_called_with_verbose_zero(
+        self, mock_get_model: MagicMock
+    ) -> None:
+        """model.predict receives the input array and verbose=0, once."""
+        fake_model = MagicMock()
+        fake_model.predict.return_value = np.array([[0.5]])
+        mock_get_model.return_value = fake_model
+        features = np.random.rand(1, 100)
+        predict_winner(features)
+        fake_model.predict.assert_called_once_with(features, verbose=0)
+class TestGetWinnerModel:
+    """Behavioural checks for get_winner_model."""
+    @patch("src.ml.model.load_model")
+    @patch("src.ml.model.Path")
+    def test_raises_error_for_missing_model(
+        self, mock_path: MagicMock, mock_load: MagicMock
+    ) -> None:
+        """A path whose exists() is False leads to ModelLoadError."""
+        from src.ml.model import get_winner_model
+        # Drop any cached result so the patched Path is consulted
+        get_winner_model.clear()
+        missing_file = MagicMock()
+        missing_file.exists.return_value = False
+        mock_path.return_value = missing_file
+        with pytest.raises(ModelLoadError) as exc_info:
+            get_winner_model("nonexistent.keras")
+        message = str(exc_info.value)
+        assert "not found" in message

tests/test_models.py ADDED Viewed

	@@ -0,0 +1,140 @@

+"""Tests for Pydantic models."""
+import pytest
+from src.config import DIFFICULTY_PRESETS
+from src.models.player import DifficultySettings, PlayerStats
+class TestPlayerStats:
+    """Behavioural checks for the PlayerStats model."""
+    @staticmethod
+    def _zeroed_fields() -> dict:
+        """Return keyword arguments for a player whose stats are all zero."""
+        return {
+            "full_name": "Test Player",
+            "ast": 0,
+            "blk": 0,
+            "dreb": 0,
+            "fg3a": 0,
+            "fg3m": 0,
+            "fg3_pct": 0.0,
+            "fga": 0,
+            "fgm": 0,
+            "fg_pct": 0.0,
+            "fta": 0,
+            "ftm": 0,
+            "ft_pct": 0.0,
+            "gp": 0,
+            "gs": 0,
+            "min": 0,
+            "oreb": 0,
+            "pf": 0,
+            "pts": 0,
+            "reb": 0,
+            "stl": 0,
+            "tov": 0,
+            "first_name": "Test",
+            "last_name": "Player",
+            "full_name_lower": "test player",
+            "first_name_lower": "test",
+            "last_name_lower": "player",
+            "is_active": True,
+        }
+    def test_from_db_row(self, sample_player_data: list) -> None:
+        """A database row tuple maps onto the matching model attributes."""
+        lebron_row = sample_player_data[0]
+        player = PlayerStats.from_db_row(lebron_row)
+        assert player.full_name == "LeBron James"
+        assert player.pts == 39223
+        assert player.ast == 10141
+        assert player.is_active is True
+    def test_validates_negative_stats(self) -> None:
+        """A negative counting stat is rejected."""
+        fields = self._zeroed_fields()
+        fields["ast"] = -1  # Invalid
+        with pytest.raises(ValueError):
+            PlayerStats(**fields)
+    def test_validates_percentage_range(self) -> None:
+        """A percentage above 1.0 is rejected."""
+        fields = self._zeroed_fields()
+        fields["fg3_pct"] = 1.5  # Invalid - over 1.0
+        with pytest.raises(ValueError):
+            PlayerStats(**fields)
+class TestDifficultySettings:
+    """Behavioural checks for the DifficultySettings model."""
+    @pytest.mark.parametrize("preset_name", list(DIFFICULTY_PRESETS.keys()))
+    def test_from_preset_valid(self, preset_name: str) -> None:
+        """Every configured preset loads with its thresholds in order."""
+        settings = DifficultySettings.from_preset(preset_name)
+        assert settings.name == preset_name
+        expected = DIFFICULTY_PRESETS[preset_name]
+        actual = (
+            settings.pts_threshold,
+            settings.reb_threshold,
+            settings.ast_threshold,
+            settings.stl_threshold,
+        )
+        for position, value in enumerate(actual):
+            assert value == expected[position]
+    def test_from_preset_invalid(self) -> None:
+        """An unknown preset name raises ValueError with a clear message."""
+        with pytest.raises(ValueError) as exc_info:
+            DifficultySettings.from_preset("Invalid Preset")
+        message = str(exc_info.value)
+        assert "Unknown difficulty preset" in message
+    def test_as_tuple(self) -> None:
+        """as_tuple() returns the preset's four values as a tuple."""
+        regular = DifficultySettings.from_preset("Regular")
+        packed = regular.as_tuple()
+        assert packed == DIFFICULTY_PRESETS["Regular"]
+        assert isinstance(packed, tuple)
+        assert len(packed) == 4
+    def test_regular_preset_values(self) -> None:
+        """The Regular preset carries the pinned threshold values."""
+        regular = DifficultySettings.from_preset("Regular")
+        assert regular.pts_threshold == 850
+        assert regular.reb_threshold == 400
+        assert regular.ast_threshold == 200
+        assert regular.stl_threshold == 60
+    def test_dream_team_preset_values(self) -> None:
+        """The Dream Team preset carries the pinned threshold values."""
+        dream_team = DifficultySettings.from_preset("Dream Team")
+        assert dream_team.pts_threshold == 1450
+        assert dream_team.reb_threshold == 700
+        assert dream_team.ast_threshold == 500
+        assert dream_team.stl_threshold == 120

tests/test_validation.py ADDED Viewed

	@@ -0,0 +1,130 @@

+"""Tests for input validation module."""
+import pytest
+from src.validation.inputs import (
+    PlayerSearchInput,
+    is_valid_search_term,
+    validate_search_term,
+)
+class TestPlayerSearchInput:
+    """Acceptance checks for PlayerSearchInput."""
+    def test_valid_simple_name(self) -> None:
+        """A single-word name is accepted unchanged."""
+        model = PlayerSearchInput(search_term="James")
+        assert model.search_term == "James"
+    def test_valid_full_name(self) -> None:
+        """A first-and-last name is accepted unchanged."""
+        model = PlayerSearchInput(search_term="LeBron James")
+        assert model.search_term == "LeBron James"
+    def test_valid_name_with_apostrophe(self) -> None:
+        """A name containing an apostrophe is accepted unchanged."""
+        model = PlayerSearchInput(search_term="Shaquille O'Neal")
+        assert model.search_term == "Shaquille O'Neal"
+    def test_valid_name_with_period(self) -> None:
+        """A name containing periods is accepted unchanged."""
+        model = PlayerSearchInput(search_term="J.R. Smith")
+        assert model.search_term == "J.R. Smith"
+    def test_valid_name_with_hyphen(self) -> None:
+        """A name containing a hyphen is accepted unchanged."""
+        model = PlayerSearchInput(search_term="Kareem Abdul-Jabbar")
+        assert model.search_term == "Kareem Abdul-Jabbar"
+    def test_strips_whitespace(self) -> None:
+        """Leading and trailing spaces are removed from the stored term."""
+        model = PlayerSearchInput(search_term="  James  ")
+        assert model.search_term == "James"
+class TestSqlInjectionRejection:
+    """Rejection checks for PlayerSearchInput."""
+    @pytest.mark.parametrize(
+        "malicious_input",
+        [
+            "'; DROP TABLE NBA;--",
+            "James'; DELETE FROM NBA--",
+            "' OR '1'='1",
+            "James' UNION SELECT * FROM passwords--",
+            "James; SELECT * FROM users",
+            "/*comment*/James",
+            "James*/DROP TABLE/*",
+            "' OR 1=1--",
+            "James' AND 1=1--",
+            "Robert'); DROP TABLE Students;--",
+        ],
+    )
+    def test_rejects_sql_injection(self, malicious_input: str) -> None:
+        """Injection-style payloads raise ValueError mentioning invalid input."""
+        with pytest.raises(ValueError) as exc_info:
+            PlayerSearchInput(search_term=malicious_input)
+        # Either validator's wording is acceptable
+        message = str(exc_info.value)
+        assert "Invalid" in message or "invalid" in message
+    @pytest.mark.parametrize(
+        "invalid_input",
+        [
+            "James<script>",
+            "James&nbsp;",
+            "James@#$%",
+            "James\\nNewline",
+            "James\x00null",
+        ],
+    )
+    def test_rejects_special_characters(self, invalid_input: str) -> None:
+        """Terms with characters outside the whitelist raise ValueError."""
+        with pytest.raises(ValueError):
+            PlayerSearchInput(search_term=invalid_input)
+    def test_rejects_empty_string(self) -> None:
+        """An empty term raises ValueError."""
+        with pytest.raises(ValueError):
+            PlayerSearchInput(search_term="")
+    def test_rejects_too_long(self) -> None:
+        """A 101-character term raises ValueError."""
+        oversized = "A" * 101
+        with pytest.raises(ValueError):
+            PlayerSearchInput(search_term=oversized)
+class TestValidateSearchTerm:
+    """Checks for the validate_search_term helper."""
+    def test_returns_cleaned_term(self) -> None:
+        """A padded but valid term comes back without the padding."""
+        cleaned = validate_search_term("  James  ")
+        assert cleaned == "James"
+    def test_returns_none_for_invalid(self) -> None:
+        """An injection-style term yields None."""
+        cleaned = validate_search_term("'; DROP TABLE--")
+        assert cleaned is None
+    def test_returns_none_for_empty(self) -> None:
+        """An empty term yields None."""
+        cleaned = validate_search_term("")
+        assert cleaned is None
+class TestIsValidSearchTerm:
+    """Checks for the is_valid_search_term helper."""
+    def test_returns_true_for_valid(self) -> None:
+        """Ordinary player names are reported as valid."""
+        for accepted in ("James", "LeBron James", "O'Neal"):
+            assert is_valid_search_term(accepted) is True
+    def test_returns_false_for_invalid(self) -> None:
+        """Injection payloads, empty input and markup are reported invalid."""
+        for rejected in ("'; DROP--", "", "<script>"):
+            assert is_valid_search_term(rejected) is False