Hatmanstack
fix: restore player search functionality and add HF frontmatter
efaa2dc
"""Local data queries using pandas on loaded CSV data."""
import logging
from typing import Any
import pandas as pd
from src.config import MAX_QUERY_ATTEMPTS, PLAYER_COLUMNS
from src.database.connection import QueryExecutionError
logger = logging.getLogger("streamlit_nba")
def search_player_by_name(df: pd.DataFrame, name: str) -> list[tuple[str]]:
    """Search for players whose full, first, or last name contains ``name``.

    The search term is lower-cased, stripped of surrounding whitespace, and
    matched as a *literal* substring. It is deliberately not treated as a
    regular expression: user input such as ``"J.R."`` or ``"("`` must match
    character-for-character rather than being interpreted as (or failing to
    compile as) a regex pattern.

    Args:
        df: Player DataFrame providing the ``FULL_NAME``, ``FULL_NAME_LOWER``,
            ``FIRST_NAME_LOWER`` and ``LAST_NAME_LOWER`` columns.
        name: Search term (case-insensitive). An empty or whitespace-only
            term is contained in every string, so it matches every row that
            has at least one non-null name column.

    Returns:
        List of 1-tuples, each holding a distinct matching ``FULL_NAME``,
        in order of first appearance in ``df``.
    """
    term = name.lower().strip()

    def _contains(column: str) -> pd.Series:
        # regex=False: treat the term as plain text, not a pattern.
        # na=False: rows with a missing name never match.
        return df[column].str.contains(term, case=False, na=False, regex=False)

    mask = (
        _contains("FULL_NAME_LOWER")
        | _contains("FIRST_NAME_LOWER")
        | _contains("LAST_NAME_LOWER")
    )
    matches = df[mask]["FULL_NAME"].unique().tolist()
    return [(player_name,) for player_name in matches]
def get_player_by_full_name(
df: pd.DataFrame, full_name: str
) -> tuple[Any, ...] | None:
"""Get a single player's full record by exact name match.
Args:
df: Player DataFrame
full_name: Exact full name of player
Returns:
Player data tuple or None if not found
"""
result = df[df["FULL_NAME"] == full_name]
if result.empty:
return None
return tuple(result.iloc[0].values)
def get_players_by_full_names(
    df: pd.DataFrame, names: list[str]
) -> pd.DataFrame:
    """Fetch the records of several players with one membership filter.

    Args:
        df: Player DataFrame
        names: List of exact full names

    Returns:
        The rows of ``df`` whose ``FULL_NAME`` is in ``names``, in their
        original order; when ``names`` is empty, an empty DataFrame whose
        columns are ``PLAYER_COLUMNS``
    """
    if names:
        wanted = df["FULL_NAME"].isin(names)
        return df[wanted]

    # Nothing requested: hand back an empty frame with the configured columns.
    return pd.DataFrame(columns=PLAYER_COLUMNS)
def _pick_one_unused(pool: pd.DataFrame, taken: list[Any], stat: str) -> Any:
    """Randomly pick one index label from ``pool`` that is not in ``taken``.

    Raises:
        ValueError: If every row of ``pool`` has already been taken.
    """
    candidates = pool[~pool.index.isin(taken)]
    if candidates.empty:
        raise ValueError(f"{stat} pool exhausted")
    return candidates.sample(n=1).index[0]


def get_away_team_by_stats(
    df: pd.DataFrame,
    pts_threshold: int,
    reb_threshold: int,
    ast_threshold: int,
    stl_threshold: int,
    max_attempts: int = MAX_QUERY_ATTEMPTS,
) -> pd.DataFrame:
    """Get a random away team based on stat thresholds.

    Five distinct players are drawn: two whose ``PTS`` exceed
    ``pts_threshold``, then one each whose ``REB``, ``AST`` and ``STL``
    exceed the matching threshold, each excluding players already drawn.
    A draw can dead-end (e.g. every REB candidate was already taken as a
    PTS pick), so it is retried up to ``max_attempts`` times.

    Players are told apart by their index label in ``df``.
    NOTE(review): this presumes ``df`` has a unique index; confirm at the
    loader.

    Args:
        df: Player DataFrame with ``PTS``, ``REB``, ``AST`` and ``STL`` columns
        pts_threshold: Career points a player must exceed
        reb_threshold: Career rebounds a player must exceed
        ast_threshold: Career assists a player must exceed
        stl_threshold: Career steals a player must exceed
        max_attempts: Maximum sampling attempts before raising error

    Returns:
        DataFrame with 5 players, in selection order
        (PTS, PTS, REB, AST, STL)

    Raises:
        QueryExecutionError: If unable to get 5 players within max_attempts
    """
    # Pre-filter the candidate pools once; they do not change between attempts.
    pool_pts = df[df["PTS"] > pts_threshold]
    single_pick_pools = (
        ("REB", df[df["REB"] > reb_threshold]),
        ("AST", df[df["AST"] > ast_threshold]),
        ("STL", df[df["STL"] > stl_threshold]),
    )

    # A PTS pool with fewer than two rows, or any empty single-pick pool,
    # fails identically on every attempt, so skip the retry loop entirely.
    feasible = len(pool_pts) >= 2 and all(
        not pool.empty for _, pool in single_pick_pools
    )
    if not feasible:
        logger.debug("Stat pools too small for a 5-player team; skipping sampling")

    attempts = range(1, max_attempts + 1) if feasible else range(0)
    for attempt in attempts:
        try:
            # Two scorers first, then one specialist per remaining stat,
            # never reusing a player who has already been chosen.
            chosen = pool_pts.sample(n=2, replace=False).index.tolist()
            for stat, pool in single_pick_pools:
                chosen.append(_pick_one_unused(pool, chosen, stat))
            team = df.loc[chosen]
        except ValueError as exc:
            logger.debug("Attempt %d failed: %s", attempt, exc)
            continue

        if len(team) == 5:
            logger.info("Got away team on attempt %d", attempt)
            return team
        # Only reachable when index labels are not unique in df.
        logger.debug(
            "Attempt %d produced %d rows instead of 5", attempt, len(team)
        )

    raise QueryExecutionError(
        f"Could not generate away team with 5 players after {max_attempts} attempts. "
        "Try lowering the difficulty."
    )