"""Local data queries using pandas on loaded CSV data."""
import logging
from typing import Any
import pandas as pd
from src.config import MAX_QUERY_ATTEMPTS, PLAYER_COLUMNS
from src.database.connection import QueryExecutionError
logger = logging.getLogger("streamlit_nba")
def search_player_by_name(df: pd.DataFrame, name: str) -> list[tuple[str]]:
    """Search for players whose first, last, or full name contains a term.

    The term is treated as literal text, not as a regular expression, so
    characters such as ``.``, ``(`` or ``*`` only match themselves and can
    never make the search raise.

    Args:
        df: Player DataFrame providing the ``FULL_NAME``, ``FULL_NAME_LOWER``,
            ``FIRST_NAME_LOWER`` and ``LAST_NAME_LOWER`` columns.
        name: Search term (case-insensitive; surrounding whitespace ignored)

    Returns:
        List of one-element tuples, one per distinct matching ``FULL_NAME``,
        in order of first appearance in ``df``
    """
    needle = name.lower().strip()

    def _contains(column: str) -> pd.Series:
        # regex=False: the needle is user-typed text. With the pandas default
        # (regex=True) a term like "(" raises re.error and "." matches any
        # character. na=False makes missing names count as non-matches.
        return df[column].str.contains(
            needle, case=False, na=False, regex=False
        )

    mask = (
        _contains("FULL_NAME_LOWER")
        | _contains("FIRST_NAME_LOWER")
        | _contains("LAST_NAME_LOWER")
    )
    matches = df.loc[mask, "FULL_NAME"].unique().tolist()
    return [(player_name,) for player_name in matches]
def get_player_by_full_name(
df: pd.DataFrame, full_name: str
) -> tuple[Any, ...] | None:
"""Get a single player's full record by exact name match.
Args:
df: Player DataFrame
full_name: Exact full name of player
Returns:
Player data tuple or None if not found
"""
result = df[df["FULL_NAME"] == full_name]
if result.empty:
return None
return tuple(result.iloc[0].values)
def get_players_by_full_names(
    df: pd.DataFrame, names: list[str]
) -> pd.DataFrame:
    """Fetch the records of several players with one vectorized lookup.

    Args:
        df: Player DataFrame with a ``FULL_NAME`` column
        names: Exact full names to select

    Returns:
        The rows of ``df`` whose ``FULL_NAME`` is in ``names``, kept in
        ``df``'s own row order. When ``names`` is empty, an empty DataFrame
        with ``PLAYER_COLUMNS`` as its columns is returned instead.
    """
    if names:
        wanted = df["FULL_NAME"].isin(names)
        return df.loc[wanted]
    # Nothing requested: hand back an empty frame with the configured columns.
    return pd.DataFrame(columns=PLAYER_COLUMNS)
def get_away_team_by_stats(
    df: pd.DataFrame,
    pts_threshold: int,
    reb_threshold: int,
    ast_threshold: int,
    stl_threshold: int,
    max_attempts: int = MAX_QUERY_ATTEMPTS,
) -> pd.DataFrame:
    """Randomly assemble a five-player away team from stat-filtered pools.

    Each attempt draws two players from the points pool, then one each from
    the rebounds, assists and steals pools, skipping any index label that
    has already been drawn in that attempt.

    Args:
        df: Player DataFrame with ``PTS``, ``REB``, ``AST`` and ``STL`` columns
        pts_threshold: Players need ``PTS`` strictly above this value
        reb_threshold: Players need ``REB`` strictly above this value
        ast_threshold: Players need ``AST`` strictly above this value
        stl_threshold: Players need ``STL`` strictly above this value
        max_attempts: Number of draws to try before giving up

    Returns:
        DataFrame with the 5 selected rows of ``df``

    Raises:
        QueryExecutionError: If no attempt yields 5 players
    """
    # The pools do not change between attempts, so filter them once.
    scorers = df[df["PTS"] > pts_threshold]
    # Single-pick pools in draw order, paired with their failure message.
    single_pick_pools = (
        (df[df["REB"] > reb_threshold], "REB pool exhausted"),
        (df[df["AST"] > ast_threshold], "AST pool exhausted"),
        (df[df["STL"] > stl_threshold], "STL pool exhausted"),
    )

    for attempt in range(max_attempts):
        try:
            chosen: set[int] = set()

            # Two players come from the points pool.
            if len(scorers) < 2:
                raise ValueError("PTS pool too small")
            top_two = scorers.sample(n=2, replace=False)
            chosen.update(top_two.index.tolist())

            # One player from each remaining pool, never repeating a pick.
            for pool, failure_message in single_pick_pools:
                available = pool[~pool.index.isin(chosen)]
                if available.empty:
                    raise ValueError(failure_message)
                pick = available.sample(n=1)
                chosen.update(pick.index.tolist())

            team = df.loc[list(chosen)]
            # Any other row count falls through to the next attempt.
            if len(team) == 5:
                logger.info(f"Got away team on attempt {attempt + 1}")
                return team
        except ValueError as e:
            logger.debug(f"Attempt {attempt + 1} failed: {e}")
            continue

    raise QueryExecutionError(
        f"Could not generate away team with 5 players after {max_attempts} attempts. "
        "Try lowering the difficulty."
    )
|