Spaces:

Hatman
/

NBA-Fantasy-Game

Running

App Files Community

Hatmanstack committed on Feb 27

Commit

20852d6

1 Parent(s): 92a832f

Transition data layer to local CSV using pandas

Browse files

Files changed (5) hide show

snowflake_nba.csv +0 -0
src/database/connection.py +35 -76
src/database/queries.py +40 -51
tests/conftest.py +0 -16
tests/test_database.py +60 -163

snowflake_nba.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

src/database/connection.py CHANGED Viewed

@@ -1,112 +1,71 @@
-"""Database connection management with error handling."""
 import logging
 from collections.abc import Generator
 from contextlib import contextmanager
-from typing import Any
-import snowflake.connector
 import streamlit as st
-from snowflake.connector import SnowflakeConnection
-from snowflake.connector.errors import DatabaseError, ProgrammingError
 logger = logging.getLogger("streamlit_nba")
 class DatabaseConnectionError(Exception):
-    """Raised when database connection fails."""
     pass
 class QueryExecutionError(Exception):
-    """Raised when query execution fails."""
     pass
-@st.cache_resource
-def _get_connection_pool() -> SnowflakeConnection:
-    """Create and cache a Snowflake connection.
     Returns:
-        Cached Snowflake connection
     Raises:
-        DatabaseConnectionError: If connection cannot be established
     """
     try:
-        return snowflake.connector.connect(**st.secrets["snowflake"])
-    except DatabaseError as e:
-        logger.error(f"Failed to connect to database: {e}")
-        raise DatabaseConnectionError(f"Could not connect to database: {e}") from e
-    except KeyError as e:
-        logger.error("Snowflake credentials not found in secrets")
-        raise DatabaseConnectionError(
-            "Database credentials not configured. Please check st.secrets."
-        ) from e
 @contextmanager
-def get_connection() -> Generator[SnowflakeConnection, None, None]:
-    """Context manager for database connections with error handling.
     Yields:
-        Active Snowflake connection
     Raises:
-        DatabaseConnectionError: If connection fails
-    Example:
-        with get_connection() as conn:
-            # use connection
     """
     try:
-        conn = snowflake.connector.connect(**st.secrets["snowflake"])
-        yield conn
-    except DatabaseError as e:
-        logger.error(f"Database connection error: {e}")
-        raise DatabaseConnectionError(f"Database connection failed: {e}") from e
-    except KeyError as e:
-        logger.error("Snowflake credentials not found in secrets")
-        raise DatabaseConnectionError(
-            "Database credentials not configured. Please check st.secrets."
-        ) from e
     finally:
-        try:
-            conn.close()
-        except Exception:
-            pass  # Connection may already be closed
-def execute_query(
-    conn: SnowflakeConnection,
-    query: str,
-    params: tuple[Any, ...] | list[Any] | None = None,
-) -> list[tuple[Any, ...]]:
-    """Execute a parameterized query safely.
-    Args:
-        conn: Active database connection
-        query: SQL query with %s placeholders
-        params: Query parameters (optional)
-    Returns:
-        List of result tuples
-    Raises:
-        QueryExecutionError: If query execution fails
-    """
-    try:
-        with conn.cursor() as cur:
-            if params:
-                cur.execute(query, params)
-            else:
-                cur.execute(query)
-            return cur.fetchall()
-    except ProgrammingError as e:
-        logger.error(f"Query execution error: {e}")
-        raise QueryExecutionError(f"Query failed: {e}") from e
-    except DatabaseError as e:
-        logger.error(f"Database error during query: {e}")
-        raise QueryExecutionError(f"Database error: {e}") from e

+"""Local CSV data management with error handling."""
 import logging
 from collections.abc import Generator
 from contextlib import contextmanager
+from pathlib import Path
+import pandas as pd
 import streamlit as st
 logger = logging.getLogger("streamlit_nba")
+CSV_PATH = Path("snowflake_nba.csv")
 class DatabaseConnectionError(Exception):
+    """Raised when local data file cannot be found or loaded."""
     pass
 class QueryExecutionError(Exception):
+    """Raised when data query fails."""
     pass
+@st.cache_data
+def load_data() -> pd.DataFrame:
+    """Load and cache the local CSV data.
     Returns:
+        DataFrame containing player data
     Raises:
+        DatabaseConnectionError: If file cannot be loaded
     """
+    if not CSV_PATH.exists():
+        logger.error(f"Data file not found: {CSV_PATH}")
+        raise DatabaseConnectionError(f"Data file not found: {CSV_PATH}")
     try:
+        df = pd.read_csv(CSV_PATH)
+        # Ensure column names match expected Snowflake names (uppercase)
+        df.columns = [col.upper() for col in df.columns]
+        return df
+    except Exception as e:
+        logger.error(f"Failed to load CSV data: {e}")
+        raise DatabaseConnectionError(f"Could not load data from {CSV_PATH}: {e}") from e
 @contextmanager
+def get_connection() -> Generator[pd.DataFrame, None, None]:
+    """Context manager for local data access with error handling.
     Yields:
+        DataFrame with player data
     Raises:
+        DatabaseConnectionError: If data cannot be loaded
     """
     try:
+        yield load_data()
+    except DatabaseConnectionError as e:
+        logger.error(f"Data access error: {e}")
+        raise
+    except Exception as e:
+        logger.error(f"Unexpected error accessing data: {e}")
+        raise DatabaseConnectionError(f"Data access failed: {e}") from e
     finally:
+        pass

src/database/queries.py CHANGED Viewed

@@ -1,64 +1,61 @@
-"""Parameterized database queries for player data."""
 import logging
 from typing import Any
 import pandas as pd
-from snowflake.connector import SnowflakeConnection
 from src.config import MAX_QUERY_ATTEMPTS, PLAYER_COLUMNS
-from src.database.connection import QueryExecutionError, execute_query
 logger = logging.getLogger("streamlit_nba")
-def search_player_by_name(conn: SnowflakeConnection, name: str) -> list[tuple[str]]:
     """Search for players by name (first, last, or full name).
     Args:
-        conn: Active database connection
         name: Search term (case-insensitive)
     Returns:
         List of tuples containing matching full names
     """
     name_lower = name.lower().strip()
-    query = """
-        SELECT full_name FROM NBA
-        WHERE full_name_lower = %s
-           OR first_name_lower = %s
-           OR last_name_lower = %s
-    """
-    return execute_query(conn, query, (name_lower, name_lower, name_lower))
 def get_player_by_full_name(
-    conn: SnowflakeConnection, full_name: str
 ) -> tuple[Any, ...] | None:
     """Get a single player's full record by exact name match.
     Args:
-        conn: Active database connection
         full_name: Exact full name of player
     Returns:
         Player data tuple or None if not found
     """
-    query = "SELECT * FROM NBA WHERE FULL_NAME = %s"
-    results = execute_query(conn, query, (full_name,))
-    return results[0] if results else None
 def get_players_by_full_names(
-    conn: SnowflakeConnection, names: list[str]
 ) -> pd.DataFrame:
     """Get multiple players' records in a single batch query.
-    This fixes the N+1 query problem by using a single IN clause
-    instead of multiple individual queries.
     Args:
-        conn: Active database connection
         names: List of exact full names
     Returns:
@@ -67,16 +64,11 @@ def get_players_by_full_names(
     if not names:
         return pd.DataFrame(columns=PLAYER_COLUMNS)
-    # Build parameterized IN clause
-    placeholders = ", ".join(["%s"] * len(names))
-    query = f"SELECT * FROM NBA WHERE FULL_NAME IN ({placeholders})"
-    results = execute_query(conn, query, tuple(names))
-    return pd.DataFrame(results, columns=PLAYER_COLUMNS)
 def get_away_team_by_stats(
-    conn: SnowflakeConnection,
     pts_threshold: int,
     reb_threshold: int,
     ast_threshold: int,
@@ -85,11 +77,10 @@ def get_away_team_by_stats(
 ) -> pd.DataFrame:
     """Get a random away team based on stat thresholds.
-    Uses UNION with SAMPLE to get diverse players meeting stat criteria.
-    Includes a max_attempts guard to prevent infinite loops.
     Args:
-        conn: Active database connection
         pts_threshold: Minimum career points
         reb_threshold: Minimum career rebounds
         ast_threshold: Minimum career assists
@@ -100,28 +91,26 @@ def get_away_team_by_stats(
         DataFrame with 5 players
     Raises:
-        QueryExecutionError: If unable to get 5 players within max_attempts
-    """
-    query = """
-        SELECT * FROM (SELECT * FROM NBA WHERE PTS > %s) SAMPLE (2 ROWS)
-        UNION
-        SELECT * FROM (SELECT * FROM NBA WHERE REB > %s) SAMPLE (1 ROWS)
-        UNION
-        SELECT * FROM (SELECT * FROM NBA WHERE AST > %s) SAMPLE (1 ROWS)
-        UNION
-        SELECT * FROM (SELECT * FROM NBA WHERE STL > %s) SAMPLE (1 ROWS)
     """
-    params = (pts_threshold, reb_threshold, ast_threshold, stl_threshold)
     for attempt in range(max_attempts):
-        results = execute_query(conn, query, params)
-        if len(results) == 5:
-            logger.info(f"Got away team on attempt {attempt + 1}")
-            return pd.DataFrame(results, columns=PLAYER_COLUMNS)
-        logger.debug(f"Attempt {attempt + 1}: got {len(results)} players, need 5")
-    # Fallback: if we can't get exactly 5, raise an error
     raise QueryExecutionError(
         f"Could not generate away team with 5 players after {max_attempts} attempts. "
-        f"Last attempt returned {len(results)} players."
     )

+"""Local data queries using pandas on loaded CSV data."""
 import logging
 from typing import Any
 import pandas as pd
 from src.config import MAX_QUERY_ATTEMPTS, PLAYER_COLUMNS
+from src.database.connection import QueryExecutionError
 logger = logging.getLogger("streamlit_nba")
+def search_player_by_name(df: pd.DataFrame, name: str) -> list[tuple[str]]:
     """Search for players by name (first, last, or full name).
     Args:
+        df: Player DataFrame
         name: Search term (case-insensitive)
     Returns:
         List of tuples containing matching full names
     """
     name_lower = name.lower().strip()
+    mask = (
+        (df["FULL_NAME_LOWER"] == name_lower)
+        | (df["FIRST_NAME_LOWER"] == name_lower)
+        | (df["LAST_NAME_LOWER"] == name_lower)
+    )
+    results = df[mask]["FULL_NAME"].unique().tolist()
+    return [(name,) for name in results]
 def get_player_by_full_name(
+    df: pd.DataFrame, full_name: str
 ) -> tuple[Any, ...] | None:
     """Get a single player's full record by exact name match.
     Args:
+        df: Player DataFrame
         full_name: Exact full name of player
     Returns:
         Player data tuple or None if not found
     """
+    result = df[df["FULL_NAME"] == full_name]
+    if result.empty:
+        return None
+    return tuple(result.iloc[0].values)
 def get_players_by_full_names(
+    df: pd.DataFrame, names: list[str]
 ) -> pd.DataFrame:
     """Get multiple players' records in a single batch query.
     Args:
+        df: Player DataFrame
         names: List of exact full names
     Returns:
     if not names:
         return pd.DataFrame(columns=PLAYER_COLUMNS)
+    return df[df["FULL_NAME"].isin(names)]
 def get_away_team_by_stats(
+    df: pd.DataFrame,
     pts_threshold: int,
     reb_threshold: int,
     ast_threshold: int,
 ) -> pd.DataFrame:
     """Get a random away team based on stat thresholds.
+    Replicates Snowflake's SAMPLE and UNION logic using pandas.
     Args:
+        df: Player DataFrame
         pts_threshold: Minimum career points
         reb_threshold: Minimum career rebounds
         ast_threshold: Minimum career assists
         DataFrame with 5 players
     Raises:
+        QueryExecutionError: If unable to get 5 players within max_attempts
     """
     for attempt in range(max_attempts):
+        try:
+            df1 = df[df["PTS"] > pts_threshold].sample(n=2)
+            df2 = df[df["REB"] > reb_threshold].sample(n=1)
+            df3 = df[df["AST"] > ast_threshold].sample(n=1)
+            df4 = df[df["STL"] > stl_threshold].sample(n=1)
+            results = pd.concat([df1, df2, df3, df4]).drop_duplicates()
+            if len(results) == 5:
+                logger.info(f"Got away team on attempt {attempt + 1}")
+                return results
+        except ValueError:
+            # sample() can raise ValueError if n > population
+            logger.debug(f"Attempt {attempt + 1}: stat thresholds too restrictive")
+            continue
     raise QueryExecutionError(
         f"Could not generate away team with 5 players after {max_attempts} attempts. "
+        "Try lowering the difficulty."
     )

tests/conftest.py CHANGED Viewed

@@ -1,26 +1,10 @@
 """Pytest fixtures for NBA Streamlit application tests."""
 from typing import Any
-from unittest.mock import MagicMock
 import pandas as pd
 import pytest
-@pytest.fixture
-def mock_snowflake_connection() -> MagicMock:
-    """Create a mock Snowflake connection.
-    Returns:
-        Mock connection object that simulates Snowflake connection behavior
-    """
-    mock_conn = MagicMock()
-    mock_cursor = MagicMock()
-    mock_conn.cursor.return_value.__enter__ = MagicMock(return_value=mock_cursor)
-    mock_conn.cursor.return_value.__exit__ = MagicMock(return_value=False)
-    return mock_conn
 @pytest.fixture
 def sample_player_data() -> list[tuple[Any, ...]]:
     """Create sample player data matching database schema.

 """Pytest fixtures for NBA Streamlit application tests."""
 from typing import Any
 import pandas as pd
 import pytest
 @pytest.fixture
 def sample_player_data() -> list[tuple[Any, ...]]:
     """Create sample player data matching database schema.

tests/test_database.py CHANGED Viewed

@@ -1,6 +1,4 @@
-"""Tests for database module."""
-from unittest.mock import MagicMock
 import pandas as pd
 import pytest
@@ -17,130 +15,72 @@ from src.database.queries import (
 class TestSearchPlayerByName:
     """Tests for search_player_by_name function."""
-    def test_uses_parameterized_query(
-        self, mock_snowflake_connection: MagicMock
-    ) -> None:
-        """Verify parameterized queries are used (not string formatting)."""
-        mock_cursor = MagicMock()
-        mock_cursor.fetchall.return_value = [("LeBron James",)]
-        mock_snowflake_connection.cursor.return_value.__enter__.return_value = (
-            mock_cursor
-        )
-        search_player_by_name(mock_snowflake_connection, "james")
-        # Verify execute was called with params tuple, not string formatting
-        mock_cursor.execute.assert_called_once()
-        call_args = mock_cursor.execute.call_args
-        query = call_args[0][0]
-        params = call_args[0][1]
-        # Query should use %s placeholders
-        assert "%s" in query
-        # Should not contain the actual search term in the query string
-        assert "james" not in query.lower()
-        # Params should be a tuple with the search term
-        assert params == ("james", "james", "james")
-    def test_returns_list_of_tuples(
-        self, mock_snowflake_connection: MagicMock
-    ) -> None:
-        """Test that results are returned as list of tuples."""
-        mock_cursor = MagicMock()
-        mock_cursor.fetchall.return_value = [
-            ("LeBron James",),
-            ("James Harden",),
-        ]
-        mock_snowflake_connection.cursor.return_value.__enter__.return_value = (
-            mock_cursor
-        )
-        result = search_player_by_name(mock_snowflake_connection, "james")
-        assert result == [("LeBron James",), ("James Harden",)]
 class TestGetPlayersByFullNames:
     """Tests for get_players_by_full_names batch query."""
-    def test_single_query_for_multiple_names(
-        self, mock_snowflake_connection: MagicMock, sample_player_data: list
-    ) -> None:
-        """Verify batch query uses single IN clause instead of N queries."""
-        mock_cursor = MagicMock()
-        mock_cursor.fetchall.return_value = sample_player_data
-        mock_snowflake_connection.cursor.return_value.__enter__.return_value = (
-            mock_cursor
-        )
         names = ["LeBron James", "Michael Jordan"]
-        get_players_by_full_names(mock_snowflake_connection, names)
-        # Should only execute one query
-        assert mock_cursor.execute.call_count == 1
-        call_args = mock_cursor.execute.call_args
-        query = call_args[0][0]
-        params = call_args[0][1]
-        # Query should have IN clause with placeholders
-        assert "IN" in query.upper()
-        assert "%s" in query
-        # Params should be tuple of names
-        assert params == ("LeBron James", "Michael Jordan")
-    def test_returns_dataframe(
-        self, mock_snowflake_connection: MagicMock, sample_player_data: list
-    ) -> None:
-        """Test that results are returned as DataFrame."""
-        mock_cursor = MagicMock()
-        mock_cursor.fetchall.return_value = sample_player_data
-        mock_snowflake_connection.cursor.return_value.__enter__.return_value = (
-            mock_cursor
-        )
-        result = get_players_by_full_names(
-            mock_snowflake_connection, ["LeBron James", "Michael Jordan"]
-        )
         assert isinstance(result, pd.DataFrame)
-        assert list(result.columns) == PLAYER_COLUMNS
         assert len(result) == 2
-    def test_empty_names_returns_empty_dataframe(
-        self, mock_snowflake_connection: MagicMock
-    ) -> None:
-        """Test that empty input returns empty DataFrame without query."""
-        mock_cursor = MagicMock()
-        mock_snowflake_connection.cursor.return_value.__enter__.return_value = (
-            mock_cursor
-        )
-        result = get_players_by_full_names(mock_snowflake_connection, [])
         assert isinstance(result, pd.DataFrame)
         assert result.empty
-        # Should not execute any query
-        mock_cursor.execute.assert_not_called()
 class TestGetAwayTeamByStats:
-    """Tests for get_away_team_by_stats with max_attempts guard."""
-    def test_max_attempts_raises_error(
-        self, mock_snowflake_connection: MagicMock
-    ) -> None:
-        """Test that max_attempts limit prevents infinite loop."""
-        mock_cursor = MagicMock()
-        # Always return wrong number of players
-        mock_cursor.fetchall.return_value = [("Player1",), ("Player2",)]
-        mock_snowflake_connection.cursor.return_value.__enter__.return_value = (
-            mock_cursor
-        )
         with pytest.raises(QueryExecutionError) as exc_info:
             get_away_team_by_stats(
-                mock_snowflake_connection,
                 pts_threshold=1000,
                 reb_threshold=500,
                 ast_threshold=300,
@@ -149,28 +89,23 @@ class TestGetAwayTeamByStats:
             )
         assert "3 attempts" in str(exc_info.value)
-        assert mock_cursor.execute.call_count == 3
-    def test_success_on_first_try(
-        self, mock_snowflake_connection: MagicMock, sample_player_data: list
-    ) -> None:
-        """Test successful query on first attempt."""
-        mock_cursor = MagicMock()
-        # Return exactly 5 players
-        mock_cursor.fetchall.return_value = sample_player_data * 3  # 6 players
-        mock_cursor.fetchall.return_value = [
-            sample_player_data[0],
-            sample_player_data[1],
-            sample_player_data[0],
-            sample_player_data[1],
-            sample_player_data[0],
-        ]
-        mock_snowflake_connection.cursor.return_value.__enter__.return_value = (
-            mock_cursor
-        )
         result = get_away_team_by_stats(
-            mock_snowflake_connection,
             pts_threshold=1000,
             reb_threshold=500,
             ast_threshold=300,
@@ -179,41 +114,3 @@ class TestGetAwayTeamByStats:
         assert isinstance(result, pd.DataFrame)
         assert len(result) == 5
-        # Should only need one query
-        assert mock_cursor.execute.call_count == 1
-    def test_uses_parameterized_query(
-        self, mock_snowflake_connection: MagicMock, sample_player_data: list
-    ) -> None:
-        """Verify parameterized queries are used for stat thresholds."""
-        mock_cursor = MagicMock()
-        mock_cursor.fetchall.return_value = [
-            sample_player_data[0],
-            sample_player_data[1],
-            sample_player_data[0],
-            sample_player_data[1],
-            sample_player_data[0],
-        ]
-        mock_snowflake_connection.cursor.return_value.__enter__.return_value = (
-            mock_cursor
-        )
-        get_away_team_by_stats(
-            mock_snowflake_connection,
-            pts_threshold=1000,
-            reb_threshold=500,
-            ast_threshold=300,
-            stl_threshold=100,
-        )
-        call_args = mock_cursor.execute.call_args
-        query = call_args[0][0]
-        params = call_args[0][1]
-        # Query should use %s placeholders
-        assert "%s" in query
-        # Should not contain actual numbers in query
-        assert "1000" not in query
-        assert "500" not in query
-        # Params should be tuple of thresholds
-        assert params == (1000, 500, 300, 100)

+"""Tests for database module using local pandas data."""
 import pandas as pd
 import pytest
 class TestSearchPlayerByName:
     """Tests for search_player_by_name function."""
+    def test_search_by_full_name(self, sample_player_df: pd.DataFrame) -> None:
+        """Verify search finds player by full name."""
+        result = search_player_by_name(sample_player_df, "LeBron James")
+        assert result == [("LeBron James",)]
+    def test_search_by_first_name(self, sample_player_df: pd.DataFrame) -> None:
+        """Verify search finds player by first name."""
+        result = search_player_by_name(sample_player_df, "LeBron")
+        assert result == [("LeBron James",)]
+    def test_search_by_last_name(self, sample_player_df: pd.DataFrame) -> None:
+        """Verify search finds player by last name."""
+        result = search_player_by_name(sample_player_df, "Jordan")
+        assert result == [("Michael Jordan",)]
+    def test_search_case_insensitive(self, sample_player_df: pd.DataFrame) -> None:
+        """Verify search is case-insensitive."""
+        result = search_player_by_name(sample_player_df, "lebron")
+        assert result == [("LeBron James",)]
+    def test_returns_empty_on_no_match(self, sample_player_df: pd.DataFrame) -> None:
+        """Verify empty list returned when no player found."""
+        result = search_player_by_name(sample_player_df, "NonExistent Player")
+        assert result == []
 class TestGetPlayersByFullNames:
     """Tests for get_players_by_full_names batch query."""
+    def test_returns_correct_players(self, sample_player_df: pd.DataFrame) -> None:
+        """Verify correct players are returned in DataFrame."""
         names = ["LeBron James", "Michael Jordan"]
+        result = get_players_by_full_names(sample_player_df, names)
         assert isinstance(result, pd.DataFrame)
         assert len(result) == 2
+        assert set(result["FULL_NAME"]) == set(names)
+        assert list(result.columns) == PLAYER_COLUMNS
+    def test_empty_names_returns_empty_dataframe(self, sample_player_df: pd.DataFrame) -> None:
+        """Test that empty input returns empty DataFrame."""
+        result = get_players_by_full_names(sample_player_df, [])
         assert isinstance(result, pd.DataFrame)
         assert result.empty
+        assert list(result.columns) == PLAYER_COLUMNS
 class TestGetAwayTeamByStats:
+    """Tests for get_away_team_by_stats."""
+    def test_max_attempts_raises_error(self) -> None:
+        """Test that max_attempts limit works when population is too small."""
+        # Create a DF with only 2 players
+        df = pd.DataFrame([
+            {"FULL_NAME": "P1", "PTS": 1001, "REB": 501, "AST": 301, "STL": 101},
+            {"FULL_NAME": "P2", "PTS": 1001, "REB": 501, "AST": 301, "STL": 101},
+        ])
+        # Add missing columns to avoid errors if needed, though queries only use these
+        for col in PLAYER_COLUMNS:
+            if col not in df.columns:
+                df[col] = 0
         with pytest.raises(QueryExecutionError) as exc_info:
             get_away_team_by_stats(
+                df,
                 pts_threshold=1000,
                 reb_threshold=500,
                 ast_threshold=300,
             )
         assert "3 attempts" in str(exc_info.value)
+    def test_success_with_enough_players(self) -> None:
+        """Test successful generation with sufficient population."""
+        # Create a DF with 10 players meeting criteria
+        data = []
+        for i in range(10):
+            data.append({
+                "FULL_NAME": f"Player{i}",
+                "PTS": 2000, "REB": 1000, "AST": 500, "STL": 200
+            })
+        df = pd.DataFrame(data)
+        for col in PLAYER_COLUMNS:
+            if col not in df.columns:
+                df[col] = 0
         result = get_away_team_by_stats(
+            df,
             pts_threshold=1000,
             reb_threshold=500,
             ast_threshold=300,
         assert isinstance(result, pd.DataFrame)
         assert len(result) == 5