"""
Web scraper for finding college football games on nfl-video.com.

This module uses team-specific pages for efficient searching:
1. Builds an index of team names to their dedicated pages
2. Fetches games directly from the team's page
3. Optionally filters by opponent (team_b)
"""

import logging
import re
import time
from typing import Optional

import requests
from bs4 import BeautifulSoup

from .models import GameResult, SearchResults

logger = logging.getLogger(__name__)

# Root of the site's college football section
BASE_URL = "https://nfl-video.com/cfb"

# Main listing page; its links are scanned to discover the team pages
MAIN_PAGE_URL = (
    "https://nfl-video.com/cfb/"
    "ncaa_college_football_highlights_games_replay/"
    "ncaa_college_football_full_game_replays/2"
)

# Browser-like User-Agent sent with every request to avoid being blocked
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}

# Default pause (in seconds) between page requests, to be polite to the server
REQUEST_DELAY_SECONDS = 0.5

# Team index cache (lowercase team name -> page URL); None until first built
_team_index_cache: Optional[dict[str, str]] = None  # pylint: disable=invalid-name


def _fetch_page(url: str) -> Optional[BeautifulSoup]:
    """
    Download a URL and parse the response body as HTML.

    The request is sent with the module's HEADERS and a 30 second timeout.
    Any requests-level failure (including an HTTP error status raised by
    raise_for_status) is logged and reported as None rather than raised.

    Args:
        url: Address of the page to download

    Returns:
        Parsed BeautifulSoup tree, or None if the request failed
    """
    logger.debug("Fetching URL: %s", url)

    try:
        response = requests.get(url, headers=HEADERS, timeout=30)
        response.raise_for_status()
        html = response.text
    except requests.RequestException as exc:
        logger.error("Failed to fetch %s: %s", url, exc)
        return None

    return BeautifulSoup(html, "html.parser")


def _build_team_index() -> dict[str, str]:
    """
    Return the team-name -> team-page-URL index, building it on first use.

    The index is built by scanning every link on MAIN_PAGE_URL and keeping
    those that look like team pages. A successfully built index is stored in
    the module-level cache and reused by later calls. If the main page cannot
    be fetched, an empty dict is returned and nothing is cached, so the next
    call tries again.

    Returns:
        Dictionary mapping lowercase team names to full URLs
    """
    global _team_index_cache  # pylint: disable=global-statement

    cached = _team_index_cache
    if cached is not None:
        logger.debug("Using cached team index with %d teams", len(cached))
        return cached

    logger.info("Building team index from main page...")
    main_page = _fetch_page(MAIN_PAGE_URL)
    if main_page is None:
        logger.error("Failed to fetch main page for team index")
        return {}

    index: dict[str, str] = {}

    for anchor in main_page.find_all("a", href=True):
        target = anchor["href"]
        label = anchor.get_text(strip=True)

        # Team pages look like /cfb/sec_football/alabama_crimson_tide_football/59
        if not label or "/cfb/" not in target or "_football/" not in target:
            continue

        # Main category / replay listing links are not team pages
        if "ncaa_college_football" in target or "replay" in target.lower():
            continue

        # Lowercase key gives case-insensitive lookup; the first link seen
        # for a given name wins
        name_key = label.lower().strip()
        if not name_key or name_key in index:
            continue

        # Site-relative links are expanded to absolute URLs
        if not target.startswith("http"):
            target = f"https://nfl-video.com{target}"

        index[name_key] = target
        logger.debug("Indexed team: %s -> %s", label, target)

    logger.info("Built team index with %d teams", len(index))
    _team_index_cache = index
    return index


def get_team_index() -> dict[str, str]:
    """
    Public accessor for the team index (handy for debugging or listing teams).

    Delegates to _build_team_index() and returns its result unchanged.

    Returns:
        Dictionary mapping lowercase team names to their page URLs
    """
    team_index = _build_team_index()
    return team_index


def clear_team_index_cache() -> None:
    """
    Reset the module-level team index cache to None (useful for testing).

    After this call the cache no longer holds a previously built index.
    """
    global _team_index_cache  # pylint: disable=global-statement

    _team_index_cache = None
    logger.debug("Cleared team index cache")


def _find_team_url(team_name: str) -> Optional[str]:
    """
    Find the URL for a team's dedicated page.

    Matching is case-insensitive and ignores surrounding whitespace:
    - "Ohio State" matches "ohio state" (exact match on the lowercase key)
    - "Texas A&M" matches "texas a&m"
    - "OSU" will not match unless it is a substring of an indexed name
      (there is no alias support)

    If there is no exact match, the first indexed name (in index order) that
    contains the search term, or is contained in it, is used.

    Args:
        team_name: Team name to search for

    Returns:
        Team page URL, or None if the name is empty or no team matches
    """
    query = team_name.lower().strip()

    # An empty string is a substring of every indexed name, so the partial
    # match below would return an arbitrary team. Treat it as "not found".
    if not query:
        logger.warning("Team not found in index: %s", team_name)
        return None

    team_index = _build_team_index()

    # Exact match first
    if query in team_index:
        return team_index[query]

    # Partial match - substring containment in either direction
    for indexed_name, url in team_index.items():
        if query in indexed_name or indexed_name in query:
            logger.debug("Fuzzy matched '%s' to '%s'", team_name, indexed_name)
            return url

    logger.warning("Team not found in index: %s", team_name)
    return None


def _parse_game_title(title: str) -> dict:
    """
    Parse a game title to extract team names, year, and event.

    Example titles:
        "Ohio State vs Oregon Football 2024 Big Ten Championship Full Game Replay"
        "Alabama vs. Oklahoma Football December 19, 2025 CFP First Round Full Game Replay"

    The event is the text between the date and "Full Game Replay". The date
    may be a full "Month Day, Year" or just a year, so both example titles
    above yield an event.

    Args:
        title: Full game title string

    Returns:
        Dictionary with keys: team_a, team_b, year, event. Any value that
        cannot be extracted from the title is None; year is an int.
    """
    result = {"team_a": None, "team_b": None, "year": None, "event": None}

    # Clean up the title
    title = title.strip()

    # Pattern: "Team A vs Team B Football [Date] [Event] Full Game Replay"
    # Handle both "vs" and "vs." (with period)
    vs_match = re.match(r"^(.+?)\s+vs\.?\s+(.+?)\s+Football\s+", title, re.IGNORECASE)
    if vs_match:
        result["team_a"] = vs_match.group(1).strip()
        result["team_b"] = vs_match.group(2).strip()

    # Extract year (first standalone 4-digit number starting with 20)
    year_match = re.search(r"\b(20\d{2})\b", title)
    if year_match:
        result["year"] = int(year_match.group(1))

    # Extract event - everything between the date and "Full Game Replay".
    # The "Month Day," part is optional so that year-only titles
    # ("Football 2024 Big Ten Championship Full Game Replay") are handled too.
    event_match = re.search(
        r"Football\s+(?:\w+\s+\d{1,2},?\s+)?20\d{2}\s+(.+?)\s+Full Game Replay",
        title,
        re.IGNORECASE,
    )
    if event_match:
        event = event_match.group(1).strip()
        # Guard against capturing a stray fragment of the trailing phrase
        if event and event.lower() not in ["full", "game", "replay"]:
            result["event"] = event

    return result


def _extract_games_from_page(soup: BeautifulSoup, filter_team: Optional[str] = None) -> list[GameResult]:
    """
    Collect the game replay links found on a parsed page.

    A link counts as a game when its text contains "Full Game Replay", has a
    " vs " / " vs. " separator, and both team names can be parsed from it.
    Each URL is returned at most once.

    Args:
        soup: BeautifulSoup object of the page
        filter_team: Optional team name; when given, only games whose team_a
            or team_b contains it (case-insensitive) are kept

    Returns:
        List of GameResult objects in the order the links appear on the page
    """
    # Lowercase the filter once; None means "keep every game"
    wanted = filter_team.lower() if filter_team else None
    found: list[GameResult] = []

    for anchor in soup.find_all("a", href=True):
        label = anchor.get_text(strip=True)

        # Only links whose text looks like a game title are of interest
        if "Full Game Replay" not in label:
            continue

        # A matchup needs a "vs" / "vs." separator
        label_lower = label.lower()
        if not (" vs " in label_lower or " vs. " in label_lower):
            continue

        # Expand site-relative links to absolute URLs
        game_url = anchor["href"]
        if not game_url.startswith("http"):
            game_url = f"https://nfl-video.com{game_url}"

        # The same game can be linked more than once on a page
        if any(existing.url == game_url for existing in found):
            continue

        info = _parse_game_title(label)
        if not (info["team_a"] and info["team_b"]):
            continue

        # Drop games that do not involve the requested team
        if wanted is not None:
            in_team_a = wanted in info["team_a"].lower()
            in_team_b = wanted in info["team_b"].lower()
            if not (in_team_a or in_team_b):
                continue

        # Look for a thumbnail image inside the link's immediate parent
        thumbnail_url = None
        container = anchor.find_parent()
        image = container.find("img") if container else None
        if image and image.get("src"):
            thumbnail_url = image["src"]
            if not thumbnail_url.startswith("http"):
                thumbnail_url = f"https://nfl-video.com{thumbnail_url}"

        game = GameResult(
            title=label,
            team_a=info["team_a"],
            team_b=info["team_b"],
            url=game_url,
            thumbnail_url=thumbnail_url,
            year=info["year"],
            event=info["event"],
        )
        found.append(game)
        logger.debug("Found game: %s", game)

    return found


def _team_matches(game: GameResult, team_name: str) -> bool:
    """
    Tell whether a team takes part in a game.

    The comparison is a case-insensitive substring test against both sides
    of the matchup.

    Args:
        game: GameResult to check
        team_name: Team name to search for

    Returns:
        True if team_name appears in either game.team_a or game.team_b
    """
    needle = team_name.lower()
    return any(needle in side.lower() for side in (game.team_a, game.team_b))


def search_games(
    team_a: str,
    team_b: Optional[str] = None,
    max_pages: int = 5,
    delay_seconds: float = REQUEST_DELAY_SECONDS,
) -> SearchResults:
    """
    Search for college football games by team name(s).

    Uses team-specific pages for efficient searching:
    1. Looks up team_a's dedicated page in the team index
    2. Fetches up to max_pages pages of games from that team's page
    3. If team_b specified, filters to only games against that opponent

    Searching stops early at the first page that cannot be fetched.

    Args:
        team_a: Primary team name to search for (required)
        team_b: Optional opponent name - if provided, only games against this team are returned
        max_pages: Maximum number of pages to search on the team's page (default 5)
        delay_seconds: Delay between page requests to avoid rate limiting

    Returns:
        SearchResults object containing matching games and search metadata.
        pages_searched is the number of pages actually fetched (0 when the
        team page is not found); total_games_scanned counts the games
        involving team_a seen on those pages, before the team_b filter.

    Example:
        # Find all Alabama games
        results = search_games("Alabama")

        # Find Alabama vs Georgia specifically
        results = search_games("Alabama", "Georgia")
    """
    logger.info("Searching for games: team_a='%s', team_b='%s'", team_a, team_b)

    # Find the team's dedicated page URL
    team_url = _find_team_url(team_a)

    if team_url is None:
        logger.warning("Could not find team page for '%s', returning empty results", team_a)
        return SearchResults(
            query_team_a=team_a,
            query_team_b=team_b,
            games=[],
            pages_searched=0,
            total_games_scanned=0,
        )

    logger.info("Found team page for '%s': %s", team_a, team_url)

    matching_games: list[GameResult] = []
    total_scanned = 0
    # Pages successfully fetched; can be lower than max_pages on early stop
    pages_fetched = 0

    # Fetch pages from the team's dedicated page
    for page_num in range(1, max_pages + 1):
        # Team pages use same pagination pattern as main: base URL, then base-2, base-3, etc.
        page_url = team_url if page_num == 1 else f"{team_url}-{page_num}"

        logger.info("Searching page %d/%d: %s", page_num, max_pages, page_url)

        soup = _fetch_page(page_url)
        if soup is None:
            logger.warning("Failed to fetch page %d, stopping search", page_num)
            break

        pages_fetched += 1

        # Extract games, filtering for the team (team pages sometimes include unrelated sidebar games)
        page_games = _extract_games_from_page(soup, filter_team=team_a)
        total_scanned += len(page_games)

        for game in page_games:
            # If team_b specified, must also match team_b
            if team_b and not _team_matches(game, team_b):
                continue

            # Avoid duplicates (same URL) across pages
            if not any(g.url == game.url for g in matching_games):
                matching_games.append(game)
                logger.info("Found matching game: %s", game)

        # Be polite - add delay between requests (not needed after the last page)
        if page_num < max_pages:
            time.sleep(delay_seconds)

    results = SearchResults(
        query_team_a=team_a,
        query_team_b=team_b,
        games=matching_games,
        pages_searched=pages_fetched,
        total_games_scanned=total_scanned,
    )

    logger.info("Search complete: %s", results)
    return results


def list_available_teams() -> list[str]:
    """
    List every team name known to the team index.

    Useful for populating dropdown menus or autocomplete in the UI.

    Returns:
        Sorted list of team names, title-cased from the lowercase index keys
    """
    # The index only stores lowercase keys, so display casing is rebuilt with
    # str.title() (capitalizes each word; may not match the site's casing)
    display_names = [key.title() for key in _build_team_index()]
    display_names.sort()
    return display_names