Spaces:

andytaylor-smg
/

cfb40

Sleeping

App Files Files Community

andytaylor-smg committed on Jan 28

Commit

4f9adc4

1 Parent(s): deb89c9

simple request

Browse files

Files changed (6) hide show

pyproject.toml +3 -0
scripts/test_source_finding.py +213 -0
src/source_finding/__init__.py +33 -0
src/source_finding/downloader.py +245 -0
src/source_finding/models.py +80 -0
src/source_finding/scraper.py +267 -0

pyproject.toml CHANGED Viewed

@@ -12,6 +12,9 @@ dependencies = [
     "pillow>=10.0.0",
     "pydantic>=2.0.0",
     "pytesseract>=0.3.10",
 ]
 [dependency-groups]

     "pillow>=10.0.0",
     "pydantic>=2.0.0",
     "pytesseract>=0.3.10",
+    "requests>=2.31.0",
+    "beautifulsoup4>=4.12.0",
+    "yt-dlp>=2024.1.0",
 ]
 [dependency-groups]

scripts/test_source_finding.py ADDED Viewed

	@@ -0,0 +1,213 @@

+#!/usr/bin/env python3
+"""
+Test script for the source_finding module.
+This script demonstrates and validates the functionality of:
+1. Searching for games by team name(s)
+2. Generating download commands
+3. Extracting direct video URLs (if yt-dlp is available)
+Usage:
+    # Activate virtual environment first
+    source .venv/bin/activate
+    # Run with default test (Ohio State vs Oregon)
+    python scripts/test_source_finding.py
+    # Search for a specific team
+    python scripts/test_source_finding.py --team-a "Tennessee"
+    # Search for a specific matchup
+    python scripts/test_source_finding.py --team-a "Ohio State" --team-b "Oregon"
+    # Search more pages (default is 3)
+    python scripts/test_source_finding.py --team-a "Michigan" --max-pages 10
+    # Test URL extraction on a specific result
+    python scripts/test_source_finding.py --team-a "Ohio State" --test-download
+"""
+import argparse
+import logging
+import sys
+from pathlib import Path
+# Add src to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
+from source_finding import (
+    SearchResults,
+    extract_direct_video_url,
+    get_download_command,
+    get_suggested_filename,
+    search_games,
+)
+from source_finding.downloader import is_ytdlp_available, get_video_info
+def setup_logging(verbose: bool = False) -> None:
+    """Configure logging for the test script."""
+    level = logging.DEBUG if verbose else logging.INFO
+    logging.basicConfig(
+        level=level,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+        datefmt="%H:%M:%S",
+    )
+def print_separator(title: str = "") -> None:
+    """Print a visual separator with optional title."""
+    width = 70
+    if title:
+        print(f"\n{'=' * width}")
+        print(f"  {title}")
+        print(f"{'=' * width}")
+    else:
+        print(f"\n{'-' * width}")
+def test_search(team_a: str, team_b: str | None, max_pages: int) -> None:
+    """Test game search functionality."""
+    print_separator(f"Searching for: {team_a}" + (f" vs {team_b}" if team_b else ""))
+    print(f"\nParameters:")
+    print(f"  team_a: {team_a}")
+    print(f"  team_b: {team_b or '(any)'}")
+    print(f"  max_pages: {max_pages}")
+    # Perform the search
+    results = search_games(team_a=team_a, team_b=team_b, max_pages=max_pages)
+    print(f"\nSearch Results:")
+    print(f"  Pages searched: {results.pages_searched}")
+    print(f"  Total games scanned: {results.total_games_scanned}")
+    print(f"  Matching games found: {len(results.games)}")
+    if not results.games:
+        print("\n  No matching games found!")
+        return results
+    print_separator("Found Games")
+    for i, game in enumerate(results.games, 1):
+        print(f"\n[{i}] {game.title}")
+        print(f"    Teams: {game.team_a} vs {game.team_b}")
+        if game.event:
+            print(f"    Event: {game.event}")
+        if game.year:
+            print(f"    Year: {game.year}")
+        if game.date_str:
+            print(f"    Date: {game.date_str}")
+        print(f"    URL: {game.url}")
+        if game.thumbnail_url:
+            print(f"    Thumbnail: {game.thumbnail_url[:60]}...")
+    return results
+def test_download_command(game) -> None:
+    """Test download command generation."""
+    print_separator("Download Command Generation")
+    # Generate download command
+    cmd = get_download_command(game, output_dir="~/Downloads")
+    filename = get_suggested_filename(game)
+    print(f"\nGame: {game}")
+    print(f"Suggested filename: {filename}")
+    print(f"\nDownload command:")
+    print(f"  {cmd}")
+def test_url_extraction(game) -> None:
+    """Test direct URL extraction (requires yt-dlp)."""
+    print_separator("Direct URL Extraction")
+    if not is_ytdlp_available():
+        print("\n  yt-dlp is not available. Install with: pip install yt-dlp")
+        print("  Skipping URL extraction test.")
+        return
+    print(f"\nExtracting direct URL for: {game.title}")
+    print("  (This may take 10-30 seconds...)")
+    url = extract_direct_video_url(game, timeout_seconds=60)
+    if url:
+        print(f"\n  SUCCESS! Direct URL extracted:")
+        # Truncate URL for display if very long
+        if len(url) > 100:
+            print(f"  {url[:100]}...")
+        else:
+            print(f"  {url}")
+        print("\n  This URL can be used for browser-direct download.")
+        print("  NOTE: URL may expire after a few minutes!")
+    else:
+        print("\n  FAILED to extract direct URL.")
+        print("  The video may use a hosting service not supported by yt-dlp,")
+        print("  or there may be extraction issues.")
+def test_video_info(game) -> None:
+    """Test video info extraction (requires yt-dlp)."""
+    print_separator("Video Info Extraction")
+    if not is_ytdlp_available():
+        print("\n  yt-dlp is not available. Skipping.")
+        return
+    print(f"\nGetting video info for: {game.title}")
+    print("  (This may take a few seconds...)")
+    info = get_video_info(game, timeout_seconds=30)
+    if info:
+        print("\n  Video metadata:")
+        print(f"    Title: {info.get('title', 'N/A')}")
+        print(f"    Duration: {info.get('duration', 'N/A')} seconds")
+        filesize = info.get("filesize") or info.get("filesize_approx")
+        if filesize:
+            print(f"    File size: {filesize / (1024*1024*1024):.2f} GB")
+        print(f"    Format: {info.get('format', 'N/A')}")
+        print(f"    Resolution: {info.get('resolution', 'N/A')}")
+    else:
+        print("\n  FAILED to get video info.")
+def main():
+    parser = argparse.ArgumentParser(description="Test the source_finding module")
+    parser.add_argument("--team-a", default="Ohio State", help="Primary team to search for")
+    parser.add_argument("--team-b", default=None, help="Optional second team for specific matchup")
+    parser.add_argument("--max-pages", type=int, default=3, help="Maximum pages to search")
+    parser.add_argument("--test-download", action="store_true", help="Test URL extraction on first result")
+    parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")
+    args = parser.parse_args()
+    setup_logging(args.verbose)
+    print("\n" + "=" * 70)
+    print("  SOURCE FINDING MODULE TEST")
+    print("=" * 70)
+    # Check yt-dlp availability
+    print(f"\nyt-dlp available: {is_ytdlp_available()}")
+    # Run search test
+    results = test_search(args.team_a, args.team_b, args.max_pages)
+    if results and results.games:
+        # Test with the first result
+        first_game = results.games[0]
+        # Always test download command generation
+        test_download_command(first_game)
+        # Optionally test URL extraction
+        if args.test_download:
+            test_url_extraction(first_game)
+            test_video_info(first_game)
+    print_separator("Test Complete")
+    print("\nTo test URL extraction, run with --test-download flag:")
+    print(f'  python scripts/test_source_finding.py --team-a "{args.team_a}" --test-download')
+if __name__ == "__main__":
+    main()

src/source_finding/__init__.py ADDED Viewed

	@@ -0,0 +1,33 @@

+"""
+Source finding module for discovering and downloading college football game videos.
+This module provides functionality to:
+1. Search nfl-video.com for college football games by team name(s)
+2. Return matching games with metadata (title, teams, date, event)
+3. Generate download commands or extract direct video URLs for downloading
+Example usage:
+    from source_finding import search_games, get_download_command, extract_direct_video_url
+    # Search for games
+    results = search_games(team_a="Ohio State", team_b="Oregon", max_pages=3)
+    # Get download command for offline use
+    cmd = get_download_command(results.games[0])
+    # Or extract direct URL for browser download
+    url = extract_direct_video_url(results.games[0])
+"""
+from .models import GameResult, SearchResults
+from .scraper import search_games
+from .downloader import get_download_command, extract_direct_video_url, get_suggested_filename
+__all__ = [
+    "GameResult",
+    "SearchResults",
+    "search_games",
+    "get_download_command",
+    "extract_direct_video_url",
+    "get_suggested_filename",
+]

src/source_finding/downloader.py ADDED Viewed

	@@ -0,0 +1,245 @@

+"""
+Video download utilities for college football games.
+This module provides multiple strategies for downloading videos:
+1. Generate yt-dlp commands for users to run locally (offline mode)
+2. Extract direct video URLs for browser-direct downloads (preferred for apps)
+3. Stream video through a proxy (fallback, uses server bandwidth)
+"""
+import logging
+import shlex
+import subprocess
+from pathlib import Path
+from typing import Generator, Optional
+from .models import GameResult
+logger = logging.getLogger(__name__)
+def get_suggested_filename(game: GameResult, extension: str = "mp4") -> str:
+    """
+    Generate a suggested filename for a game video.
+    Args:
+        game: GameResult object
+        extension: File extension (default "mp4")
+    Returns:
+        Safe filename string like "Ohio_State_vs_Oregon_2024.mp4"
+    """
+    return f"{game.get_filename_base()}.{extension}"
+def get_download_command(game: GameResult, output_dir: str = ".", output_filename: Optional[str] = None) -> str:
+    """
+    Generate a yt-dlp command string for downloading a game video.
+    This is the "offline mode" - returns a command users can copy and run locally.
+    Args:
+        game: GameResult object containing the game URL
+        output_dir: Directory to save the video (default current directory)
+        output_filename: Optional custom filename; if None, generates from game metadata
+    Returns:
+        A complete yt-dlp command string ready to run in a terminal
+    Example:
+        >>> cmd = get_download_command(game, output_dir="~/Downloads")
+        >>> print(cmd)
+        yt-dlp "https://nfl-video.com/ohio-state-vs-oregon..." -o "~/Downloads/Ohio_State_vs_Oregon_2024.mp4"
+    """
+    if output_filename is None:
+        output_filename = get_suggested_filename(game)
+    # Build the output path
+    output_path = Path(output_dir) / output_filename
+    # Quote the URL and path for shell safety
+    quoted_url = shlex.quote(game.url)
+    quoted_output = shlex.quote(str(output_path))
+    # Build the command
+    # Using yt-dlp with common options for best compatibility
+    command = f"yt-dlp {quoted_url} -o {quoted_output}"
+    return command
+def extract_direct_video_url(game: GameResult, timeout_seconds: int = 60) -> Optional[str]:
+    """
+    Extract the direct video URL from a game page using yt-dlp.
+    This is the preferred method for in-app downloads - extracts the actual video
+    URL from the hosting service (mixdrop, streamtape, etc.) so the browser can
+    download directly without going through your server.
+    IMPORTANT: The extracted URL may expire after a few minutes. Call this
+    function on-demand when the user clicks "Download", not when displaying results.
+    Args:
+        game: GameResult object containing the game page URL
+        timeout_seconds: Maximum time to wait for URL extraction
+    Returns:
+        Direct video URL string, or None if extraction failed
+    Example:
+        >>> url = extract_direct_video_url(game)
+        >>> # In Shiny: redirect browser to this URL for download
+        >>> print(url)
+        'https://s1.mixdropjp.pw/v/abc123def456.mp4?...'
+    """
+    try:
+        logger.info("Extracting direct video URL for: %s", game.url)
+        # Use yt-dlp with --get-url to extract the direct video URL
+        # --no-warnings to suppress non-critical warnings
+        # -f best to get the best quality single file
+        result = subprocess.run(
+            ["yt-dlp", "--get-url", "--no-warnings", "-f", "best", game.url],
+            capture_output=True,
+            text=True,
+            timeout=timeout_seconds,
+            check=False,  # We handle return codes manually
+        )
+        if result.returncode != 0:
+            logger.error("yt-dlp failed with code %d: %s", result.returncode, result.stderr)
+            return None
+        # The URL is in stdout
+        url = result.stdout.strip()
+        if not url:
+            logger.error("yt-dlp returned empty URL")
+            return None
+        # Sometimes yt-dlp returns multiple URLs (for video/audio streams)
+        # Take the first one which is typically the video
+        if "\n" in url:
+            urls = url.split("\n")
+            url = urls[0]
+            logger.debug("Multiple URLs returned, using first: %s", url[:100])
+        logger.info("Extracted direct URL: %s...", url[:100] if len(url) > 100 else url)
+        return url
+    except subprocess.TimeoutExpired:
+        logger.error("yt-dlp timed out after %d seconds", timeout_seconds)
+        return None
+    except FileNotFoundError:
+        logger.error("yt-dlp not found. Please install it: pip install yt-dlp")
+        return None
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        logger.error("Failed to extract video URL: %s", e)
+        return None
+def is_ytdlp_available() -> bool:
+    """
+    Check if yt-dlp is available on the system.
+    Returns:
+        True if yt-dlp is installed and accessible
+    """
+    try:
+        result = subprocess.run(["yt-dlp", "--version"], capture_output=True, text=True, timeout=5, check=False)
+        return result.returncode == 0
+    except (FileNotFoundError, subprocess.TimeoutExpired):
+        return False
+def stream_video_proxy(game: GameResult, chunk_size: int = 8192) -> Generator[bytes, None, None]:
+    """
+    Stream video content in chunks (fallback method).
+    This is Option B from the architecture - streams video through your server.
+    Use this only if extract_direct_video_url() doesn't work reliably (e.g., URLs expire too fast).
+    Memory usage is constant regardless of video size (~chunk_size bytes).
+    NOTE: This is a stub implementation. For production use, you would:
+    1. First extract the direct URL using yt-dlp
+    2. Stream from that URL using requests with stream=True
+    Args:
+        game: GameResult object
+        chunk_size: Size of chunks to yield (default 8KB)
+    Yields:
+        Bytes chunks of the video file
+    Example:
+        >>> for chunk in stream_video_proxy(game):
+        ...     response.write(chunk)  # In a web framework
+    """
+    # First, get the direct video URL
+    direct_url = extract_direct_video_url(game)
+    if direct_url is None:
+        logger.error("Cannot stream - failed to extract direct URL")
+        return
+    try:
+        import requests  # pylint: disable=import-outside-toplevel
+        logger.info("Starting streaming proxy for: %s", game.title)
+        # Stream the video content
+        with requests.get(direct_url, stream=True, timeout=30) as response:
+            response.raise_for_status()
+            # Yield chunks as they arrive
+            for chunk in response.iter_content(chunk_size=chunk_size):
+                if chunk:  # Filter out keep-alive chunks
+                    yield chunk
+        logger.info("Streaming complete for: %s", game.title)
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        logger.error("Streaming failed: %s", e)
+        return
+def get_video_info(game: GameResult, timeout_seconds: int = 30) -> Optional[dict]:
+    """
+    Get video metadata without downloading.
+    Useful for showing file size, duration, quality options to users before download.
+    Args:
+        game: GameResult object
+        timeout_seconds: Maximum time to wait
+    Returns:
+        Dictionary with video info (title, duration, filesize, etc.) or None if failed
+    """
+    try:
+        import json  # pylint: disable=import-outside-toplevel
+        logger.info("Getting video info for: %s", game.url)
+        result = subprocess.run(
+            ["yt-dlp", "--dump-json", "--no-download", "--no-warnings", game.url],
+            capture_output=True,
+            text=True,
+            timeout=timeout_seconds,
+            check=False,  # We handle return codes manually
+        )
+        if result.returncode != 0:
+            logger.error("yt-dlp info extraction failed: %s", result.stderr)
+            return None
+        info = json.loads(result.stdout)
+        return info
+    except subprocess.TimeoutExpired:
+        logger.error("yt-dlp info timed out after %d seconds", timeout_seconds)
+        return None
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        logger.error("Failed to get video info: %s", e)
+        return None

src/source_finding/models.py ADDED Viewed

	@@ -0,0 +1,80 @@

+"""
+Pydantic models for the source finding module.
+Contains data models for representing game search results and metadata.
+"""
+from typing import Optional
+from pydantic import BaseModel, Field
+class GameResult(BaseModel):
+    """
+    Represents a single college football game found on nfl-video.com.
+    Attributes:
+        title: Full title from the website, e.g. "Ohio State vs Oregon Football 2024 Big Ten Championship Full Game Replay"
+        team_a: First team name parsed from the title (appears before "vs")
+        team_b: Second team name parsed from the title (appears after "vs")
+        url: Full URL to the game's page on nfl-video.com
+        thumbnail_url: URL to the game's thumbnail image, if available
+        date_str: Date string parsed from the page, e.g. "January 8, 2024"
+        event: Event name parsed from the title, e.g. "CFP National Championship", "Big Ten Championship"
+        year: Year of the game, parsed from the title
+    """
+    title: str = Field(..., description="Full title of the game from the website")
+    team_a: str = Field(..., description="First team name (before 'vs')")
+    team_b: str = Field(..., description="Second team name (after 'vs')")
+    url: str = Field(..., description="Full URL to the game page")
+    thumbnail_url: Optional[str] = Field(default=None, description="URL to thumbnail image")
+    date_str: Optional[str] = Field(default=None, description="Date string, e.g. 'January 8, 2024'")
+    event: Optional[str] = Field(default=None, description="Event name, e.g. 'CFP National Championship'")
+    year: Optional[int] = Field(default=None, description="Year of the game")
+    def __str__(self) -> str:
+        """Human-readable representation of the game."""
+        event_str = f" ({self.event})" if self.event else ""
+        date_str = f" - {self.date_str}" if self.date_str else ""
+        return f"{self.team_a} vs {self.team_b}{event_str}{date_str}"
+    def get_filename_base(self) -> str:
+        """
+        Generate a safe filename base for this game (without extension).
+        Returns:
+            A filename-safe string like "Ohio_State_vs_Oregon_2024"
+        """
+        # Replace spaces with underscores and remove special characters
+        # pylint: disable=no-member  # False positive: Pydantic fields are strings at runtime
+        team_a_safe = self.team_a.replace(" ", "_").replace("(", "").replace(")", "")
+        team_b_safe = self.team_b.replace(" ", "_").replace("(", "").replace(")", "")
+        year_str = f"_{self.year}" if self.year else ""
+        return f"{team_a_safe}_vs_{team_b_safe}{year_str}"
+class SearchResults(BaseModel):
+    """
+    Results from searching for games on nfl-video.com.
+    Attributes:
+        query_team_a: The team name that was searched for (required)
+        query_team_b: Optional second team name that was searched for
+        games: List of matching GameResult objects
+        pages_searched: Number of pages that were searched
+        total_games_scanned: Total number of games scanned across all pages
+    """
+    query_team_a: str = Field(..., description="Primary team name searched for")
+    query_team_b: Optional[str] = Field(default=None, description="Optional second team name searched for")
+    games: list[GameResult] = Field(default_factory=list, description="List of matching games found")
+    pages_searched: int = Field(default=0, description="Number of pages searched")
+    total_games_scanned: int = Field(default=0, description="Total games scanned across all pages")
+    def __str__(self) -> str:
+        """Human-readable summary of search results."""
+        team_str = self.query_team_a
+        if self.query_team_b:
+            team_str += f" vs {self.query_team_b}"
+        return f"Search for '{team_str}': {len(self.games)} games found (scanned {self.total_games_scanned} across {self.pages_searched} pages)"

src/source_finding/scraper.py ADDED Viewed

	@@ -0,0 +1,267 @@

+"""
+Web scraper for finding college football games on nfl-video.com.
+This module handles fetching and parsing game listings from the website,
+filtering by team names, and extracting metadata from game titles.
+"""
+import logging
+import re
+import time
+from typing import Optional
+import requests
+from bs4 import BeautifulSoup
+from .models import GameResult, SearchResults
+logger = logging.getLogger(__name__)
+# Listing page for college football full game replays on nfl-video.com.
+# Page 1 is this URL as-is; later pages append "-<page number>" (see _get_page_url).
+# NOTE(review): the trailing "/2" looks like a site category id - confirm.
+BASE_URL = "https://nfl-video.com/cfb/ncaa_college_football_highlights_games_replay/ncaa_college_football_full_game_replays/2"
+# Browser-like User-Agent sent with every page request, to avoid being blocked
+HEADERS = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"}
+# Default pause, in seconds, between page requests to be polite to the server
+REQUEST_DELAY_SECONDS = 0.5
+def _get_page_url(page_num: int) -> str:
+    """
+    Get the URL for a specific page of game listings.
+    Args:
+        page_num: Page number (1-indexed)
+    Returns:
+        Full URL for that page
+    """
+    if page_num == 1:
+        return BASE_URL
+    return f"{BASE_URL}-{page_num}"
+def _parse_game_title(title: str) -> dict:
+    """
+    Parse a game title to extract team names, year, and event.
+    Example titles:
+        "Ohio State vs Oregon Football 2024 Big Ten Championship Full Game Replay"
+        "Washington vs Michigan Football 2024 CFP National Championship Full Game Replay"
+        "Iowa vs Tennessee Football 2024 Citrus Bowl Full Game Replay"
+    Args:
+        title: Full game title string
+    Returns:
+        Dictionary with keys: team_a, team_b, year, event (some may be None)
+    """
+    result = {"team_a": None, "team_b": None, "year": None, "event": None}
+    # Clean up the title
+    title = title.strip()
+    # Pattern: "Team A vs Team B Football YYYY [Event] Full Game Replay"
+    # Some titles use "vs." with a period, others use "vs" without
+    # First, try to extract the "vs" or "vs." split
+    vs_match = re.match(r"^(.+?)\s+vs\.?\s+(.+?)\s+Football\s+", title, re.IGNORECASE)
+    if vs_match:
+        result["team_a"] = vs_match.group(1).strip()
+        result["team_b"] = vs_match.group(2).strip()
+    # Extract year (4-digit number)
+    year_match = re.search(r"\b(20\d{2})\b", title)
+    if year_match:
+        result["year"] = int(year_match.group(1))
+    # Extract event - everything between the date and "Full Game Replay"
+    # Format: "Football [Month Day, Year] [Event] Full Game Replay"
+    # Example: "Football December 20, 2025 CFP First Round Full Game Replay"
+    event_match = re.search(r"Football\s+\w+\s+\d{1,2},?\s+20\d{2}\s+(.+?)\s+Full Game Replay", title, re.IGNORECASE)
+    if event_match:
+        event = event_match.group(1).strip()
+        # Clean up common patterns
+        if event and event.lower() not in ["full", "game", "replay"]:
+            result["event"] = event
+    return result
+def _fetch_page(url: str) -> Optional[BeautifulSoup]:
+    """
+    Fetch a page and return its parsed HTML.
+    Args:
+        url: URL to fetch
+    Returns:
+        BeautifulSoup object or None if fetch failed
+    """
+    try:
+        logger.debug("Fetching URL: %s", url)
+        response = requests.get(url, headers=HEADERS, timeout=30)
+        response.raise_for_status()
+        return BeautifulSoup(response.text, "html.parser")
+    except requests.RequestException as e:
+        logger.error("Failed to fetch %s: %s", url, e)
+        return None
+def _extract_games_from_page(soup: BeautifulSoup) -> list[GameResult]:
+    """
+    Extract game listings from a parsed page.
+    Args:
+        soup: BeautifulSoup object of the page
+    Returns:
+        List of GameResult objects found on the page
+    """
+    games = []
+    # Find all game links - they typically have h3 headers or are in specific divs
+    # Looking for links that contain "Full Game Replay" in the text
+    for link in soup.find_all("a", href=True):
+        text = link.get_text(strip=True)
+        # Skip links that don't look like game titles
+        if "Full Game Replay" not in text:
+            continue
+        # Skip if it doesn't have "vs" or "vs." (not a game matchup)
+        # Some titles use "vs." with a period, others use "vs" without
+        text_lower = text.lower()
+        if " vs " not in text_lower and " vs. " not in text_lower:
+            continue
+        href = link["href"]
+        # Make sure it's a full URL
+        if not href.startswith("http"):
+            href = f"https://nfl-video.com{href}"
+        # Skip if we've already seen this URL (duplicates on page)
+        if any(g.url == href for g in games):
+            continue
+        # Parse the title for metadata
+        parsed = _parse_game_title(text)
+        if parsed["team_a"] and parsed["team_b"]:
+            # Try to find thumbnail - look for img in parent or sibling elements
+            thumbnail_url = None
+            parent = link.find_parent()
+            if parent:
+                img = parent.find("img")
+                if img and img.get("src"):
+                    thumbnail_url = img["src"]
+                    if not thumbnail_url.startswith("http"):
+                        thumbnail_url = f"https://nfl-video.com{thumbnail_url}"
+            game = GameResult(
+                title=text,
+                team_a=parsed["team_a"],
+                team_b=parsed["team_b"],
+                url=href,
+                thumbnail_url=thumbnail_url,
+                year=parsed["year"],
+                event=parsed["event"],
+            )
+            games.append(game)
+            logger.debug("Found game: %s", game)
+    return games
+def _team_matches(game: GameResult, team_name: str) -> bool:
+    """
+    Check if a game involves a given team.
+    Performs case-insensitive partial matching to handle variations like:
+    - "Ohio State" matches "Ohio State Buckeyes"
+    - "OSU" would need to be handled separately if desired
+    Args:
+        game: GameResult to check
+        team_name: Team name to search for
+    Returns:
+        True if the team appears in either team_a or team_b
+    """
+    team_lower = team_name.lower()
+    return team_lower in game.team_a.lower() or team_lower in game.team_b.lower()
+def search_games(
+    team_a: str,
+    team_b: Optional[str] = None,
+    max_pages: int = 5,
+    delay_seconds: float = REQUEST_DELAY_SECONDS,
+) -> SearchResults:
+    """
+    Search for college football games on nfl-video.com by team name(s).
+    Args:
+        team_a: Primary team name to search for (required)
+        team_b: Optional second team name - if provided, only games with BOTH teams are returned
+        max_pages: Maximum number of pages to search (default 5)
+        delay_seconds: Delay between page requests to avoid rate limiting
+    Returns:
+        SearchResults object containing matching games and search metadata
+    Example:
+        # Find all Ohio State games
+        results = search_games("Ohio State", max_pages=10)
+        # Find Ohio State vs Oregon specifically
+        results = search_games("Ohio State", "Oregon")
+    """
+    logger.info("Searching for games: team_a='%s', team_b='%s', max_pages=%d", team_a, team_b, max_pages)
+    matching_games: list[GameResult] = []
+    total_scanned = 0
+    for page_num in range(1, max_pages + 1):
+        url = _get_page_url(page_num)
+        logger.info("Searching page %d/%d: %s", page_num, max_pages, url)
+        soup = _fetch_page(url)
+        if soup is None:
+            logger.warning("Failed to fetch page %d, stopping search", page_num)
+            break
+        # Extract all games from this page
+        page_games = _extract_games_from_page(soup)
+        total_scanned += len(page_games)
+        # Filter by team name(s)
+        for game in page_games:
+            # Must match team_a
+            if not _team_matches(game, team_a):
+                continue
+            # If team_b specified, must also match team_b
+            if team_b and not _team_matches(game, team_b):
+                continue
+            # Avoid duplicates (same URL)
+            if not any(g.url == game.url for g in matching_games):
+                matching_games.append(game)
+                logger.info("Found matching game: %s", game)
+        # Be polite - add delay between requests
+        if page_num < max_pages:
+            time.sleep(delay_seconds)
+    results = SearchResults(
+        query_team_a=team_a,
+        query_team_b=team_b,
+        games=matching_games,
+        pages_searched=max_pages,
+        total_games_scanned=total_scanned,
+    )
+    logger.info("Search complete: %s", results)
+    return results