"""Collection of smolagents tools (search, media, parsing, lookup helpers).

Every public ``*_call`` function is registered with ``@tool`` and listed in
``tools_list`` at the bottom of the module.
"""

import re
import subprocess
import sys
from typing import Any, Dict, List, Optional

import chess
import chess.engine
import cv2
import pandas as pd
import requests
import whisper
import wikipedia
from bs4 import BeautifulSoup
from imdb import IMDb
from pytube import YouTube
from smolagents import tool
from youtube_transcript_api import YouTubeTranscriptApi

# Upper bound (seconds) for outbound HTTP requests so a stalled server
# cannot block a tool call forever.
_HTTP_TIMEOUT_SECONDS = 30


# === wikipedia_search ===
@tool
def wikipedia_search_call(query: str) -> Dict[str, Any]:
    """
    Search Wikipedia for information about a specific topic.

    Args:
        query (str): The search query/topic to look up on Wikipedia

    Returns:
        dict: Dictionary containing the page title, content, and sections
    """
    page = wikipedia.page(query)
    sections = {name: page.section(name) for name in page.sections}
    return {"title": page.title, "content": page.content, "sections": sections}


# === youtube_transcript ===
@tool
def youtube_transcript_call(video_id: str) -> List[Dict[str, Any]]:
    """
    Get the transcript/subtitles from a YouTube video.

    Args:
        video_id (str): The YouTube video ID (the part after v= in the URL)

    Returns:
        list: List of transcript segments with text and timing information
    """
    # NOTE(review): newer youtube-transcript-api releases may no longer expose
    # the static get_transcript helper - confirm against the pinned version.
    return YouTubeTranscriptApi.get_transcript(video_id)


# === video_frame_analyzer ===
def download_and_sample(video_id: str, fps: int = 1) -> List[Any]:
    """
    Download a YouTube video and sample frames at specified FPS.

    The video is saved as ``<video_id>.mp4`` in the current working directory
    and is not removed afterwards.

    Args:
        video_id (str): The YouTube video ID
        fps (int): Frames per second to sample (default: 1)

    Returns:
        list: List of video frames as numpy arrays

    Raises:
        ValueError: If ``fps`` is not positive.
        RuntimeError: If no progressive MP4 stream is available.
    """
    # Reject bad rates before paying for the download; a zero rate would
    # otherwise surface later as a ZeroDivisionError.
    if fps <= 0:
        raise ValueError(f"fps must be positive, got {fps}")

    url = f"https://www.youtube.com/watch?v={video_id}"
    stream = (
        YouTube(url)
        .streams.filter(progressive=True, file_extension="mp4")
        .first()
    )
    if stream is None:
        raise RuntimeError(
            f"No progressive MP4 stream available for video {video_id}"
        )
    path = stream.download(filename=f"{video_id}.mp4")

    cap = cv2.VideoCapture(path)
    try:
        # Fall back to 1 when the container reports no frame rate (0.0).
        frame_rate = cap.get(cv2.CAP_PROP_FPS) or 1
        # Keep every `step`-th frame; never less than 1 so that a requested
        # rate above the native rate simply keeps every frame.
        step = max(1, int(frame_rate / fps))
        frames = []
        idx = 0
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            if idx % step == 0:
                frames.append(frame)
            idx += 1
    finally:
        # Always free the capture handle, even if decoding raised.
        cap.release()
    return frames


def detect_species(frame: Any) -> List[str]:
    """
    Detect bird species in a video frame.

    Currently a placeholder that always returns an empty list.

    Args:
        frame: Video frame as numpy array

    Returns:
        list: List of detected bird species names
    """
    # TODO: integrate actual CV model for bird-species detection
    return []


@tool
def video_frame_analyzer_call(video_id: str) -> int:
    """
    Analyze video frames to count unique bird species.

    Args:
        video_id (str): The YouTube video ID to analyze

    Returns:
        int: Maximum number of unique bird species detected in any frame
    """
    frames = download_and_sample(video_id)
    # default=0 covers a video from which no frame could be read.
    return max((len(set(detect_species(f))) for f in frames), default=0)


# === string_manipulator ===
@tool
def string_manipulator_call(
    text: str,
    operation: str = "reverse",
    pattern: Optional[str] = None,
    replacement: Optional[str] = None,
) -> Any:
    """
    Perform various string manipulation operations.

    Args:
        text (str): The input text to manipulate
        operation (str): The operation to perform ("reverse", "split", "regex_replace")
        pattern (str, optional): Regex pattern for replacement operations
        replacement (str, optional): Replacement string for regex operations

    Returns:
        Any: Result of the string operation (string or list)

    Raises:
        ValueError: If the operation is unknown, or if "regex_replace" is
            requested without a non-empty pattern and a replacement.
    """
    if operation == "reverse":
        return text[::-1]
    if operation == "split":
        return text.split()
    if operation == "regex_replace":
        # Report the real problem instead of claiming the operation itself
        # is unsupported.
        if not pattern or replacement is None:
            raise ValueError(
                "regex_replace requires both 'pattern' and 'replacement'"
            )
        return re.sub(pattern, replacement, text)
    raise ValueError(f"Unsupported operation: {operation}")


# === vision_chess_engine ===
@tool
def vision_chess_engine_call(fen: str, depth: int = 20) -> str:
    """
    Analyze a chess position and suggest the best move using Stockfish engine.

    Args:
        fen (str): FEN notation representing the chess position
        depth (int): Search depth for the chess engine (default: 20)

    Returns:
        str: The best move in Standard Algebraic Notation (SAN)

    Raises:
        ValueError: If the FEN is invalid or the engine returns no move.
    """
    # Parse the FEN first so an invalid position never spawns an engine process.
    board = chess.Board(fen)
    # The context manager shuts the engine down even if the search raises,
    # so no stockfish subprocess is left behind.
    with chess.engine.SimpleEngine.popen_uci("stockfish") as engine:
        result = engine.play(board, chess.engine.Limit(depth=depth))
    if result.move is None:
        raise ValueError(f"Engine returned no move for position: {fen}")
    return board.san(result.move)


# === table_parser ===
def _read_excel_sheet(file_path: str, sheet_name: Optional[str]) -> pd.DataFrame:
    """Read one Excel sheet; ``None`` selects the first sheet."""
    # pandas treats sheet_name=None as "all sheets" and returns a dict, which
    # would break the DataFrame contract of the public tools, so map None to
    # the first sheet (index 0, pandas' own default).
    return pd.read_excel(
        file_path, sheet_name=0 if sheet_name is None else sheet_name
    )


@tool
def table_parser_call(file_path: str, sheet_name: Optional[str] = None) -> pd.DataFrame:
    """
    Parse CSV or Excel files into a pandas DataFrame.

    Paths ending in ".csv" (case-insensitive) are read as CSV; anything else
    is read as Excel. When no sheet name is given the first sheet is read.

    Args:
        file_path (str): Path to the CSV or Excel file
        sheet_name (str, optional): Sheet name for Excel files

    Returns:
        pd.DataFrame: Parsed data as a pandas DataFrame
    """
    if file_path.lower().endswith(".csv"):
        return pd.read_csv(file_path)
    return _read_excel_sheet(file_path, sheet_name)


# === libretext_fetcher ===
@tool
def libretext_fetcher_call(url: str, section_id: str) -> List[str]:
    """
    Fetch content from LibreTexts website by section ID.

    Returns the text of the list items of the first ``<ul>`` that follows the
    element with the given ID, or an empty list when either is missing.

    Args:
        url (str): The LibreTexts page URL
        section_id (str): The HTML section ID to extract content from

    Returns:
        list: List of text items from the specified section
    """
    resp = requests.get(url, timeout=_HTTP_TIMEOUT_SECONDS)
    soup = BeautifulSoup(resp.text, "html.parser")
    section = soup.find(id=section_id)
    if section is None:
        return []
    listing = section.find_next("ul")
    if listing is None:
        return []
    return [li.get_text(strip=True) for li in listing.find_all("li")]


# === audio_transcriber ===
@tool
def audio_transcriber_call(audio_path: str) -> str:
    """
    Transcribe audio files to text using OpenAI Whisper.

    Args:
        audio_path (str): Path to the audio file to transcribe

    Returns:
        str: Transcribed text from the audio
    """
    # The "base" model is loaded on every call.
    model = whisper.load_model("base")
    result = model.transcribe(audio_path)
    return result.get("text", "")


# === botanical_classifier ===
BOTANICAL_VEGETABLES = {"tomato", "eggplant", "pepper", "squash"}


@tool
def botanical_classifier_call(items: List[str]) -> List[str]:
    """
    Classify items as botanical vegetables.

    Matching is case-insensitive against BOTANICAL_VEGETABLES; matched items
    are returned with their original spelling and order.

    Args:
        items (list): List of items to classify

    Returns:
        list: Items that are classified as botanical vegetables
    """
    return [item for item in items if item.lower() in BOTANICAL_VEGETABLES]


# === imdb_lookup ===
@tool
def imdb_lookup_call(person_name: str) -> Dict[str, Any]:
    """
    Look up information about a person on IMDb.

    Only the first search result is used; an empty dict is returned when the
    search yields nothing.

    Args:
        person_name (str): Name of the person to search for

    Returns:
        dict: Dictionary containing person's name and filmography
    """
    ia = IMDb()
    results = ia.search_person(person_name)
    if not results:
        return {}
    person = results[0]
    ia.update(person, "filmography")
    return {"name": person["name"], "filmography": person.get("filmography", {})}


# === python_executor ===
@tool
def python_executor_call(script_path: str) -> str:
    """
    Execute a Python script and return its output.

    The script runs with the current interpreter and no sandboxing, so it
    must only be pointed at trusted scripts.

    Args:
        script_path (str): Path to the Python script to execute

    Returns:
        str: Standard output from the script execution

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero status.
    """
    proc = subprocess.run(
        [sys.executable, script_path],
        capture_output=True,
        text=True,
        check=True,
    )
    return proc.stdout.strip()


# === sports_stats_api ===
@tool
def sports_stats_api_call(season: int, team: str, stat: str = "BB") -> Dict[str, Any]:
    """
    Get sports statistics for a team in a specific season.

    Not implemented yet; calling it always raises NotImplementedError.

    Args:
        season (int): The sports season year
        team (str): The team name
        stat (str): The statistic type to retrieve (default: "BB")

    Returns:
        dict: Sports statistics data
    """
    raise NotImplementedError("sports_stats_api integration not configured")


# === web_scraper ===
@tool
def web_scraper_call(url: str, css_selector: str) -> List[str]:
    """
    Scrape content from a website using CSS selectors.

    Args:
        url (str): The URL to scrape
        css_selector (str): CSS selector to find elements

    Returns:
        list: List of text content from matching elements
    """
    resp = requests.get(url, timeout=_HTTP_TIMEOUT_SECONDS)
    soup = BeautifulSoup(resp.text, "html.parser")
    return [el.get_text(strip=True) for el in soup.select(css_selector)]


# === excel_reader ===
@tool
def excel_reader_call(file_path: str, sheet_name: Optional[str] = None) -> pd.DataFrame:
    """
    Read Excel files into a pandas DataFrame.

    When no sheet name is given the first sheet is read.

    Args:
        file_path (str): Path to the Excel file
        sheet_name (str, optional): Specific sheet name to read

    Returns:
        pd.DataFrame: Data from the Excel file as a pandas DataFrame
    """
    return _read_excel_sheet(file_path, sheet_name)


# === competition_db ===
@tool
def competition_db_call(year_start: int, year_end: int) -> List[Dict[str, Any]]:
    """
    Query competition database for events between specified years.

    Not implemented yet; calling it always raises NotImplementedError.

    Args:
        year_start (int): Start year for the query range
        year_end (int): End year for the query range

    Returns:
        list: List of competition events in the specified year range
    """
    raise NotImplementedError("competition_db integration not configured")


# === japanese_baseball_api ===
@tool
def japanese_baseball_api_call(team: str, date: str) -> List[Dict[str, Any]]:
    """
    Get Japanese baseball data for a specific team and date.

    Not implemented yet; calling it always raises NotImplementedError.

    Args:
        team (str): The baseball team name
        date (str): The date in YYYY-MM-DD format

    Returns:
        list: List of baseball game data for the specified team and date
    """
    raise NotImplementedError("japanese_baseball_api integration not configured")


tools_list = [
    wikipedia_search_call,
    youtube_transcript_call,
    video_frame_analyzer_call,
    string_manipulator_call,
    vision_chess_engine_call,
    table_parser_call,
    libretext_fetcher_call,
    audio_transcriber_call,
    botanical_classifier_call,
    imdb_lookup_call,
    python_executor_call,
    sports_stats_api_call,
    web_scraper_call,
    excel_reader_call,
    competition_db_call,
    japanese_baseball_api_call,
]