|
|
import wikipedia |
|
|
from youtube_transcript_api import YouTubeTranscriptApi |
|
|
import cv2 |
|
|
from pytube import YouTube |
|
|
import re |
|
|
import chess |
|
|
import chess.engine |
|
|
import pandas as pd |
|
|
import requests |
|
|
from bs4 import BeautifulSoup |
|
|
import whisper |
|
|
from imdb import IMDb |
|
|
import subprocess |
|
|
import sys |
|
|
from typing import Optional, List, Dict, Any |
|
|
from smolagents import tool |
|
|
|
|
|
|
|
|
@tool
def wikipedia_search_call(query: str) -> Dict[str, Any]:
    """
    Search Wikipedia for a topic and return the matching page's data.

    Args:
        query (str): The search query/topic to look up on Wikipedia

    Returns:
        dict: Dictionary containing the page title, content, and sections
    """
    page = wikipedia.page(query)
    # Map each section heading to its body text (may be None for
    # headings wikipedia cannot extract).
    section_map: Dict[str, Any] = {}
    for heading in page.sections:
        section_map[heading] = page.section(heading)
    return {
        "title": page.title,
        "content": page.content,
        "sections": section_map,
    }
|
|
|
|
|
|
|
|
@tool
def youtube_transcript_call(video_id: str) -> List[Dict[str, Any]]:
    """
    Fetch the transcript/subtitles for a YouTube video.

    Args:
        video_id (str): The YouTube video ID (the part after v= in the URL)

    Returns:
        list: List of transcript segments with text and timing information
    """
    segments = YouTubeTranscriptApi.get_transcript(video_id)
    return segments
|
|
|
|
|
|
|
|
def download_and_sample(video_id: str, fps: int = 1) -> List[Any]:
    """
    Download a YouTube video and sample frames at the requested rate.

    Args:
        video_id (str): The YouTube video ID
        fps (int): Frames per second to sample (default: 1)

    Returns:
        list: List of video frames as numpy arrays

    Raises:
        ValueError: If no progressive mp4 stream is available for the video.
    """
    url = f"https://www.youtube.com/watch?v={video_id}"
    yt = YouTube(url)
    stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
    # .first() returns None when no matching stream exists; fail loudly
    # instead of raising an opaque AttributeError on .download below.
    if stream is None:
        raise ValueError(f"No progressive mp4 stream available for video {video_id}")
    path = stream.download(filename=f"{video_id}.mp4")
    cap = cv2.VideoCapture(path)
    try:
        # Some containers report 0 FPS; fall back to 1 to avoid a zero step.
        frame_rate = cap.get(cv2.CAP_PROP_FPS) or 1
        step = max(1, int(frame_rate / fps))
        frames = []
        idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if idx % step == 0:
                frames.append(frame)
            idx += 1
    finally:
        # Always release the capture so the underlying file handle is not
        # leaked if frame decoding raises mid-loop.
        cap.release()
    return frames
|
|
|
|
|
def detect_species(frame: Any) -> List[str]:
    """
    Detect bird species in a video frame.

    NOTE(review): this is a stub — no detection model is wired in, so it
    always returns an empty list regardless of input.

    Args:
        frame: Video frame as numpy array

    Returns:
        list: List of detected bird species names
    """
    detections: List[str] = []
    return detections
|
|
|
|
|
@tool
def video_frame_analyzer_call(video_id: str) -> int:
    """
    Analyze video frames and report the richest single frame.

    Args:
        video_id (str): The YouTube video ID to analyze

    Returns:
        int: Maximum number of unique bird species detected in any frame
    """
    # Track the running maximum instead of materializing a count list;
    # an empty video naturally yields 0.
    best = 0
    for frame in download_and_sample(video_id):
        unique_species = len(set(detect_species(frame)))
        if unique_species > best:
            best = unique_species
    return best
|
|
|
|
|
|
|
|
@tool
def string_manipulator_call(text: str, operation: str = "reverse", pattern: Optional[str] = None, replacement: Optional[str] = None) -> Any:
    """
    Perform various string manipulation operations.

    Args:
        text (str): The input text to manipulate
        operation (str): The operation to perform ("reverse", "split", "regex_replace")
        pattern (str, optional): Regex pattern for replacement operations
        replacement (str, optional): Replacement string for regex operations

    Returns:
        Any: Result of the string operation (string or list)

    Raises:
        ValueError: If the operation is unknown, or if "regex_replace" is
            requested without both `pattern` and `replacement`.
    """
    if operation == "reverse":
        return text[::-1]
    if operation == "split":
        return text.split()
    if operation == "regex_replace":
        # Report missing arguments explicitly: the old truthiness check both
        # rejected a legitimate empty-string pattern and fell through to a
        # misleading "Unsupported operation" error.
        if pattern is None or replacement is None:
            raise ValueError("regex_replace requires both 'pattern' and 'replacement'")
        return re.sub(pattern, replacement, text)
    raise ValueError(f"Unsupported operation: {operation}")
|
|
|
|
|
|
|
|
@tool
def vision_chess_engine_call(fen: str, depth: int = 20) -> str:
    """
    Analyze a chess position and suggest the best move using Stockfish.

    Args:
        fen (str): FEN notation representing the chess position
        depth (int): Search depth for the chess engine (default: 20)

    Returns:
        str: The best move in Standard Algebraic Notation (SAN)
    """
    # Requires a "stockfish" binary on PATH.
    engine = chess.engine.SimpleEngine.popen_uci("stockfish")
    try:
        board = chess.Board(fen)
        result = engine.play(board, chess.engine.Limit(depth=depth))
        return board.san(result.move)
    finally:
        # quit() must run even when Board(fen) or play() raises, otherwise
        # the Stockfish subprocess is leaked.
        engine.quit()
|
|
|
|
|
|
|
|
@tool
def table_parser_call(file_path: str, sheet_name: Optional[str] = None) -> pd.DataFrame:
    """
    Parse a CSV or Excel file into a pandas DataFrame.

    Args:
        file_path (str): Path to the CSV or Excel file
        sheet_name (str, optional): Sheet name for Excel files

    Returns:
        pd.DataFrame: Parsed data as a pandas DataFrame
    """
    # Dispatch on the (case-insensitive) file extension; anything that is
    # not .csv is handed to the Excel reader.
    is_csv = file_path.lower().endswith('.csv')
    if is_csv:
        parsed = pd.read_csv(file_path)
    else:
        parsed = pd.read_excel(file_path, sheet_name=sheet_name)
    return parsed
|
|
|
|
|
|
|
|
@tool
def libretext_fetcher_call(url: str, section_id: str) -> List[str]:
    """
    Fetch list items from a LibreTexts page section.

    Args:
        url (str): The LibreTexts page URL
        section_id (str): The HTML section ID to extract content from

    Returns:
        list: List of text items from the first <ul> after the section,
            or an empty list if the section or list is not found

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.Timeout: If the request exceeds the 30-second timeout.
    """
    # Without a timeout, requests.get can block forever on a stalled
    # connection; also surface HTTP errors instead of parsing an error page.
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")
    sec = soup.find(id=section_id)
    if not sec:
        return []
    items = sec.find_next('ul')
    if items and hasattr(items, 'find_all'):
        items = items.find_all('li')
        return [li.get_text(strip=True) for li in items]
    return []
|
|
|
|
|
|
|
|
@tool
def audio_transcriber_call(audio_path: str) -> str:
    """
    Transcribe an audio file to text using OpenAI Whisper.

    Args:
        audio_path (str): Path to the audio file to transcribe

    Returns:
        str: Transcribed text from the audio
    """
    # Loads the small "base" checkpoint on every call; acceptable for a
    # one-shot tool invocation.
    model = whisper.load_model("base")
    transcription = model.transcribe(audio_path)
    text = transcription.get("text", "")
    return text
|
|
|
|
|
|
|
|
# Items the classifier below treats as "botanical vegetables".
# NOTE(review): tomato, eggplant, pepper, and squash are botanically
# fruits — confirm the intended semantics of this set against its callers.
BOTANICAL_VEGETABLES = {"tomato", "eggplant", "pepper", "squash"}


@tool
def botanical_classifier_call(items: List[str]) -> List[str]:
    """
    Classify items as botanical vegetables.

    Args:
        items (list): List of items to classify

    Returns:
        list: Items that are classified as botanical vegetables
    """
    matches: List[str] = []
    for candidate in items:
        # Case-insensitive membership test against the module-level set.
        if candidate.lower() in BOTANICAL_VEGETABLES:
            matches.append(candidate)
    return matches
|
|
|
|
|
|
|
|
@tool
def imdb_lookup_call(person_name: str) -> Dict[str, Any]:
    """
    Look up a person on IMDb and return their filmography.

    Args:
        person_name (str): Name of the person to search for

    Returns:
        dict: Dictionary containing the person's name and filmography,
            or an empty dict when no match is found
    """
    client = IMDb()
    matches = client.search_person(person_name)
    if not matches:
        return {}
    # Take the top search hit and lazily load its filmography section.
    top_match = matches[0]
    client.update(top_match, 'filmography')
    return {
        "name": top_match['name'],
        "filmography": top_match.get('filmography', {}),
    }
|
|
|
|
|
|
|
|
@tool
def python_executor_call(script_path: str) -> str:
    """
    Execute a Python script with the current interpreter and return its output.

    Args:
        script_path (str): Path to the Python script to execute

    Returns:
        str: Standard output from the script execution, stripped of
            surrounding whitespace

    Raises:
        subprocess.CalledProcessError: If the script exits with a nonzero
            status (check=True).
    """
    completed = subprocess.run(
        [sys.executable, script_path],
        capture_output=True,
        text=True,
        check=True,
    )
    return completed.stdout.strip()
|
|
|
|
|
|
|
|
@tool
def sports_stats_api_call(season: int, team: str, stat: str = "BB") -> Dict[str, Any]:
    """
    Get sports statistics for a team in a specific season.

    Args:
        season (int): The sports season year
        team (str): The team name
        stat (str): The statistic type to retrieve (default: "BB")

    Returns:
        dict: Sports statistics data

    Raises:
        NotImplementedError: Always — no backing data source is configured.
    """
    message = "sports_stats_api integration not configured"
    raise NotImplementedError(message)
|
|
|
|
|
|
|
|
@tool
def web_scraper_call(url: str, css_selector: str) -> List[str]:
    """
    Scrape content from a website using CSS selectors.

    Args:
        url (str): The URL to scrape
        css_selector (str): CSS selector to find elements

    Returns:
        list: List of text content from matching elements

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.Timeout: If the request exceeds the 30-second timeout.
    """
    # Bound the request so a stalled server cannot hang the tool forever,
    # and fail loudly on error responses instead of scraping an error page.
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")
    return [el.get_text(strip=True) for el in soup.select(css_selector)]
|
|
|
|
|
|
|
|
@tool
def excel_reader_call(file_path: str, sheet_name: Optional[str] = None) -> pd.DataFrame:
    """
    Read an Excel file into a pandas DataFrame.

    Args:
        file_path (str): Path to the Excel file
        sheet_name (str, optional): Specific sheet name to read

    Returns:
        pd.DataFrame: Data from the Excel file as a pandas DataFrame
    """
    data = pd.read_excel(file_path, sheet_name=sheet_name)
    return data
|
|
|
|
|
|
|
|
@tool
def competition_db_call(year_start: int, year_end: int) -> List[Dict[str, Any]]:
    """
    Query the competition database for events between the given years.

    Args:
        year_start (int): Start year for the query range
        year_end (int): End year for the query range

    Returns:
        list: List of competition events in the specified year range

    Raises:
        NotImplementedError: Always — no backing database is configured.
    """
    message = "competition_db integration not configured"
    raise NotImplementedError(message)
|
|
|
|
|
|
|
|
@tool
def japanese_baseball_api_call(team: str, date: str) -> List[Dict[str, Any]]:
    """
    Get Japanese baseball data for a specific team and date.

    Args:
        team (str): The baseball team name
        date (str): The date in YYYY-MM-DD format

    Returns:
        list: List of baseball game data for the specified team and date

    Raises:
        NotImplementedError: Always — no backing API is configured.
    """
    message = "japanese_baseball_api integration not configured"
    raise NotImplementedError(message)
|
|
|
|
|
|
|
|
|
|
|
# Registry of every @tool-decorated callable defined above, in definition
# order; intended to be handed to a smolagents agent as its tool set.
tools_list = [
    wikipedia_search_call,
    youtube_transcript_call,
    video_frame_analyzer_call,
    string_manipulator_call,
    vision_chess_engine_call,
    table_parser_call,
    libretext_fetcher_call,
    audio_transcriber_call,
    botanical_classifier_call,
    imdb_lookup_call,
    python_executor_call,
    sports_stats_api_call,
    web_scraper_call,
    excel_reader_call,
    competition_db_call,
    japanese_baseball_api_call,
]