from duckduckgo_search import DDGS
import wikipedia
import chess
import chess.engine
import sympy
import fitz
import pandas as pd
from imdb import IMDb
from youtube_transcript_api import YouTubeTranscriptApi
import yt_dlp
import whisper
import os
import re
import requests
from datetime import datetime
from bs4 import BeautifulSoup
from langchain_core.tools import tool
from tavily import TavilyClient
from typing import Optional, List

TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
client = TavilyClient(api_key=TAVILY_API_KEY)

def handle_question(question: str) -> str:
    """
    Simple router for question types. Uses web_lookup as the default.
    """
    # The tools below are plain functions, so call them directly
    # rather than via a .run() method.
    if "table" in question and "*" in question:
        return detect_non_commutative_subset(question)
    if "reverse" in question or "backwards" in question:
        return reverse_sentence(question)
    if "vegetables" in question:
        return ", ".join(filter_vegetables(question.split(", ")))
    return web_lookup(question)
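
# Illustrative routing (sketched from the keyword checks above; which branch
# fires depends only on the substrings present in the question):
#   handle_question("...nettirw sdrawkcab si sihT")              -> reverse_sentence
#   handle_question("Which are vegetables: broccoli, milk, celery") -> filter_vegetables
#   handle_question("When was the Eiffel Tower built?")          -> web_lookup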

def count_albums_by_year_range(
    artist_name: str, start_year: int, end_year: int
) -> Optional[int]:
    """
    Counts how many albums (typically studio albums) an artist released between
    two years by parsing their Wikipedia discography page.

    Args:
        artist_name (str): Name of the artist (e.g., "Mercedes Sosa").
        start_year (int): Start of the year range (inclusive).
        end_year (int): End of the year range (inclusive).

    Returns:
        Optional[int]: Number of albums released in the year range, or None if parsing failed.
    """
    print("🛠️ count_albums_by_year_range")
    try:
        title = artist_name.strip().replace(" ", "_") + "_discography"
        url = f"https://en.wikipedia.org/wiki/{title}"
        res = requests.get(url, timeout=10)
        if res.status_code != 200:
            return None
        soup = BeautifulSoup(res.text, "html.parser")
        text = soup.get_text()
        # Match years in parentheses like (2003), (2005). The alternation must
        # be non-capturing so findall returns the full year, not just "19"/"20".
        years = re.findall(r"\(((?:19|20)\d{2})\)", text)
        year_nums = [int(y) for y in years]
        return sum(start_year <= y <= end_year for y in year_nums)
    except Exception:
        return None
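
# Illustrative call (network-dependent; the result varies with whatever year
# strings like "(2003)" appear on the live discography page):
#   count_albums_by_year_range("Mercedes Sosa", 2000, 2009)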

def web_lookup(query: str) -> str:
    """
    Unified web search tool that:
    - Uses the Tavily API to retrieve relevant snippets.
    - Extracts the most relevant numeric or short factual answer.
    - Falls back to Wikipedia if Tavily fails.

    Args:
        query (str): The user query or question.

    Returns:
        str: A concise factual answer extracted from Tavily or Wikipedia.
    """
    print("🛠️ web_lookup")
    try:
        # Step 1: Tavily search
        response = client.search(query=query, search_depth="advanced", max_results=5)
        snippets = [r["content"] for r in response.get("results", [])]
        for s in snippets:
            # Try to extract a meaningful answer (year, name, short fact)
            match = re.search(r"\b(18|19|20)\d{2}\b", s)
            if match:
                return match.group()
            elif len(s.split()) <= 12:
                return s.strip()
        # Step 2: Wikipedia fallback
        # Guess the page title from the last word of the query
        wiki_title = query.split(" ")[-1].capitalize()
        wiki_url = f"https://en.wikipedia.org/wiki/{wiki_title}"
        res = requests.get(wiki_url, timeout=10)
        if res.status_code != 200:
            return "❌ Wikipedia page not found."
        soup = BeautifulSoup(res.text, "html.parser")
        text = soup.get_text()
        match = re.search(r"\b(18|19|20)\d{2}\b", text)
        if match:
            return match.group()
        # Fall back to the first non-empty paragraph
        for p in soup.find_all("p"):
            if p.get_text(strip=True):
                return p.get_text(strip=True)
        return "❌ No relevant data found."
    except Exception as e:
        return f"❌ Error during web lookup: {str(e)}"

def extract_number_from_snippets(snippets: list[str]) -> Optional[int]:
    """
    Extracts the first integer number found in a list of text snippets.

    This function scans each snippet in order and looks for the first standalone
    integer (1 to 4 digits). It returns the first match as an integer. If no
    match is found across all snippets, the function returns None.

    Args:
        snippets (list[str]): A list of strings, typically search result snippets.

    Returns:
        Optional[int]: The first integer found in the snippets, or None if none found.

    Example:
        >>> extract_number_from_snippets(["The Eiffel Tower was built in 1889.", "Height is 324m"])
        1889
    """
    print("🛠️ extract_number_from_snippets")
    for s in snippets:
        match = re.search(r"\b\d{1,4}\b", s)
        if match:
            return int(match.group())
    return None

def get_article_nominator_from_fac_page(title: str) -> str:
    """
    Get the nominator of a Featured Article by scanning the main FAC page (not just archives).
    """
    print("🛠️ get_article_nominator_from_fac_page")
    # Fetch the raw wikitext: the [[User:...]] patterns matched below exist
    # only in wikitext, not in the rendered HTML of the page.
    url = (
        "https://en.wikipedia.org/w/index.php"
        f"?title=Wikipedia:Featured_article_candidates/{title}&action=raw"
    )
    res = requests.get(url, timeout=10)
    if res.status_code != 200:
        return "Nominator not found"
    text = res.text
    # Try the direct pattern first
    match = re.search(r"nominated by \[\[User:(.*?)\]\]", text, re.IGNORECASE)
    if match:
        return match.group(1).strip()
    # Fallback: try to find the first signed comment (e.g. --[[User:XYZ|...]])
    match = re.search(r"--\s*\[\[User:(.*?)\|", text)
    if match:
        return match.group(1).strip()
    return "Nominator not found"

def extract_structured_facts_from_url(url: str, selector: Optional[str] = None) -> str:
    """
    Extract structured facts (tables, bullet lists, or sections) from a webpage.

    Args:
        url (str): Target webpage URL.
        selector (Optional[str]): Optional CSS selector to narrow down the section.

    Returns:
        str: Cleaned structured data from the page.
    """
    print("🛠️ extract_structured_facts_from_url")
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        # If a selector is provided, use it directly
        if selector:
            elements = soup.select(selector)
        else:
            # Fall back to tables and bullet lists if no selector is provided
            elements = soup.select("table, ul")
        if not elements:
            return "No structured content found."
        content_lines = []
        for el in elements:
            if el.name == "table":
                for row in el.find_all("tr"):
                    cols = [td.get_text(strip=True) for td in row.find_all(["td", "th"])]
                    if cols:
                        content_lines.append(" | ".join(cols))
            elif el.name == "ul":
                items = [li.get_text(strip=True) for li in el.find_all("li")]
                content_lines.extend(items)
        return "\n".join(content_lines[:100])  # limit output size
    except Exception as e:
        return f"Error extracting structured facts: {str(e)}"

def categorize_grocery_items(items: list[str]) -> dict:
    """
    Categorizes grocery items into food groups: vegetables, fruits, grains,
    dairy, snacks, nuts/legumes, spices, and beverages.

    Args:
        items (list[str]): List of grocery item names.

    Returns:
        dict: Dictionary mapping category names to sorted lists of items in that category.
    """
    print(f"🛠️ categorize_grocery_items called with: {items}")
    categories = {
        "vegetables": {"broccoli", "celery", "green beans", "lettuce", "sweet potatoes", "zucchini"},
        "fruits": {"plums", "bananas", "strawberries", "blueberries", "bell pepper"},
        "grains": {"rice", "flour", "corn"},
        "dairy": {"milk", "eggs"},
        "snacks": {"Oreos"},
        "nuts_and_legumes": {"peanuts", "acorns"},
        "spices": {"whole allspice", "fresh basil"},
        "beverages": {"whole bean coffee"},
    }
    result = {category: [] for category in categories}
    result["other"] = []
    for item in items:
        found = False
        for category, items_set in categories.items():
            if item in items_set:
                result[category].append(item)
                found = True
                break
        if not found:
            result["other"].append(item)
    # Sort each category list
    for key in result:
        result[key] = sorted(result[key])
    return result
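
# Illustrative call (every category key is present in the result; items not in
# any known set land in "other"):
#   categorize_grocery_items(["milk", "broccoli", "Oreos", "quinoa"])
#   # -> {"vegetables": ["broccoli"], ..., "dairy": ["milk"],
#   #     "snacks": ["Oreos"], ..., "other": ["quinoa"]}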

# @tool
# def search_featured_articles_by_date_range(start_date: str, end_date: str) -> list[str]:
#     """
#     Searches the English Wikipedia featured article archive and returns article titles
#     promoted between start_date and end_date.
#
#     Args:
#         start_date (str): Start date in YYYY-MM-DD format (e.g. '2016-11-01')
#         end_date (str): End date in YYYY-MM-DD format (e.g. '2016-11-30')
#
#     Returns:
#         list[str]: A list of article titles promoted as Featured Articles during that period.
#     """
#     print(f"🛠️ search_featured_articles_by_date_range called with: {start_date} , {end_date}")
#     try:
#         base_url = "https://en.wikipedia.org/wiki/Wikipedia:Featured_articles"
#         archive_url = "https://en.wikipedia.org/wiki/Wikipedia:Featured_articles_by_year"
#         start = datetime.strptime(start_date, "%Y-%m-%d")
#         end = datetime.strptime(end_date, "%Y-%m-%d")
#         # Collect year-specific pages
#         result_titles = []
#         for year in range(start.year, end.year + 1):
#             url = f"https://en.wikipedia.org/wiki/Wikipedia:Featured_articles_{year}"
#             response = requests.get(url)
#             if response.status_code != 200:
#                 continue
#             soup = BeautifulSoup(response.text, "html.parser")
#             for li in soup.select("li"):
#                 text = li.get_text()
#                 # Non-capturing group so findall returns full dates, not bare years
#                 date_matches = re.findall(r"\b(?:19|20)\d{2}-\d{2}-\d{2}\b", text)
#                 print("🔍 Date matches:", date_matches)
#                 for match in date_matches:
#                     try:
#                         d = datetime.strptime(match, "%Y-%m-%d")
#                         if start <= d <= end:
#                             a_tag = li.find("a")
#                             if a_tag:
#                                 result_titles.append(a_tag.get_text(strip=True))
#                     except ValueError:
#                         continue
#         return sorted(set(result_titles))
#     except Exception as e:
#         return [f"Error: {str(e)}"]

def detect_non_commutative_subset(table_text: str) -> str:
    """
    Analyzes a markdown-style operation table and returns the minimal subset of
    elements involved in at least one non-commutative pair.

    Args:
        table_text (str): Markdown-formatted binary operation table.

    Returns:
        str: Comma-separated list of elements in alphabetical order, no spaces.
    """
    print("🛠️ detect_non_commutative_subset called")
    try:
        lines = [line for line in table_text.strip().splitlines() if "|" in line]
        if len(lines) < 3:
            return ""
        # Parse headers (skip the label cell and rogue entries)
        headers = [h.strip() for h in lines[0].split('|')[1:] if h.strip() and h.strip() != '*']
        operation = {}
        for line in lines[2:]:
            parts = [c.strip() for c in line.split('|')[1:] if c.strip()]
            if len(parts) != len(headers) + 1:
                continue
            row_label = parts[0]
            for i, result in enumerate(parts[1:]):
                col_label = headers[i]
                operation[(row_label, col_label)] = result
        involved = set()
        for a in headers:
            for b in headers:
                if operation.get((a, b)) != operation.get((b, a)):
                    involved.update([a, b])
        return ",".join(sorted(involved)) if involved else ""
    except Exception:
        return ""

def reverse_sentence(sentence: str) -> str:
    """
    Reverses a sentence written backwards to restore its original form.

    Args:
        sentence (str): A sentence with characters in reverse order.

    Returns:
        str: The sentence in normal forward order.
    """
    print(f"🛠️ reverse_sentence called with: {sentence}")
    return sentence[::-1]

def filter_vegetables(items: list[str]) -> list[str]:
    """
    Filters and returns only the true vegetables from a list of grocery items.

    This tool takes a list of grocery item names and returns a new list containing
    only those items that are classified as true vegetables in the botanical sense
    (i.e., excluding botanical fruits that are commonly considered vegetables in
    culinary contexts). Currently, the classification is based on a predefined set
    of known vegetable names. The resulting list is alphabetically sorted to
    ensure consistency.

    Args:
        items (list[str]): A list of grocery item names (as lowercase strings).

    Returns:
        list[str]: An alphabetically sorted list of items that are true vegetables.

    Example:
        >>> filter_vegetables(["milk", "broccoli", "plums", "celery"])
        ['broccoli', 'celery']
    """
    print(f"🛠️ filter_vegetables called with: {items}")
    vegetables = {"broccoli", "celery", "green beans", "lettuce", "sweet potatoes", "zucchini"}
    return sorted([item for item in items if item in vegetables])

# List of all tools
all_tools = [
    count_albums_by_year_range,
    web_lookup,
    extract_number_from_snippets,
    detect_non_commutative_subset,
    reverse_sentence,
    filter_vegetables,
    categorize_grocery_items,
    get_article_nominator_from_fac_page,
    # Optional: handle_question (for fallback routing)
]
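
if __name__ == "__main__":
    # Minimal smoke test (a sketch; only the offline tools are exercised here,
    # since web_lookup additionally needs TAVILY_API_KEY and network access):
    print(reverse_sentence(".sdrawkcab si sihT"))
    print(filter_vegetables(["milk", "broccoli", "plums", "celery"]))
    print(categorize_grocery_items(["milk", "broccoli", "Oreos", "quinoa"]))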