Spaces:
Sleeping
Sleeping
| import requests | |
| from duckduckgo_search import DDGS | |
| from langchain_core.tools import tool | |
| import time | |
| import re | |
| import json | |
| from datetime import datetime, timedelta | |
| import urllib.parse | |
# Rate limiting state shared (via `global`) with web_search:
# last_search_time: time.time() timestamp of the most recent search, or None.
# min_search_interval: minimum seconds allowed between successive searches.
last_search_time = None
min_search_interval = 1.0
def reverse_text(input: str) -> str:
    """Return the given text with its characters in reverse order."""
    # Note: parameter keeps its original (builtin-shadowing) name for
    # backward compatibility with keyword callers.
    return "".join(reversed(input))
def web_search(query: str) -> str:
    """Perform a web search using multiple providers for robustness.

    Tries Wikipedia, a Google fallback, DuckDuckGo, and a Bing fallback in
    order, accumulating results until at least 3 have been collected or
    every provider has been tried.

    Args:
        query: Free-text search query.

    Returns:
        Up to 8 formatted results joined by blank lines, or an explanatory
        message when the query is empty or every provider fails.
    """
    global last_search_time

    # Rate limiting: sleep out the remainder of the minimum interval
    # since the previous search.
    if last_search_time:
        elapsed = time.time() - last_search_time
        if elapsed < min_search_interval:
            time.sleep(min_search_interval - elapsed)

    query = query.strip()
    if not query:
        return "Empty search query"

    # BUG FIX: record the search timestamp so the rate limiter above
    # actually engages on subsequent calls (it was never updated before,
    # so the interval check could never trigger).
    last_search_time = time.time()

    results = []
    # Try each provider in order of expected quality.
    search_methods = [
        ("Wikipedia", search_wikipedia),
        ("Google (via SerpAPI simulation)", search_google_fallback),
        ("DuckDuckGo", search_duckduckgo),
        ("Bing", search_bing_fallback),
    ]
    for method_name, method_func in search_methods:
        try:
            print(f"Trying {method_name} search...")
            method_results = method_func(query)
            if method_results:
                results.extend(method_results)
                print(f"{method_name} found {len(method_results)} results")
                if len(results) >= 3:  # Enough results
                    break
        except Exception as e:
            # Best-effort: a failing provider must not abort the search.
            print(f"{method_name} search failed: {e}")
            continue

    if not results:
        return "No search results found. All search methods failed."

    # Format at most 8 results as "title. content (Source: url)".
    formatted_results = []
    for result in results[:8]:
        if isinstance(result, dict):
            title = result.get('title', '')
            content = result.get('content', '')
            url = result.get('url', '')
            formatted = f"{title}. {content}"
            if url:
                formatted += f" (Source: {url})"
            formatted_results.append(formatted)
        else:
            formatted_results.append(str(result))
    return "\n\n".join(formatted_results)
def search_wikipedia(query: str) -> list:
    """Search Wikipedia directly.

    Runs a MediaWiki full-text search for the query, then fetches a
    plain-text intro extract for each of the top hits.

    Args:
        query: Free-text search query.

    Returns:
        A list of dicts with "title", "content" and "url" keys (built from
        the top 3 search hits). Returns whatever was collected so far — an
        empty list in the worst case — on any error; errors are printed,
        never raised.
    """
    # NOTE(review): indentation below is reconstructed from a
    # whitespace-mangled source; the `else` is read as the per-page
    # snippet fallback of `if extract:` — confirm against file history.
    results = []
    try:
        # Wikipedia API endpoint (same URL serves search and page fetch).
        search_url = "https://en.wikipedia.org/w/api.php"
        # First, search for articles matching the query.
        search_params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "format": "json",
            "srlimit": 5,
            "srprop": "snippet|titlesnippet|size|wordcount"
        }
        response = requests.get(search_url, params=search_params, timeout=10)
        if response.status_code == 200:
            data = response.json()
            search_results = data.get("query", {}).get("search", [])
            for item in search_results[:3]:
                title = item.get("title", "")
                # Search snippets contain HTML highlight tags; strip them.
                snippet = re.sub(r'<[^>]+>', '', item.get("snippet", ""))
                # Get more detailed content: plain-text intro (up to 5
                # sentences) plus the canonical page URL.
                page_params = {
                    "action": "query",
                    "prop": "extracts|info",
                    "exintro": True,
                    "explaintext": True,
                    "inprop": "url",
                    "titles": title,
                    "format": "json",
                    "exsentences": 5
                }
                page_response = requests.get(search_url, params=page_params, timeout=10)
                if page_response.status_code == 200:
                    page_data = page_response.json()
                    pages = page_data.get("query", {}).get("pages", {})
                    for page_id, page_info in pages.items():
                        extract = page_info.get("extract", "")
                        url = page_info.get("fullurl", "")
                        if extract:
                            results.append({
                                "title": f"Wikipedia: {title}",
                                # Cap content so one article can't dominate.
                                "content": extract[:500],
                                "url": url
                            })
                            break
                        else:
                            # Use snippet if can't get extract
                            results.append({
                                "title": f"Wikipedia: {title}",
                                "content": snippet,
                                "url": f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}"
                            })
    except Exception as e:
        # Best-effort helper: swallow and report, caller gets partial results.
        print(f"Wikipedia search error: {e}")
    return results
def search_duckduckgo(query: str) -> list:
    """Search using DuckDuckGo.

    Returns a list of {"title", "content", "url"} dicts (at most 5);
    empty on failure — errors are printed, never raised.
    """
    results = []
    try:
        with DDGS() as ddgs:
            # Simple search without problematic parameters
            for hit in list(ddgs.text(query, max_results=5)):
                results.append({
                    "title": hit.get("title", ""),
                    "content": hit.get("body", ""),
                    "url": hit.get("href", ""),
                })
    except Exception as e:
        print(f"DuckDuckGo error: {e}")
    return results
def search_google_fallback(query: str) -> list:
    """Fallback Google search using alternative methods.

    Placeholder: prepares a request (URL + browser-like headers) but never
    issues it, so this provider currently always returns an empty list.
    """
    hits: list = []
    try:
        # Try Google Custom Search JSON API simulation
        # This is a fallback method - in production, use proper API
        encoded = urllib.parse.quote(query)
        # Use a Google search URL with a browser-like User-Agent ready
        # for a future HTML fetch.
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        target_url = f"https://www.google.com/search?q={encoded}&hl=en"
        # Note: This is a simplified approach and may not always work
        # In production, use Google Custom Search API
    except Exception as e:
        print(f"Google fallback error: {e}")
    return hits
def search_bing_fallback(query: str) -> list:
    """Fallback Bing search.

    Placeholder: no request is made, so this always returns an empty list.
    """
    found: list = []
    try:
        # Bing Web Search API would be used here in production
        # This is a placeholder for the pattern
        pass
    except Exception as e:
        print(f"Bing fallback error: {e}")
    return found
| def calculate(expression: str) -> str: | |
| """Evaluate mathematical expressions safely.""" | |
| try: | |
| # Clean the expression | |
| expression = expression.strip() | |
| # Handle various notations | |
| expression = expression.replace("×", "*").replace("÷", "/") | |
| expression = expression.replace("^", "**") | |
| expression = expression.replace(",", "") | |
| # Handle percentages | |
| expression = re.sub(r'(\d+(?:\.\d+)?)\s*%\s*of\s*(\d+(?:\.\d+)?)', r'(\2 * \1 / 100)', expression) | |
| expression = re.sub(r'(\d+(?:\.\d+)?)\s*%', r'(\1/100)', expression) | |
| # Safe evaluation | |
| allowed_names = { | |
| "abs": abs, "round": round, "min": min, "max": max, | |
| "pow": pow, "sum": sum, "__builtins__": {} | |
| } | |
| result = eval(expression, allowed_names) | |
| if isinstance(result, float) and result.is_integer(): | |
| return str(int(result)) | |
| return str(result) | |
| except Exception as e: | |
| return f"Calculation error: {e}" | |
def wikipedia_summary(query: str) -> str:
    """Get Wikipedia summary for a topic.

    Combines the top two results from search_wikipedia into one string;
    returns an explanatory message when nothing is found or on error.
    """
    try:
        hits = search_wikipedia(query)
        if not hits:
            return f"No Wikipedia article found for '{query}'"
        # Combine top results as "title: content" paragraphs.
        return "\n\n".join(f"{hit['title']}: {hit['content']}" for hit in hits[:2])
    except Exception as e:
        return f"Wikipedia error: {e}"
def define_term(term: str) -> str:
    """Define a term using dictionary API.

    Queries dictionaryapi.dev first; on miss, falls back to a Wikipedia
    search. Returns an explanatory message when nothing is found.
    """
    try:
        term = term.strip().lower()
        # Try dictionary API first.
        response = requests.get(
            f"https://api.dictionaryapi.dev/api/v2/entries/en/{term}",
            timeout=10
        )
        if response.status_code == 200:
            data = response.json()
            # Flatten every non-empty definition across entries/meanings.
            definitions = [
                d.get("definition", "")
                for entry in data
                for meaning in entry.get("meanings", [])
                for d in meaning.get("definitions", [])
                if d.get("definition", "")
            ]
            if definitions:
                return definitions[0]  # Return first definition
        # Fallback to Wikipedia
        wiki_results = search_wikipedia(f"{term} definition meaning")
        if wiki_results:
            return wiki_results[0]['content'][:200]
        return f"No definition found for '{term}'"
    except Exception as e:
        return f"Definition error: {e}"
# Advanced search function for specific GAIA queries
def gaia_smart_search(query: str) -> str:
    """Smart search specifically optimized for GAIA questions.

    Recognizes a few question shapes (discography, Olympics, academic
    papers) and rewrites the query accordingly before delegating to
    web_search; anything else is searched verbatim.
    """
    lowered = query.lower()

    # Album / discography questions -> search the artist's discography.
    if 'album' in lowered or 'discography' in lowered:
        artist = re.search(r'([\w\s]+?)(?:\s+album|\s+discography|\s+between)', query)
        if artist:
            return web_search(f"{artist.group(1).strip()} discography albums list")

    # Olympic questions -> search by the four-digit year.
    if 'olympic' in lowered:
        year = re.search(r'(\d{4})\s+(?:summer|winter)?\s*olympics', lowered)
        if year:
            return web_search(f"{year.group(1)} Olympics participating countries athletes count")

    # Academic paper questions -> search by the author's name.
    if 'paper' in lowered or 'article' in lowered:
        author = re.search(r'by\s+([\w\s]+?)(?:\s+was|\s+published|\s+in)', query)
        if author:
            return web_search(f"{author.group(1).strip()} research paper article")

    # Default to regular search
    return web_search(query)
# List of tools exported for the agent. NOTE(review): these are plain
# callables — the imported `tool` decorator is never applied; confirm the
# consuming framework wraps them itself.
TOOLS = [web_search, calculate, wikipedia_summary, define_term, reverse_text, gaia_smart_search]