Spaces:
Sleeping
Sleeping
| import requests | |
| from duckduckgo_search import DDGS | |
| from langchain_core.tools import tool | |
| import time | |
| import re | |
| import json | |
| from datetime import datetime, timedelta | |
| import urllib.parse | |
# Rate limiting state shared (via `global`) with web_search:
# last_search_time: time.time() timestamp of the most recent search, or None.
# min_search_interval: minimum seconds allowed between successive searches.
last_search_time = None
min_search_interval = 1.0
def reverse_text(input: str) -> str:
    """Return the given text with its characters in reverse order."""
    # Note: parameter keeps its original (builtin-shadowing) name for
    # backward compatibility with keyword callers.
    return "".join(reversed(input))
def web_search(query: str) -> str:
    """Perform a web search using multiple providers for robustness.

    Tries Wikipedia, a Google fallback, DuckDuckGo, and a Bing fallback in
    order, accumulating results until at least 3 have been collected or
    every provider has been tried.

    Args:
        query: Free-text search query.

    Returns:
        Up to 8 formatted results joined by blank lines, or an explanatory
        message when the query is empty or every provider fails.
    """
    global last_search_time

    # Rate limiting: sleep out the remainder of the minimum interval
    # since the previous search.
    if last_search_time:
        elapsed = time.time() - last_search_time
        if elapsed < min_search_interval:
            time.sleep(min_search_interval - elapsed)

    query = query.strip()
    if not query:
        return "Empty search query"

    # BUG FIX: record the search timestamp so the rate limiter above
    # actually engages on subsequent calls (it was never updated before,
    # so the interval check could never trigger).
    last_search_time = time.time()

    results = []
    # Try each provider in order of expected quality.
    search_methods = [
        ("Wikipedia", search_wikipedia),
        ("Google (via SerpAPI simulation)", search_google_fallback),
        ("DuckDuckGo", search_duckduckgo),
        ("Bing", search_bing_fallback),
    ]
    for method_name, method_func in search_methods:
        try:
            print(f"Trying {method_name} search...")
            method_results = method_func(query)
            if method_results:
                results.extend(method_results)
                print(f"{method_name} found {len(method_results)} results")
                if len(results) >= 3:  # Enough results
                    break
        except Exception as e:
            # Best-effort: a failing provider must not abort the search.
            print(f"{method_name} search failed: {e}")
            continue

    if not results:
        return "No search results found. All search methods failed."

    # Format at most 8 results as "title. content (Source: url)".
    formatted_results = []
    for result in results[:8]:
        if isinstance(result, dict):
            title = result.get('title', '')
            content = result.get('content', '')
            url = result.get('url', '')
            formatted = f"{title}. {content}"
            if url:
                formatted += f" (Source: {url})"
            formatted_results.append(formatted)
        else:
            formatted_results.append(str(result))
    return "\n\n".join(formatted_results)
def search_wikipedia(query: str) -> list:
    """Search Wikipedia directly.

    Runs a MediaWiki full-text search for the query, then fetches a
    plain-text intro extract for each of the top hits.

    Args:
        query: Free-text search query.

    Returns:
        A list of dicts with "title", "content" and "url" keys (built from
        the top 3 search hits). Returns whatever was collected so far — an
        empty list in the worst case — on any error; errors are printed,
        never raised.
    """
    # NOTE(review): indentation below is reconstructed from a
    # whitespace-mangled source; the `else` is read as the per-page
    # snippet fallback of `if extract:` — confirm against file history.
    results = []
    try:
        # Wikipedia API endpoint (same URL serves search and page fetch).
        search_url = "https://en.wikipedia.org/w/api.php"
        # First, search for articles matching the query.
        search_params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "format": "json",
            "srlimit": 5,
            "srprop": "snippet|titlesnippet|size|wordcount"
        }
        response = requests.get(search_url, params=search_params, timeout=10)
        if response.status_code == 200:
            data = response.json()
            search_results = data.get("query", {}).get("search", [])
            for item in search_results[:3]:
                title = item.get("title", "")
                # Search snippets contain HTML highlight tags; strip them.
                snippet = re.sub(r'<[^>]+>', '', item.get("snippet", ""))
                # Get more detailed content: plain-text intro (up to 5
                # sentences) plus the canonical page URL.
                page_params = {
                    "action": "query",
                    "prop": "extracts|info",
                    "exintro": True,
                    "explaintext": True,
                    "inprop": "url",
                    "titles": title,
                    "format": "json",
                    "exsentences": 5
                }
                page_response = requests.get(search_url, params=page_params, timeout=10)
                if page_response.status_code == 200:
                    page_data = page_response.json()
                    pages = page_data.get("query", {}).get("pages", {})
                    for page_id, page_info in pages.items():
                        extract = page_info.get("extract", "")
                        url = page_info.get("fullurl", "")
                        if extract:
                            results.append({
                                "title": f"Wikipedia: {title}",
                                # Cap content so one article can't dominate.
                                "content": extract[:500],
                                "url": url
                            })
                            break
                        else:
                            # Use snippet if can't get extract
                            results.append({
                                "title": f"Wikipedia: {title}",
                                "content": snippet,
                                "url": f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}"
                            })
    except Exception as e:
        # Best-effort helper: swallow and report, caller gets partial results.
        print(f"Wikipedia search error: {e}")
    return results
def search_duckduckgo(query: str) -> list:
    """Search using DuckDuckGo.

    Returns a list of {"title", "content", "url"} dicts (at most 5);
    empty on failure — errors are printed, never raised.
    """
    results = []
    try:
        with DDGS() as ddgs:
            # Simple search without problematic parameters
            for hit in list(ddgs.text(query, max_results=5)):
                results.append({
                    "title": hit.get("title", ""),
                    "content": hit.get("body", ""),
                    "url": hit.get("href", ""),
                })
    except Exception as e:
        print(f"DuckDuckGo error: {e}")
    return results
def search_google_fallback(query: str) -> list:
    """Fallback Google search using alternative methods.

    Placeholder: prepares a request (URL + browser-like headers) but never
    issues it, so this provider currently always returns an empty list.
    """
    hits: list = []
    try:
        # Try Google Custom Search JSON API simulation
        # This is a fallback method - in production, use proper API
        encoded = urllib.parse.quote(query)
        # Use a Google search URL with a browser-like User-Agent ready
        # for a future HTML fetch.
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        target_url = f"https://www.google.com/search?q={encoded}&hl=en"
        # Note: This is a simplified approach and may not always work
        # In production, use Google Custom Search API
    except Exception as e:
        print(f"Google fallback error: {e}")
    return hits
def search_bing_fallback(query: str) -> list:
    """Fallback Bing search.

    Placeholder: no request is made, so this always returns an empty list.
    """
    found: list = []
    try:
        # Bing Web Search API would be used here in production
        # This is a placeholder for the pattern
        pass
    except Exception as e:
        print(f"Bing fallback error: {e}")
    return found
| def calculate(expression: str) -> str: | |
| """Evaluate mathematical expressions safely.""" | |
| try: | |
| # Clean the expression | |
| expression = expression.strip() | |
| # Handle various notations | |
| expression = expression.replace("×", "*").replace("÷", "/") | |
| expression = expression.replace("^", "**") | |
| expression = expression.replace(",", "") | |
| # Handle percentages | |
| expression = re.sub(r'(\d+(?:\.\d+)?)\s*%\s*of\s*(\d+(?:\.\d+)?)', r'(\2 * \1 / 100)', expression) | |
| expression = re.sub(r'(\d+(?:\.\d+)?)\s*%', r'(\1/100)', expression) | |
| # Safe evaluation | |
| allowed_names = { | |
| "abs": abs, "round": round, "min": min, "max": max, | |
| "pow": pow, "sum": sum, "__builtins__": {} | |
| } | |
| result = eval(expression, allowed_names) | |
| if isinstance(result, float) and result.is_integer(): | |
| return str(int(result)) | |
| return str(result) | |
| except Exception as e: | |
| return f"Calculation error: {e}" | |
def wikipedia_summary(query: str) -> str:
    """Get Wikipedia summary for a topic.

    Combines the top two results from search_wikipedia into one string;
    returns an explanatory message when nothing is found or on error.
    """
    try:
        hits = search_wikipedia(query)
        if not hits:
            return f"No Wikipedia article found for '{query}'"
        # Combine top results as "title: content" paragraphs.
        return "\n\n".join(f"{hit['title']}: {hit['content']}" for hit in hits[:2])
    except Exception as e:
        return f"Wikipedia error: {e}"
def define_term(term: str) -> str:
    """Define a term using dictionary API.

    Queries dictionaryapi.dev first; on miss, falls back to a Wikipedia
    search. Returns an explanatory message when nothing is found.
    """
    try:
        term = term.strip().lower()
        # Try dictionary API first.
        response = requests.get(
            f"https://api.dictionaryapi.dev/api/v2/entries/en/{term}",
            timeout=10
        )
        if response.status_code == 200:
            data = response.json()
            # Flatten every non-empty definition across entries/meanings.
            definitions = [
                d.get("definition", "")
                for entry in data
                for meaning in entry.get("meanings", [])
                for d in meaning.get("definitions", [])
                if d.get("definition", "")
            ]
            if definitions:
                return definitions[0]  # Return first definition
        # Fallback to Wikipedia
        wiki_results = search_wikipedia(f"{term} definition meaning")
        if wiki_results:
            return wiki_results[0]['content'][:200]
        return f"No definition found for '{term}'"
    except Exception as e:
        return f"Definition error: {e}"
# Advanced search function for specific GAIA queries
def gaia_smart_search(query: str) -> str:
    """Smart search specifically optimized for GAIA questions.

    Recognizes a few question shapes (discography, Olympics, academic
    papers) and rewrites the query accordingly before delegating to
    web_search; anything else is searched verbatim.
    """
    lowered = query.lower()

    # Album / discography questions -> search the artist's discography.
    if 'album' in lowered or 'discography' in lowered:
        artist = re.search(r'([\w\s]+?)(?:\s+album|\s+discography|\s+between)', query)
        if artist:
            return web_search(f"{artist.group(1).strip()} discography albums list")

    # Olympic questions -> search by the four-digit year.
    if 'olympic' in lowered:
        year = re.search(r'(\d{4})\s+(?:summer|winter)?\s*olympics', lowered)
        if year:
            return web_search(f"{year.group(1)} Olympics participating countries athletes count")

    # Academic paper questions -> search by the author's name.
    if 'paper' in lowered or 'article' in lowered:
        author = re.search(r'by\s+([\w\s]+?)(?:\s+was|\s+published|\s+in)', query)
        if author:
            return web_search(f"{author.group(1).strip()} research paper article")

    # Default to regular search
    return web_search(query)
# List of tools exported for the agent. NOTE(review): these are plain
# callables — the imported `tool` decorator is never applied; confirm the
# consuming framework wraps them itself.
TOOLS = [web_search, calculate, wikipedia_summary, define_term, reverse_text, gaia_smart_search]