Spaces:
Sleeping
Sleeping
| # import time | |
| # from typing import Optional | |
| # import requests | |
| # from bs4 import BeautifulSoup | |
| # from langchain.tools import tool | |
| # class WebSearchTool: | |
| # def __init__(self): | |
| # self.last_request_time = 0 | |
| # self.min_request_interval = 2.0 # Minimum time between requests in seconds | |
| # self.max_retries = 10 | |
| # def search(self, query: str, domain: Optional[str] = None) -> str: | |
| # """Perform web search with rate limiting and retries.""" | |
| # for attempt in range(self.max_retries): | |
| # # Implement rate limiting | |
| # current_time = time.time() | |
| # time_since_last = current_time - self.last_request_time | |
| # if time_since_last < self.min_request_interval: | |
| # time.sleep(self.min_request_interval - time_since_last) | |
| # try: | |
| # # Make the search request | |
| # results = self._do_search(query, domain) | |
| # self.last_request_time = time.time() | |
| # return results | |
| # except Exception as e: | |
| # if "202 Ratelimit" in str(e): | |
| # if attempt < self.max_retries - 1: | |
| # # Exponential backoff | |
| # wait_time = (2 ** attempt) * self.min_request_interval | |
| # time.sleep(wait_time) | |
| # continue | |
| # return f"Search failed after {self.max_retries} attempts: {str(e)}" | |
| # return "Search failed due to rate limiting" | |
| # def _do_search(self, query: str, domain: Optional[str] = None) -> str: | |
| # """Perform the actual search request.""" | |
| # try: | |
| # # Construct search URL | |
| # base_url = "https://html.duckduckgo.com/html" | |
| # params = {"q": query} | |
| # if domain: | |
| # params["q"] += f" site:{domain}" | |
| # # Make request with increased timeout | |
| # response = requests.get(base_url, params=params, timeout=10) | |
| # response.raise_for_status() | |
| # if response.status_code == 202: | |
| # raise Exception("202 Ratelimit") | |
| # # Extract search results | |
| # results = [] | |
| # soup = BeautifulSoup(response.text, 'html.parser') | |
| # for result in soup.find_all('div', {'class': 'result'}): | |
| # title = result.find('a', {'class': 'result__a'}) | |
| # snippet = result.find('a', {'class': 'result__snippet'}) | |
| # if title and snippet: | |
| # results.append({ | |
| # 'title': title.get_text(), | |
| # 'snippet': snippet.get_text(), | |
| # 'url': title.get('href') | |
| # }) | |
| # # Format results | |
| # formatted_results = [] | |
| #             for r in results[:10]:  # Limit to top 10 results | |
| # formatted_results.append(f"[{r['title']}]({r['url']})\n{r['snippet']}\n") | |
| # return "## Search Results\n\n" + "\n".join(formatted_results) | |
| # except requests.RequestException as e: | |
| # raise Exception(f"Search request failed: {str(e)}") |