# iLOVE2D's picture
# Upload 2846 files
# 5374a2d verified
import wikipedia
from .search_base import SearchBase
from .tool import Tool,Toolkit
from typing import Dict, Any, Optional, List
from pydantic import Field
from ..core.logging import logger
class SearchWiki(SearchBase):
    """Search Wikipedia and return page summaries plus (optionally truncated) content.

    Extends ``SearchBase`` with one extra setting, ``max_summary_sentences``,
    which caps the length of each returned summary.
    """

    max_summary_sentences: Optional[int] = Field(default=None, description="Maximum number of sentences in the summary. Default None means return all available content.")

    def __init__(
        self,
        name: str = 'SearchWiki',
        num_search_pages: Optional[int] = 5,
        max_content_words: Optional[int] = None,
        max_summary_sentences: Optional[int] = None,
        **kwargs
    ):
        """
        Initialize the Wikipedia Search tool.

        Args:
            name (str): The name of the search tool
            num_search_pages (int): Number of search results to retrieve
            max_content_words (int, optional): Maximum number of words to include in content, None means no limit
            max_summary_sentences (int, optional): Maximum number of sentences in the summary, None means no limit
            **kwargs: Additional data to pass to the parent class
        """
        super().__init__(
            name=name,
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            max_summary_sentences=max_summary_sentences,
            **kwargs
        )

    def search(
        self,
        query: str,
        num_search_pages: Optional[int] = None,
        max_content_words: Optional[int] = None,
        max_summary_sentences: Optional[int] = None,
    ) -> Dict[str, Any]:
        """
        Searches Wikipedia for the given query and returns the summary and truncated full content.

        Each per-call argument that is falsy (``None`` or ``0``) falls back to
        the value configured on the instance.

        Args:
            query (str): The search query.
            num_search_pages (int): Number of search results to retrieve
            max_content_words (int): Maximum number of words to include in content, None means no limit
            max_summary_sentences (int): Maximum number of sentences in the summary, None means no limit

        Returns:
            dict: ``{"results": [...], "error": ...}``. Each result holds the
            page ``title``, ``summary``, ``content`` (passed through
            ``_truncate_content``) and ``url``. ``error`` is ``None`` on
            success, otherwise a message string; this method does not raise.
        """
        num_search_pages = num_search_pages or self.num_search_pages
        max_content_words = max_content_words or self.max_content_words
        max_summary_sentences = max_summary_sentences or self.max_summary_sentences

        try:
            logger.info(f"Searching wikipedia: {query}, num_results={num_search_pages}, max_content_words={max_content_words}, max_summary_sentences={max_summary_sentences}")

            # Search for top matching titles
            search_results = wikipedia.search(query, results=num_search_pages)
            logger.info(f"Search results: {search_results}")

            if not search_results:
                return {"results": [], "error": "No search results found."}

            results = []
            for title in search_results:
                try:
                    # auto_suggest=False: use the exact title returned by the
                    # search instead of letting the library re-guess it.
                    page = wikipedia.page(title, auto_suggest=False)

                    if max_summary_sentences is not None and max_summary_sentences > 0:
                        # Keep auto_suggest=False here as well so the summary is
                        # fetched for the same article as `page`; the library
                        # default (True) may resolve to a different title.
                        summary = wikipedia.summary(
                            title,
                            sentences=max_summary_sentences,
                            auto_suggest=False,
                        )
                    else:
                        # No sentence limit: take the summary straight from the
                        # page object already resolved above.
                        summary = page.summary

                    # Use the base class's content truncation method
                    display_content = self._truncate_content(page.content, max_content_words)

                    results.append({
                        "title": page.title,
                        "summary": summary,
                        "content": display_content,
                        "url": page.url,
                    })
                except wikipedia.exceptions.DisambiguationError:
                    # Skip ambiguous results and try the next
                    logger.info(f"Skipping ambiguous Wikipedia title: {title}")
                    continue
                except wikipedia.exceptions.PageError:
                    # Skip non-existing pages and try the next
                    logger.info(f"Skipping missing Wikipedia page: {title}")
                    continue

            return {"results": results, "error": None}

        except Exception as e:
            # Top-level boundary: report the failure in the result dict rather
            # than propagating it to the caller.
            logger.error(f"Error searching Wikipedia: {str(e)}")
            return {"results": [], "error": str(e)}
class WikipediaSearchTool(Tool):
    """Tool that forwards a query to a ``SearchWiki`` instance."""

    name: str = "wikipedia_search"
    description: str = "Search Wikipedia for relevant articles and content"
    # Schema of the accepted arguments; only "query" is mandatory (see `required`).
    inputs: Dict[str, Dict[str, str]] = {
        "query": {
            "type": "string",
            "description": "The search query to look up on Wikipedia",
        },
        "num_search_pages": {
            "type": "integer",
            "description": "Number of search results to retrieve. Default: 5",
        },
        "max_content_words": {
            "type": "integer",
            "description": "Maximum number of words to include in content per result. None means no limit. Default: None",
        },
        "max_summary_sentences": {
            "type": "integer",
            "description": "Maximum number of sentences in the summary. None means no limit. Default: None",
        },
    }
    required: Optional[List[str]] = ["query"]

    def __init__(self, search_wiki: SearchWiki = None):
        """Store the ``SearchWiki`` instance that will serve the searches.

        Args:
            search_wiki: Search backend used by ``__call__``; may be ``None``,
                in which case calling the tool raises ``RuntimeError``.
        """
        super().__init__()
        self.search_wiki = search_wiki

    def __call__(
        self,
        query: str,
        num_search_pages: int = None,
        max_content_words: int = None,
        max_summary_sentences: int = None,
    ) -> Dict[str, Any]:
        """Execute Wikipedia search using the SearchWiki instance.

        Returns:
            The dictionary produced by ``SearchWiki.search``, or
            ``{"results": [], "error": <message>}`` if that call raises.

        Raises:
            RuntimeError: If no search instance was supplied.
        """
        backend = self.search_wiki
        if not backend:
            raise RuntimeError("Wikipedia search instance not initialized")

        try:
            outcome = backend.search(
                query,
                num_search_pages,
                max_content_words,
                max_summary_sentences,
            )
        except Exception as exc:
            # Report the failure in the result payload instead of raising.
            return {
                "results": [],
                "error": f"Error executing Wikipedia search: {exc!s}",
            }
        return outcome
class WikipediaSearchToolkit(Toolkit):
    """Toolkit bundling a ``WikipediaSearchTool`` with its ``SearchWiki`` backend."""

    def __init__(
        self,
        name: str = "WikipediaSearchToolkit",
        num_search_pages: Optional[int] = 5,
        max_content_words: Optional[int] = None,
        max_summary_sentences: Optional[int] = None,
        **kwargs
    ):
        """Build the search backend and register the tool that uses it.

        Args:
            name: Name given to the toolkit.
            num_search_pages: Passed to ``SearchWiki`` as ``num_search_pages``.
            max_content_words: Passed to ``SearchWiki`` as ``max_content_words``.
            max_summary_sentences: Passed to ``SearchWiki`` as ``max_summary_sentences``.
            **kwargs: Extra keyword arguments forwarded to ``SearchWiki``.
        """
        # One backend instance, shared by the tool and kept on the toolkit.
        shared_backend = SearchWiki(
            name="SearchWiki",
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            max_summary_sentences=max_summary_sentences,
            **kwargs
        )

        # The parent is initialised before the attribute below is assigned.
        super().__init__(
            name=name,
            tools=[WikipediaSearchTool(search_wiki=shared_backend)],
        )

        self.search_wiki = shared_backend