File size: 7,053 Bytes
5374a2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
import wikipedia
from .search_base import SearchBase
from .tool import Tool,Toolkit
from typing import Dict, Any, Optional, List
from pydantic import Field
from ..core.logging import logger
class SearchWiki(SearchBase):
    """Search tool that queries Wikipedia through the ``wikipedia`` package.

    Every hit is returned as a dict with the page title, its summary, its
    (optionally truncated) content and its URL.
    """

    max_summary_sentences: Optional[int] = Field(default=None, description="Maximum number of sentences in the summary. Default None means return all available content.")

    def __init__(
        self,
        name: str = 'SearchWiki',
        num_search_pages: Optional[int] = 5,
        max_content_words: Optional[int] = None,
        max_summary_sentences: Optional[int] = None,
        **kwargs
    ):
        """
        Initialize the Wikipedia Search tool.

        Args:
            name (str): The name of the search tool
            num_search_pages (int): Number of search results to retrieve
            max_content_words (int, optional): Maximum number of words to include in content, None means no limit
            max_summary_sentences (int, optional): Maximum number of sentences in the summary, None means no limit
            **kwargs: Additional data to pass to the parent class
        """
        super().__init__(
            name=name,
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            max_summary_sentences=max_summary_sentences,
            **kwargs
        )

    def search(
        self,
        query: str,
        num_search_pages: Optional[int] = None,
        max_content_words: Optional[int] = None,
        max_summary_sentences: Optional[int] = None,
    ) -> Dict[str, Any]:
        """
        Search Wikipedia for the query and collect summary and content of each hit.

        Any per-call argument that is falsy (``None`` or ``0``) falls back to
        the value configured on the instance.

        Args:
            query (str): The search query.
            num_search_pages (int, optional): Number of search results to retrieve
            max_content_words (int, optional): Maximum number of words to include in content, None means no limit
            max_summary_sentences (int, optional): Maximum number of sentences in the summary, None means no limit

        Returns:
            dict: ``{"results": [...], "error": ...}``. Each entry of
            ``results`` has the keys ``title``, ``summary``, ``content`` and
            ``url``. ``error`` is ``None`` on success, otherwise a message
            string (in which case ``results`` is empty).
        """
        num_search_pages = num_search_pages or self.num_search_pages
        max_content_words = max_content_words or self.max_content_words
        max_summary_sentences = max_summary_sentences or self.max_summary_sentences

        try:
            logger.info(f"Searching wikipedia: {query}, num_results={num_search_pages}, max_content_words={max_content_words}, max_summary_sentences={max_summary_sentences}")

            # Search for top matching titles
            search_results = wikipedia.search(query, results=num_search_pages)
            logger.info(f"Search results: {search_results}")

            if not search_results:
                return {"results": [], "error": "No search results found."}

            limit_summary = max_summary_sentences is not None and max_summary_sentences > 0
            results = []
            for title in search_results:
                try:
                    page = wikipedia.page(title, auto_suggest=False)

                    if limit_summary:
                        # auto_suggest must be disabled here as well; otherwise
                        # the summary lookup may resolve to a different article
                        # than the page fetched above.
                        summary = wikipedia.summary(
                            title,
                            sentences=max_summary_sentences,
                            auto_suggest=False,
                        )
                    else:
                        # Full intro summary of the page already resolved,
                        # without resolving the title a second time.
                        summary = page.summary

                    # Use the base class's content truncation method
                    display_content = self._truncate_content(page.content, max_content_words)
                except (
                    wikipedia.exceptions.DisambiguationError,
                    wikipedia.exceptions.PageError,
                ) as e:
                    # Ambiguous or non-existing pages are skipped so the
                    # remaining search hits can still be returned.
                    logger.info(f"Skipping Wikipedia result '{title}': {type(e).__name__}")
                    continue
                else:
                    results.append({
                        "title": page.title,
                        "summary": summary,
                        "content": display_content,
                        "url": page.url,
                    })

            return {"results": results, "error": None}

        except Exception as e:
            # Boundary of the tool: report the failure to the caller instead of raising.
            logger.error(f"Error searching Wikipedia: {str(e)}")
            return {"results": [], "error": str(e)}
class WikipediaSearchTool(Tool):
    # Tool that forwards its call arguments to a SearchWiki instance.
    name: str = "wikipedia_search"
    description: str = "Search Wikipedia for relevant articles and content"
    # Description of the accepted call arguments, keyed by argument name.
    inputs: Dict[str, Dict[str, str]] = dict(
        query=dict(
            type="string",
            description="The search query to look up on Wikipedia",
        ),
        num_search_pages=dict(
            type="integer",
            description="Number of search results to retrieve. Default: 5",
        ),
        max_content_words=dict(
            type="integer",
            description="Maximum number of words to include in content per result. None means no limit. Default: None",
        ),
        max_summary_sentences=dict(
            type="integer",
            description="Maximum number of sentences in the summary. None means no limit. Default: None",
        ),
    )
    # Only the query itself is mandatory.
    required: Optional[List[str]] = ["query"]

    def __init__(self, search_wiki: SearchWiki = None):
        """Create the tool around the SearchWiki instance it delegates to."""
        super().__init__()
        self.search_wiki = search_wiki

    def __call__(self, query: str, num_search_pages: int = None, max_content_words: int = None, max_summary_sentences: int = None) -> Dict[str, Any]:
        """Run a Wikipedia search through the wrapped SearchWiki instance.

        Raises:
            RuntimeError: If no SearchWiki instance was supplied.

        Returns:
            dict: Whatever ``SearchWiki.search`` returns, or a dict with an
            empty ``results`` list and an ``error`` message if the search raised.
        """
        backend = self.search_wiki
        if not backend:
            raise RuntimeError("Wikipedia search instance not initialized")

        try:
            outcome = backend.search(query, num_search_pages, max_content_words, max_summary_sentences)
        except Exception as exc:
            # Failures are reported in-band so the caller always receives a dict.
            return {"results": [], "error": f"Error executing Wikipedia search: {str(exc)}"}
        return outcome
class WikipediaSearchToolkit(Toolkit):
    # Toolkit bundling a single WikipediaSearchTool with its SearchWiki backend.

    def __init__(
        self,
        name: str = "WikipediaSearchToolkit",
        num_search_pages: Optional[int] = 5,
        max_content_words: Optional[int] = None,
        max_summary_sentences: Optional[int] = None,
        **kwargs
    ):
        """Build the SearchWiki backend and register the search tool that uses it.

        Args:
            name (str): Name passed to the parent toolkit.
            num_search_pages (int, optional): Passed through to SearchWiki.
            max_content_words (int, optional): Passed through to SearchWiki.
            max_summary_sentences (int, optional): Passed through to SearchWiki.
            **kwargs: Extra keyword arguments passed through to SearchWiki.
        """
        # One backend instance, shared by every tool of this toolkit.
        shared_backend = SearchWiki(
            name="SearchWiki",
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            max_summary_sentences=max_summary_sentences,
            **kwargs
        )
        wiki_tool = WikipediaSearchTool(search_wiki=shared_backend)

        # The parent is initialised first, then the backend is kept on the instance.
        super().__init__(name=name, tools=[wiki_tool])
        self.search_wiki = shared_backend
|