File size: 7,053 Bytes
5374a2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import wikipedia
from .search_base import SearchBase
from .tool import Tool,Toolkit
from typing import Dict, Any, Optional, List
from pydantic import Field
from ..core.logging import logger


class SearchWiki(SearchBase):
    """Wikipedia search backend built on the ``wikipedia`` package.

    For each title returned by ``wikipedia.search`` the page is fetched and
    reported as a dict holding its title, summary, (optionally truncated)
    content and URL.
    """

    max_summary_sentences: Optional[int] = Field(default=None, description="Maximum number of sentences in the summary. Default None means return all available content.")
    
    def __init__(
        self, 
        name: str = 'SearchWiki',
        num_search_pages: Optional[int] = 5, 
        max_content_words: Optional[int] = None,
        max_summary_sentences: Optional[int] = None,
        **kwargs
    ):
        """
        Initialize the Wikipedia Search tool.
        
        Args:
            name (str): The name of the search tool
            num_search_pages (int): Number of search results to retrieve
            max_content_words (int, optional): Maximum number of words to include in content, None means no limit
            max_summary_sentences (int, optional): Maximum number of sentences in the summary, None means no limit
            **kwargs: Additional data to pass to the parent class
        """

        super().__init__(
            name=name,
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            max_summary_sentences=max_summary_sentences,
            **kwargs
        )

    def search(self, query: str, num_search_pages: int = None, max_content_words: int = None, max_summary_sentences: int = None) -> Dict[str, Any]:
        """
        Searches Wikipedia for the given query and returns, for every page that
        could be fetched, its summary and (optionally truncated) full content.

        Args:
            query (str): The search query.
            num_search_pages (int): Number of search results to retrieve. Falsy values fall back to the instance setting.
            max_content_words (int): Maximum number of words to include in content, None means no limit. Falsy values fall back to the instance setting.
            max_summary_sentences (int): Maximum number of sentences in the summary, None means no limit. Falsy values fall back to the instance setting.

        Returns:
            dict: ``{"results": [...], "error": ...}``. Each entry of ``results`` is a
            dictionary with the ``title``, ``summary``, ``content`` and ``url`` of one page.
            ``error`` is None on success, otherwise a message (and ``results`` is empty).
        """
        # NOTE: `or` treats 0 the same as None, so an explicit 0 also falls back
        # to the instance-level default.
        num_search_pages = num_search_pages or self.num_search_pages
        max_content_words = max_content_words or self.max_content_words
        max_summary_sentences = max_summary_sentences or self.max_summary_sentences
            
        try:
            logger.info(f"Searching wikipedia: {query}, num_results={num_search_pages}, max_content_words={max_content_words}, max_summary_sentences={max_summary_sentences}")
            # Search for top matching titles
            search_results = wikipedia.search(query, results=num_search_pages)
            logger.info(f"Search results: {search_results}")
            if not search_results:
                return {"results": [], "error": "No search results found."}

            # Fetch every page that resolves cleanly; problematic titles are skipped
            results = []
            for title in search_results:
                try:
                    page = wikipedia.page(title, auto_suggest=False)
                    
                    # Handle the max_summary_sentences parameter
                    if max_summary_sentences is not None and max_summary_sentences > 0:
                        # auto_suggest must be disabled here as well: with the
                        # library default (True) the title is re-resolved through
                        # the suggestion engine and the summary may belong to a
                        # different article than `page`.
                        summary = wikipedia.summary(
                            title,
                            sentences=max_summary_sentences,
                            auto_suggest=False,
                        )
                    else:
                        # Full intro summary of the page that was already
                        # resolved; no second title lookup is needed.
                        summary = page.summary

                    # Use the base class's content truncation method
                    display_content = self._truncate_content(page.content, max_content_words)
                    
                    results.append({
                        "title": page.title,
                        "summary": summary,
                        "content": display_content,
                        "url": page.url,
                    })
                except (wikipedia.exceptions.DisambiguationError, wikipedia.exceptions.PageError) as e:
                    # Skip ambiguous or non-existing pages and try the next
                    logger.warning(f"Skipping Wikipedia result '{title}': {type(e).__name__}")
                    continue
            
            return {"results": results, "error": None}
        
        except Exception as e:
            # Boundary handler: any other failure (e.g. network problems) is
            # reported through the "error" field instead of being raised.
            logger.error(f"Error searching Wikipedia: {str(e)}")
            return {"results": [], "error": str(e)}
    

class WikipediaSearchTool(Tool):
    """Tool named ``wikipedia_search`` that forwards calls to a ``SearchWiki`` instance."""

    name: str = "wikipedia_search"
    description: str = "Search Wikipedia for relevant articles and content"
    # Parameter schema: only ``query`` is required, the rest are optional overrides.
    inputs: Dict[str, Dict[str, str]] = {
        "query": {
            "type": "string",
            "description": "The search query to look up on Wikipedia",
        },
        "num_search_pages": {
            "type": "integer",
            "description": "Number of search results to retrieve. Default: 5",
        },
        "max_content_words": {
            "type": "integer",
            "description": "Maximum number of words to include in content per result. None means no limit. Default: None",
        },
        "max_summary_sentences": {
            "type": "integer",
            "description": "Maximum number of sentences in the summary. None means no limit. Default: None",
        },
    }
    required: Optional[List[str]] = ["query"]

    def __init__(self, search_wiki: SearchWiki = None):
        """Store the ``SearchWiki`` instance this tool delegates to.

        Args:
            search_wiki: Search backend used by ``__call__``; may be None, in
                which case calling the tool raises ``RuntimeError``.
        """
        super().__init__()
        self.search_wiki = search_wiki

    def __call__(self, query: str, num_search_pages: int = None, max_content_words: int = None, max_summary_sentences: int = None) -> Dict[str, Any]:
        """Run a Wikipedia search through the attached ``SearchWiki`` instance.

        Args:
            query: The search query.
            num_search_pages: Forwarded unchanged to ``SearchWiki.search``.
            max_content_words: Forwarded unchanged to ``SearchWiki.search``.
            max_summary_sentences: Forwarded unchanged to ``SearchWiki.search``.

        Returns:
            The dictionary returned by ``search``, or
            ``{"results": [], "error": <message>}`` if ``search`` raised.

        Raises:
            RuntimeError: If no search instance is attached.
        """
        backend = self.search_wiki
        if not backend:
            raise RuntimeError("Wikipedia search instance not initialized")

        try:
            outcome = backend.search(query, num_search_pages, max_content_words, max_summary_sentences)
        except Exception as exc:
            # Failures are reported in-band rather than propagated to the caller.
            return {"results": [], "error": f"Error executing Wikipedia search: {str(exc)}"}
        else:
            return outcome


class WikipediaSearchToolkit(Toolkit):
    """Toolkit bundling a single ``WikipediaSearchTool`` with its ``SearchWiki`` backend."""

    def __init__(
        self,
        name: str = "WikipediaSearchToolkit",
        num_search_pages: Optional[int] = 5,
        max_content_words: Optional[int] = None,
        max_summary_sentences: Optional[int] = None,
        **kwargs
    ):
        """Build the search backend, wrap it in a tool and register it.

        Args:
            name: Name passed to the parent ``Toolkit``.
            num_search_pages: Passed to the ``SearchWiki`` constructor.
            max_content_words: Passed to the ``SearchWiki`` constructor.
            max_summary_sentences: Passed to the ``SearchWiki`` constructor.
            **kwargs: Extra keyword arguments passed to the ``SearchWiki`` constructor.
        """
        # One backend instance, shared by the tool and kept on the toolkit.
        backend = SearchWiki(
            name="SearchWiki",
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            max_summary_sentences=max_summary_sentences,
            **kwargs
        )

        wiki_tool = WikipediaSearchTool(search_wiki=backend)

        # The parent must be initialized before the instance attribute is set.
        super().__init__(name=name, tools=[wiki_tool])

        self.search_wiki = backend