File size: 9,998 Bytes
5374a2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
## Dux Distributed Global Search

from .search_base import SearchBase
from .tool import Tool,Toolkit
from ddgs import DDGS
from typing import Dict, Any, List, Optional
import pandas as pd
class SearchDDGS(SearchBase):
    """
    Web search tool backed by DDGS (Dux Distributed Global Search).

    DDGS is a metasearch client that aggregates results from several engines
    (DuckDuckGo, Google, Bing, Brave, Yahoo, and other backends). For every hit
    this class tries to fetch the full page text and falls back to the search
    snippet when that is not possible.
    """

    def __init__(
        self,
        name: str = "SearchDDGS",
        num_search_pages: Optional[int] = 5,
        max_content_words: Optional[int] = None,
        backend: str = "auto",
        region: str = "us-en",
        **kwargs
    ):
        """
        Set up the DDGS search tool.

        Args:
            name (str): Name of the tool
            num_search_pages (int): How many search results to request
            max_content_words (int): Word cap applied to each result's content
            backend (str): Default backend(s), e.g. "auto", "duckduckgo", "google", "bing", "brave", "yahoo"
            region (str): Default search region (e.g., "us-en", "uk-en", "ru-ru")
            **kwargs: Forwarded unchanged to the parent class initializer
        """
        super().__init__(
            name=name,
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            **kwargs
        )
        # Instance-level defaults; each search() call may override them.
        self.region = region
        self.backend = backend

    def search(self, query: str, num_search_pages: int = None, max_content_words: int = None, backend: str = None, region: str = None) -> Dict[str, Any]:
        """
        Run a DDGS text search and collect page content for each hit.

        Args:
            query (str): The search query.
            num_search_pages (int): Number of search results to request
            max_content_words (int): Word cap for each result's content, None means no limit
            backend (str): Backend for this call (falls back to the instance default)
            region (str): Region for this call (falls back to the instance default)

        Returns:
            Dict[str, Any]: ``{"results": [...], "error": ...}``. Each result has
            "title", "content" and "url" keys. "error" is None on success, or a
            message when the search failed or returned nothing.
        """
        # Any falsy argument (None, 0, "") defers to the instance-level setting.
        page_limit = num_search_pages if num_search_pages else self.num_search_pages
        word_limit = max_content_words if max_content_words else self.max_content_words
        engine = backend if backend else self.backend
        locale = region if region else self.region

        # Phase 1: ask DDGS for the raw hits. Any failure here aborts the search.
        try:
            with DDGS() as client:
                hits = list(client.text(
                    query,
                    max_results=page_limit,
                    backend=engine,
                    region=locale
                ))
        except Exception as exc:
            return {"results": [], "error": str(exc)}

        if not hits:
            return {"results": [], "error": "No search results found."}

        # Phase 2: turn each hit into a {"title", "content", "url"} record.
        collected: List[Dict[str, Any]] = []
        for hit in hits:
            try:
                heading = hit.get('title', 'No Title')
                link = hit.get('href', '') or hit.get('link', '') or hit.get('url', '')

                body_text = ''
                if link and link.startswith(('http://', 'https://')):
                    try:
                        # Prefer the real page text over the search snippet.
                        page_title, page_text = self._scrape_page(link)
                        if page_text:
                            heading = page_title or heading
                            body_text = page_text
                    except Exception:
                        # Scraping is best-effort; the snippet fallback below takes over.
                        pass

                if not body_text:
                    # No usable URL, or scraping failed / came back empty: use the snippet.
                    body_text = hit.get('body', '')

                if not body_text:
                    continue  # Nothing worth reporting for this hit

                # Truncation is delegated to the helper invoked on self.
                trimmed = self._truncate_content(body_text, word_limit)
                collected.append({
                    "title": heading,
                    "content": trimmed,
                    "url": link,
                })
            except Exception:
                continue  # Skip hits that cannot be processed

        return {"results": collected, "error": None}
    

class DDGSSearchTool(Tool):
    # Tool metadata: identifier, human-readable description, and the input schema.
    name: str = "ddgs_search"
    description: str = "Search using DDGS (Dux Distributed Global Search) which aggregates results from multiple search engines including DuckDuckGo, Google, Bing, and others"
    inputs: Dict[str, Dict[str, str]] = {
        "query": {
            "type": "string",
            "description": "The search query to execute"
        },
        "num_search_pages": {
            "type": "integer",
            "description": "Number of search results to retrieve. Default: 5"
        },
        "max_content_words": {
            "type": "integer",
            "description": "Maximum number of words to include in content per result. None means no limit. Default: None"
        },
        "backend": {
            "type": "string",
            "description": "Search backend to use. Options: 'auto', 'duckduckgo', 'google', 'bing', 'brave', 'yahoo'. Default: 'auto'"
        },
        "region": {
            "type": "string",
            "description": "Search region (e.g., 'us-en', 'uk-en', 'ru-ru'). Default: 'us-en'"
        }
    }
    # Only the query is mandatory; everything else falls back to the searcher's defaults.
    required: Optional[List[str]] = ["query"]

    def __init__(self, search_ddgs: SearchDDGS = None):
        """
        Bind the tool to a SearchDDGS instance.

        Args:
            search_ddgs (SearchDDGS): Searcher that performs the actual work.
                If left as None, calling the tool raises RuntimeError.
        """
        super().__init__()
        self.search_ddgs = search_ddgs

    def __call__(self, query: str, num_search_pages: int = None, max_content_words: int = None, backend: str = None, region: str = None) -> Dict[str, Any]:
        """
        Delegate the query to the bound SearchDDGS instance.

        Returns:
            Dict[str, Any]: The searcher's result dictionary, or an empty result
            list plus an "error" message if the searcher raised.

        Raises:
            RuntimeError: If no SearchDDGS instance was supplied at construction.
        """
        searcher = self.search_ddgs
        if not searcher:
            raise RuntimeError("DDGS search instance not initialized")

        try:
            outcome = searcher.search(query, num_search_pages, max_content_words, backend, region)
        except Exception as e:
            # Report the failure in the result payload instead of propagating it.
            return {"results": [], "error": f"Error executing DDGS search: {str(e)}"}
        return outcome


class DDGSSearchToolkit(Toolkit):
    """Toolkit that bundles a DDGSSearchTool around one shared SearchDDGS instance."""

    def __init__(
        self,
        name: str = "DDGSSearchToolkit",
        num_search_pages: Optional[int] = 5,
        max_content_words: Optional[int] = None,
        backend: str = "auto",
        region: str = "us-en",
        **kwargs
    ):
        """
        Build the searcher, wrap it in a tool, and register the tool.

        Args:
            name (str): Name of the toolkit
            num_search_pages (int): Default number of search results to retrieve
            max_content_words (int): Default word cap per result, None means no limit
            backend (str): Default search backend
            region (str): Default search region
            **kwargs: Forwarded to the SearchDDGS constructor
        """
        # One searcher backs every tool in this toolkit.
        shared_searcher = SearchDDGS(
            name="DDGSSearch",
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            backend=backend,
            region=region,
            **kwargs
        )

        # Parent initialisation comes first, then the attribute assignment below.
        super().__init__(
            name=name,
            tools=[DDGSSearchTool(search_ddgs=shared_searcher)],
        )

        # Keep a handle on the searcher for direct access.
        self.search_ddgs = shared_searcher
    

class PERTSearchTool(Tool):
    # Tool metadata: identifier, human-readable description, and the input schema.
    name: str = "pert_search"
    description: str = "Search gene regulatory network and return the gene-gene pair"
    inputs: Dict[str, Dict[str, str]] = {
        "source_gene_name": {
            "type": "string",
            "description": "name of perturbed gene"
        },
        "target_gene_name": {
            "type": "string",
            "description": "name of targeted gene"
        },
        "cell_line": {
            "type": "string",
            "description": "Name of selected cell line"
        },
    }
    required: Optional[List[str]] = ["source_gene_name", "target_gene_name", "cell_line"]

    def __init__(self, sourcekey='k562', toplist = 20, grn_path: Optional[str] = None):
        """
        Load the processed gene-regulatory-network (GRN) table for one cell line.

        Args:
            sourcekey: Cell-line key. It selects the default CSV file name and
                is the cell line reported in the result text.
            toplist: Number of highest-scoring rows returned per query.
            grn_path: Optional path to the processed GRN CSV. When omitted, the
                legacy location ``.../examples/pertqa/{sourcekey}_processed_grn.csv``
                is used, so existing callers are unaffected.

        Raises:
            Whatever ``pandas.read_csv`` raises when the file cannot be read.
        """
        super().__init__()
        self.toplist = toplist
        self.sourcekey = sourcekey
        if grn_path is None:
            # Legacy default, kept for backward compatibility. It is specific to
            # one machine; pass grn_path explicitly anywhere else.
            grn_path = f"/gpfs/radev/home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/{sourcekey}_processed_grn.csv"
        # The first CSV column becomes the row index; lookups select a column by
        # target gene name and report the row labels as regulators.
        self.filelist = pd.read_csv(grn_path, index_col=0)

    def __call__(self, source_gene_name: str, target_gene_name: str, cell_line: str) -> Dict[str, Any]:
        """
        Look up the strongest regulators of a target gene in the loaded GRN table.

        Only ``target_gene_name`` drives the lookup. ``source_gene_name`` and
        ``cell_line`` are part of the tool schema but are merely echoed to
        stdout; the cell line named in the result is the ``sourcekey`` the tool
        was constructed with.

        Args:
            source_gene_name: Name of the perturbed gene (not used for lookup).
            target_gene_name: Column of the GRN table to rank.
            cell_line: Cell line requested by the caller (not used for lookup).

        Returns:
            Dict[str, Any]: ``{"results": <text table>}`` on success, where the
            table lists up to ``toplist`` rows as "regulator target score" in
            descending score order; or ``{"results": [], "error": <message>}``
            if the lookup fails (e.g. the gene is not a column of the table).
        """
        # Diagnostic trace of the incoming request.
        print(source_gene_name, target_gene_name, cell_line)
        try:
            top_scores = (
                self.filelist.loc[:, target_gene_name]
                .sort_values(ascending=False)
                .iloc[0:self.toplist]
            )
            header = (
                f'''The detected gene list and gene regulatory strength in cell line {self.sourcekey} is: '''
                '''RegulatorGeneName TargetGeneName Score\n'''
            )
            # Assemble all rows in one pass instead of repeated string +=.
            rows = "".join(
                f'''{regulator} {target_gene_name} {score}\n'''
                for regulator, score in zip(top_scores.index, top_scores.values)
            )
            searchinfo = header + rows
            print(searchinfo)
            return {"results": searchinfo}
        except Exception as e:
            # Tool boundary: report the failure in the payload instead of raising.
            return {"results": [], "error": f"Error executing Perturbation searching: {str(e)}"}
        
class PertToolkit(Toolkit):
    """Toolkit exposing a single PERTSearchTool for gene-regulatory-network lookups."""

    def __init__(
        self,
        name: str = "PertToolkit",
        sourcekey = "k562",
        toplist = 20,
        **kwargs
    ):
        """
        Build the perturbation search tool and register it with the toolkit.

        Args:
            name (str): Name of the toolkit
            sourcekey: Cell-line key handed to PERTSearchTool
            toplist: Number of top-scoring rows PERTSearchTool returns per query
            **kwargs: Accepted but not forwarded to the tool or the parent class
        """
        # The toolkit's only tool; constructing it loads the GRN table.
        pert_tool = PERTSearchTool(sourcekey=sourcekey, toplist=toplist)

        # Hand the tool list to the parent toolkit.
        super().__init__(name=name, tools=[pert_tool])