|
|
|
|
|
|
|
|
import logging
from typing import Dict, Any, List, Optional

import pandas as pd
from ddgs import DDGS

from .search_base import SearchBase
from .tool import Tool,Toolkit
|
|
class SearchDDGS(SearchBase):
    """
    DDGS (Dux Distributed Global Search) tool that aggregates results from multiple search engines.
    Supports DuckDuckGo, Google, Bing, Brave, Yahoo, and other backends.
    """

    def __init__(
        self,
        name: str = "SearchDDGS",
        num_search_pages: Optional[int] = 5,
        max_content_words: Optional[int] = None,
        backend: str = "auto",
        region: str = "us-en",
        **kwargs
    ):
        """
        Set up the DDGS search tool and remember its default backend and region.

        Args:
            name (str): Name of the tool
            num_search_pages (int): Number of search results to retrieve
            max_content_words (int): Maximum number of words to include in content
            backend (str): Search backend(s) to use. Options: "auto", "duckduckgo", "google", "bing", "brave", "yahoo", etc.
            region (str): Search region (e.g., "us-en", "uk-en", "ru-ru")
            **kwargs: Additional keyword arguments forwarded to the parent class initializer
        """
        super().__init__(
            name=name,
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            **kwargs
        )
        self.backend = backend
        self.region = region

    def search(self, query: str, num_search_pages: int = None, max_content_words: int = None, backend: str = None, region: str = None) -> Dict[str, Any]:
        """
        Run a DDGS text search and collect page content for each hit.

        For every hit with an http(s) URL the page is scraped via
        ``self._scrape_page``; when scraping fails or yields nothing, the
        snippet returned by the search engine (``body``) is used instead.
        Hits that end up with no content are dropped, and a hit whose
        processing raises is skipped without being reported.

        Args:
            query (str): The search query.
            num_search_pages (int): Number of search results to retrieve
            max_content_words (int): Maximum number of words to include in content, None means no limit
            backend (str): Search backend to use (overrides instance default)
            region (str): Search region to use (overrides instance default)

        Returns:
            Dict[str, Any]: ``{"results": [...], "error": ...}`` where each
            result holds ``title``, ``content`` and ``url``; ``error`` is
            ``None`` on success and a message string otherwise.
        """
        # Any falsy per-call value (None, 0, "") falls back to the instance default.
        page_limit = num_search_pages or self.num_search_pages
        word_limit = max_content_words or self.max_content_words
        engine = backend or self.backend
        locale = region or self.region

        try:
            with DDGS() as client:
                hits = list(
                    client.text(query, max_results=page_limit, backend=engine, region=locale)
                )

            if not hits:
                return {"results": [], "error": "No search results found."}

            collected = []
            for hit in hits:
                try:
                    heading = hit.get('title', 'No Title')
                    # Different backends may expose the link under different keys.
                    link = hit.get('href', '') or hit.get('link', '') or hit.get('url', '')

                    text = None
                    if link and link.startswith(('http://', 'https://')):
                        try:
                            page_title, page_text = self._scrape_page(link)
                            if page_text:
                                heading = page_title or heading
                                text = page_text
                        except Exception:
                            # Scraping is best effort; fall back to the snippet below.
                            text = None

                    if text is None:
                        text = hit.get('body', '')

                    if not text:
                        continue

                    collected.append({
                        "title": heading,
                        "content": self._truncate_content(text, word_limit),
                        "url": link,
                    })
                except Exception:
                    # A single malformed hit must not abort the whole search.
                    continue

            return {"results": collected, "error": None}

        except Exception as exc:
            return {"results": [], "error": str(exc)}
|
|
|
|
|
|
|
|
class DDGSSearchTool(Tool):
    """Tool named ``ddgs_search`` that delegates each call to a SearchDDGS instance."""

    name: str = "ddgs_search"
    description: str = "Search using DDGS (Dux Distributed Global Search) which aggregates results from multiple search engines including DuckDuckGo, Google, Bing, and others"
    inputs: Dict[str, Dict[str, str]] = {
        "query": {"type": "string", "description": "The search query to execute"},
        "num_search_pages": {"type": "integer", "description": "Number of search results to retrieve. Default: 5"},
        "max_content_words": {"type": "integer", "description": "Maximum number of words to include in content per result. None means no limit. Default: None"},
        "backend": {"type": "string", "description": "Search backend to use. Options: 'auto', 'duckduckgo', 'google', 'bing', 'brave', 'yahoo'. Default: 'auto'"},
        "region": {"type": "string", "description": "Search region (e.g., 'us-en', 'uk-en', 'ru-ru'). Default: 'us-en'"}
    }
    required: Optional[List[str]] = ["query"]

    def __init__(self, search_ddgs: SearchDDGS = None):
        """Store the SearchDDGS instance that will serve the searches."""
        super().__init__()
        self.search_ddgs = search_ddgs

    def __call__(self, query: str, num_search_pages: int = None, max_content_words: int = None, backend: str = None, region: str = None) -> Dict[str, Any]:
        """
        Run a DDGS search through the wrapped SearchDDGS instance.

        Returns:
            Dict[str, Any]: whatever ``SearchDDGS.search`` returns, or
            ``{"results": [], "error": <message>}`` if that call raises.

        Raises:
            RuntimeError: if no (truthy) search instance was supplied.
        """
        engine = self.search_ddgs
        if not engine:
            raise RuntimeError("DDGS search instance not initialized")

        try:
            outcome = engine.search(query, num_search_pages, max_content_words, backend, region)
        except Exception as exc:
            return {"results": [], "error": f"Error executing DDGS search: {exc!s}"}
        return outcome
|
|
|
|
|
|
|
|
class DDGSSearchToolkit(Toolkit):
    """Toolkit holding one DDGSSearchTool backed by a SearchDDGS instance."""

    def __init__(
        self,
        name: str = "DDGSSearchToolkit",
        num_search_pages: Optional[int] = 5,
        max_content_words: Optional[int] = None,
        backend: str = "auto",
        region: str = "us-en",
        **kwargs
    ):
        """
        Build the underlying SearchDDGS instance and register its tool.

        Args:
            name (str): Name of the toolkit
            num_search_pages (int): Default number of search results to retrieve
            max_content_words (int): Default word limit per result
            backend (str): Default search backend
            region (str): Default search region
            **kwargs: Extra keyword arguments forwarded to SearchDDGS
        """
        # The inner search object always carries the fixed name "DDGSSearch".
        engine = SearchDDGS(
            name="DDGSSearch",
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            backend=backend,
            region=region,
            **kwargs
        )

        super().__init__(name=name, tools=[DDGSSearchTool(search_ddgs=engine)])

        # Set only after the parent initializer has run, keeping the original order.
        self.search_ddgs = engine
|
|
|
|
|
|
|
|
class PERTSearchTool(Tool):
    """Tool named ``pert_search`` that looks up top regulators of a gene in a GRN table."""

    name: str = "pert_search"
    description: str = "Search gene regulatory network and return the gene-gene pair"
    inputs: Dict[str, Dict[str, str]] = {
        "source_gene_name": {
            "type": "string",
            "description": "name of perturbed gene"
        },
        "target_gene_name": {
            "type": "string",
            "description": "name of targeted gene"
        },
        "cell_line": {
            "type": "string",
            "description": "Name of selected cell line"
        },
    }
    required: Optional[List[str]] = ["source_gene_name", "target_gene_name", "cell_line"]

    def __init__(self, sourcekey='k562', toplist=20, grn_path: Optional[str] = None):
        """
        Load the gene regulatory network table for one cell line.

        Args:
            sourcekey: Cell-line key; used in the default file name and in
                the text returned by ``__call__``.
            toplist: Number of highest-scoring rows to report per query.
            grn_path: Path of the CSV to load (first column is used as the
                index). When omitted, the historical hard-coded location
                ``.../examples/pertqa/{sourcekey}_processed_grn.csv`` is used
                so existing callers keep working.
        """
        super().__init__()
        self.toplist = toplist
        self.sourcekey = sourcekey
        if grn_path is None:
            # Legacy default: machine-specific path kept only for backward compatibility.
            grn_path = f"/gpfs/radev/home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/{sourcekey}_processed_grn.csv"
        self.filelist = pd.read_csv(grn_path, index_col=0)

    def __call__(self, source_gene_name: str, target_gene_name: str, cell_line: str) -> Dict[str, Any]:
        """
        Report the strongest regulators of ``target_gene_name`` in the loaded table.

        The column named ``target_gene_name`` is sorted in descending order
        and its first ``self.toplist`` entries are formatted as
        ``RegulatorGeneName TargetGeneName Score`` lines, where the regulator
        name is the row index label.

        Note: ``source_gene_name`` and ``cell_line`` are accepted to match the
        declared tool inputs but do not influence the lookup; the table loaded
        for ``self.sourcekey`` is always the one queried.

        Returns:
            Dict[str, Any]: ``{"results": <text>, "error": None}`` on success,
            or ``{"results": [], "error": <message>}`` on failure.
        """
        log = logging.getLogger(__name__)
        log.debug(
            "pert_search called: source=%s target=%s cell_line=%s",
            source_gene_name, target_gene_name, cell_line,
        )
        try:
            if target_gene_name not in self.filelist.columns:
                # Explicit message instead of the bare KeyError repr.
                return {
                    "results": [],
                    "error": (
                        "Error executing Perturbation searching: "
                        f"gene '{target_gene_name}' not found in the "
                        f"{self.sourcekey} regulatory network"
                    ),
                }

            top = (
                self.filelist.loc[:, target_gene_name]
                .sort_values(ascending=False)
                .iloc[0:self.toplist]
            )
            header = (
                f'''The detected gene list and gene regulatory strength in cell line {self.sourcekey} is: '''
                '''RegulatorGeneName TargetGeneName Score\n'''
            )
            rows = [
                f'''{name} {target_gene_name} {sten}\n'''
                for name, sten in zip(top.index, top.values)
            ]
            searchinfo = header + "".join(rows)
            log.debug("%s", searchinfo)
            return {"results": searchinfo, "error": None}
        except Exception as e:
            return {"results": [], "error": f"Error executing Perturbation searching: {str(e)}"}


class PertToolkit(Toolkit):
    """Toolkit holding a single PERTSearchTool."""

    def __init__(
        self,
        name: str = "PertToolkit",
        sourcekey = "k562",
        toplist = 20,
        grn_path: Optional[str] = None,
        **kwargs
    ):
        """
        Create the perturbation search tool and register it.

        Args:
            name (str): Name of the toolkit
            sourcekey: Cell-line key passed to PERTSearchTool
            toplist: Number of top rows PERTSearchTool reports per query
            grn_path: Optional CSV path passed to PERTSearchTool; ``None``
                keeps that tool's default location
            **kwargs: Accepted for call compatibility; not used here
        """
        pert_tool = PERTSearchTool(sourcekey=sourcekey, toplist=toplist, grn_path=grn_path)
        super().__init__(name=name, tools=[pert_tool])
|
|
|
|
|
|