Sborole committed on
Commit
7de8b44
·
verified ·
1 Parent(s): b0cdee0

Update tools/SemanticScholar.py

Browse files
Files changed (1) hide show
  1. tools/SemanticScholar.py +30 -57
tools/SemanticScholar.py CHANGED
@@ -2,73 +2,46 @@ import os
2
  from smolagents import Tool
3
  from googleapiclient.discovery import build
4
 
5
class GoogleAcademicSearchTool(Tool):
    """
    Academic paper search backed by the Google Custom Search Engine (CSE) API.

    The user's query is extended with `site:` filters for a fixed set of
    academic domains, and up to three hits are returned as formatted text.
    """

    name = "academic_paper_search"
    description = "Searches for academic papers by restricting the Google search to academic domains (like Google Scholar) and returns relevant titles, snippets, and source links."
    inputs = {
        "query": {"type": "string", "description": "Search query for academic papers (title, keywords, author, etc.)"}
    }
    output_type = "string"

    def __init__(self, **kwargs):
        """
        Read the Google credentials from the environment and build the client.

        Raises:
            ValueError: if GOOGLE_API_KEY or GOOGLE_CSE_ID is unset or empty.
        """
        super().__init__(**kwargs)

        # Both values come from environment variables.
        self.api_key = os.environ.get("GOOGLE_API_KEY")
        self.cse_id = os.environ.get("GOOGLE_CSE_ID")
        if not (self.api_key and self.cse_id):
            raise ValueError("GOOGLE_API_KEY or GOOGLE_CSE_ID secret not found.")

        # Client handle for the Custom Search API, reused by every forward() call.
        self.service = build("customsearch", "v1", developerKey=self.api_key)

    def forward(self, query: str) -> str:
        """
        Run the query against the academic-domain filter and format the hits.

        Returns the formatted results joined by a separator line, a fixed
        "no results" message when nothing is found, or an error message string
        if the API call raises.
        """
        print(f"Executing Google Academic search for: '{query}'")

        # OR-combined site filters so any of the listed domains may match.
        domain_clause = "site:scholar.google.com OR site:researchgate.net OR site:pubmed.ncbi.nlm.nih.gov"
        restricted_query = f"{query} {domain_clause}"

        try:
            # Ask for at most 3 results.
            request = self.service.cse().list(
                q=restricted_query,
                cx=self.cse_id,
                num=3,
            )
            hits = request.execute().get('items', [])
            if not hits:
                return "XX record info: No academic results found using Google search filters."

            formatted = [
                f"RESULT {rank}: **{hit.get('title', 'N/A')}**\n"
                f"CONTENT: {hit.get('snippet', 'No snippet available.')}\n"
                f"SOURCE: {hit.get('link', 'N/A')}"
                for rank, hit in enumerate(hits, start=1)
            ]
            return "\n\n---SEPARATOR---\n\n".join(formatted)
        except Exception as e:
            # Errors are reported to the caller as text rather than raised.
            return f"Error during Google Academic Search API call: {e}"
 
2
  from smolagents import Tool
3
  from googleapiclient.discovery import build
4
 
5
class TavilyResearchTool(Tool):
    """
    Tavily deep-research search tool.
    Use this when the question needs academic papers,
    scientific background or research-level accuracy.
    """

    name = "tavily_research"
    description = (
        "Use Tavily deep research mode to find academic-level content "
        "including papers, research summaries, and high-quality sources."
    )

    inputs = {
        "query": {"type": "string", "description": "Research topic to search"}
    }

    output_type = "string"

    def __init__(self, **kwargs):
        """
        Read the Tavily API key from the environment and create the client.

        Raises:
            ValueError: if TAVILY_API_KEY is unset or empty.
            ImportError: if the `tavily` package is not installed.
        """
        super().__init__(**kwargs)
        api_key = os.getenv("TAVILY_API_KEY")
        if not api_key:
            raise ValueError("Missing TAVILY_API_KEY.")

        # The module's top-level imports do not bring in TavilyClient, so the
        # original code raised NameError here. Import it locally, after the
        # key check so a missing key is still reported first.
        from tavily import TavilyClient

        self.client = TavilyClient(api_key=api_key)

    def forward(self, query: str) -> str:
        """
        Search Tavily for `query` and return the hits as formatted text.

        Each hit is rendered as TITLE / CONTENT / SOURCE lines and hits are
        joined by a separator line. Returns a "no results" message when the
        response contains no results, or an error message string if the
        client call raises.
        """
        try:
            response = self.client.search(
                query=query,
                search_depth="advanced",  # request Tavily's "advanced" search depth
                max_results=5,
            )
            results = response.get("results", [])
            if not results:
                # Say so explicitly instead of handing the caller an empty string.
                return "Tavily research: no results found."

            out = [
                f"TITLE: {r.get('title')}\n"
                f"CONTENT: {r.get('content')}\n"
                f"SOURCE: {r.get('url')}"
                for r in results
            ]
            return "\n\n---SEPARATOR---\n\n".join(out)
        except Exception as e:
            # Errors are reported to the caller as text rather than raised.
            return f"Tavily research error: {e}"