Sborole committed on
Commit
842a0b0
·
verified ·
1 Parent(s): c7685dc

Update tools/SemanticScholar.py

Browse files
Files changed (1) hide show
  1. tools/SemanticScholar.py +60 -27
tools/SemanticScholar.py CHANGED
@@ -1,41 +1,74 @@
 
1
  from smolagents import Tool
2
- from semanticscholar import SemanticScholar
3
 
4
- class AcademicPaperSearchTool(Tool):
 
 
 
 
 
5
  name = "academic_paper_search"
6
- description = "Use Websearch first for getting factual data. If needed use this tool to Search academic papers via Semantic Scholar and returns the most relevant titles and abstracts."
 
7
  inputs = {
8
- "query": {"type": "string", "description": "Search query for academic papers (title, keywords, etc.)"}
9
  }
10
  output_type = "string"
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def forward(self, query: str) -> str:
13
- sch = SemanticScholar()
14
- try:
15
- # Use query=query to avoid potential keyword issues with the SemanticScholar method
16
- papers = sch.search_paper(query, limit=3) # get top-3 matching papers
17
- except Exception as e:
18
- return f"An error occurred during search: {e}"
 
 
 
19
 
20
- if not papers:
21
- return "No papers found."
 
 
 
 
 
22
 
23
- # Format results concisely
24
- lines = []
25
- for p in papers:
26
- # --- THE FIX IS HERE ---
27
- # Access attributes directly using dot notation (p.title, p.year, p.abstract)
28
- # instead of the dictionary method p.get(...)
29
- title = p.title
30
- year = p.year
31
- abstract = p.abstract if p.abstract else "" # Check if abstract exists
32
 
33
- # Only take the first sentence or two of the abstract
34
- abstract_snip = abstract.split(". ")[0] if abstract else ""
35
 
36
- # Ensure 'year' is handled gracefully (it can sometimes be None)
37
- year_str = str(year) if year else "N/A"
 
 
 
 
 
 
 
 
 
 
38
 
39
- lines.append(f"**{title}** ({year_str}): {abstract_snip}...")
40
 
41
- return "\n".join(lines)
 
 
1
+ import os
2
  from smolagents import Tool
3
+ from googleapiclient.discovery import build
4
 
5
+ class GoogleAcademicSearchTool(Tool):
6
+ """
7
+ A specialized search tool that uses the Google Custom Search Engine (CSE)
8
+ API to find academic papers by applying strong search filters (e.g., site:scholar.google.com).
9
+ This replaces the Semantic Scholar tool which was experiencing timeouts.
10
+ """
11
  name = "academic_paper_search"
12
+ description = "Searches for academic papers by restricting the Google search to academic domains (like Google Scholar) and returns relevant titles, snippets, and source links."
13
+
14
  inputs = {
15
+ "query": {"type": "string", "description": "Search query for academic papers (title, keywords, author, etc.)"}
16
  }
17
  output_type = "string"
18
 
19
+ def __init__(self, **kwargs):
20
+ super().__init__(**kwargs)
21
+
22
+ # Retrieve credentials from environment variables
23
+ self.api_key = os.getenv("GOOGLE_API_KEY")
24
+ self.cse_id = os.getenv("GOOGLE_CSE_ID")
25
+
26
+ if not self.api_key or not self.cse_id:
27
+ raise ValueError("GOOGLE_API_KEY or GOOGLE_CSE_ID secret not found.")
28
+
29
+ # Initialize the Google Custom Search service
30
+ self.service = build(
31
+ "customsearch", "v1", developerKey=self.api_key
32
+ )
33
+
34
  def forward(self, query: str) -> str:
35
+ """
36
+ Executes a Google search query, restricting results to academic domains.
37
+ """
38
+ print(f"Executing Google Academic search for: '{query}'")
39
+
40
+ # Modify the query to prioritize academic sources
41
+ # We use OR to check multiple popular domains for better coverage
42
+ academic_filter = "site:scholar.google.com OR site:researchgate.net OR site:pubmed.ncbi.nlm.nih.gov"
43
+ google_academic_query = f"{query} {academic_filter}"
44
 
45
+ try:
46
+ # Execute the search request for up to 3 results
47
+ res = self.service.cse().list(
48
+ q=google_academic_query,
49
+ cx=self.cse_id,
50
+ num=3
51
+ ).execute()
52
 
53
+ items = res.get('items', [])
 
 
 
 
 
 
 
 
54
 
55
+ if not items:
56
+ return "XX record info: No academic results found using Google search filters."
57
 
58
+ search_results = []
59
+ for i, item in enumerate(items):
60
+ title = item.get('title', 'N/A')
61
+ snippet = item.get('snippet', 'No snippet available.')
62
+ link = item.get('link', 'N/A')
63
+
64
+ # Format similar to the previous academic tool
65
+ search_results.append(
66
+ f"RESULT {i+1}: **{title}**\n"
67
+ f"CONTENT: {snippet}\n"
68
+ f"SOURCE: {link}"
69
+ )
70
 
71
+ return "\n\n---SEPARATOR---\n\n".join(search_results)
72
 
73
+ except Exception as e:
74
+ return f"Error during Google Academic Search API call: {e}"