Spaces:
Sleeping
Sleeping
Update tools/SemanticScholar.py
Browse files- tools/SemanticScholar.py +60 -27
tools/SemanticScholar.py
CHANGED
|
@@ -1,41 +1,74 @@
|
|
|
|
|
| 1 |
from smolagents import Tool
|
| 2 |
-
from
|
| 3 |
|
| 4 |
-
class
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
name = "academic_paper_search"
|
| 6 |
-
description = "
|
|
|
|
| 7 |
inputs = {
|
| 8 |
-
"query": {"type": "string", "description": "Search query for academic papers (title, keywords, etc.)"}
|
| 9 |
}
|
| 10 |
output_type = "string"
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
def forward(self, query: str) -> str:
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
|
| 24 |
-
lines = []
|
| 25 |
-
for p in papers:
|
| 26 |
-
# --- THE FIX IS HERE ---
|
| 27 |
-
# Access attributes directly using dot notation (p.title, p.year, p.abstract)
|
| 28 |
-
# instead of the dictionary method p.get(...)
|
| 29 |
-
title = p.title
|
| 30 |
-
year = p.year
|
| 31 |
-
abstract = p.abstract if p.abstract else "" # Check if abstract exists
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
|
| 40 |
|
| 41 |
-
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
from smolagents import Tool
|
| 3 |
+
from googleapiclient.discovery import build
|
| 4 |
|
| 5 |
+
class GoogleAcademicSearchTool(Tool):
    """Search for academic papers through the Google Custom Search (CSE) API.

    The caller's query is sent to a Google Custom Search Engine with
    ``site:`` filters appended, so results are restricted to a fixed set of
    academic domains (Google Scholar, ResearchGate, PubMed).
    This replaces the Semantic Scholar tool which was experiencing timeouts.

    Credentials are read from the ``GOOGLE_API_KEY`` and ``GOOGLE_CSE_ID``
    environment variables when the tool is constructed.
    """

    name = "academic_paper_search"
    description = "Searches for academic papers by restricting the Google search to academic domains (like Google Scholar) and returns relevant titles, snippets, and source links."

    inputs = {
        "query": {"type": "string", "description": "Search query for academic papers (title, keywords, author, etc.)"}
    }
    output_type = "string"

    def __init__(self, **kwargs):
        """Read the API credentials and build the Custom Search client.

        Args:
            **kwargs: Forwarded unchanged to the parent ``Tool`` initializer.

        Raises:
            ValueError: If ``GOOGLE_API_KEY`` or ``GOOGLE_CSE_ID`` is unset
                or empty in the environment.
        """
        super().__init__(**kwargs)

        # Retrieve credentials from environment variables.
        self.api_key = os.getenv("GOOGLE_API_KEY")
        self.cse_id = os.getenv("GOOGLE_CSE_ID")

        if not self.api_key or not self.cse_id:
            raise ValueError("GOOGLE_API_KEY or GOOGLE_CSE_ID secret not found.")

        # Initialize the Google Custom Search service.
        self.service = build(
            "customsearch", "v1", developerKey=self.api_key
        )

    def forward(self, query: str) -> str:
        """Run a Google search restricted to academic domains.

        Args:
            query: Free-text search terms (title, keywords, author, etc.).

        Returns:
            Up to three results, each formatted as ``RESULT n`` / ``CONTENT`` /
            ``SOURCE`` lines and joined with a ``---SEPARATOR---`` line.
            When the query is empty, nothing is found, or the API call
            fails, a human-readable message is returned instead; this
            method does not raise for those cases.
        """
        # An empty query would leave only the site filters in the request,
        # spending API quota on results unrelated to what the caller wants.
        cleaned_query = (query or "").strip()
        if not cleaned_query:
            return "Error: the search query is empty; provide a title, keywords, or an author."

        print(f"Executing Google Academic search for: '{cleaned_query}'")

        # Modify the query to prioritize academic sources.
        # We use OR to check multiple popular domains for better coverage.
        academic_filter = "site:scholar.google.com OR site:researchgate.net OR site:pubmed.ncbi.nlm.nih.gov"
        google_academic_query = f"{cleaned_query} {academic_filter}"

        # Only the remote call is guarded, so the error text below really
        # does describe an API failure. The failure is returned as text
        # rather than raised.
        try:
            res = self.service.cse().list(
                q=google_academic_query,
                cx=self.cse_id,
                num=3,
            ).execute()
        except Exception as e:
            return f"Error during Google Academic Search API call: {e}"

        # "items" is absent when nothing matched; also tolerate an explicit null.
        items = res.get('items') or []
        if not items:
            return "XX record info: No academic results found using Google search filters."

        # Format similar to the previous academic tool.
        search_results = [
            f"RESULT {position}: **{item.get('title', 'N/A')}**\n"
            f"CONTENT: {item.get('snippet', 'No snippet available.')}\n"
            f"SOURCE: {item.get('link', 'N/A')}"
            for position, item in enumerate(items, start=1)
        ]

        return "\n\n---SEPARATOR---\n\n".join(search_results)
|