Spaces:
Sleeping
Sleeping
Update tools/SemanticScholar.py
Browse files- tools/SemanticScholar.py +30 -57
tools/SemanticScholar.py
CHANGED
|
@@ -2,73 +2,46 @@ import os
|
|
| 2 |
from smolagents import Tool
|
| 3 |
from googleapiclient.discovery import build
|
| 4 |
|
| 5 |
-
class
|
| 6 |
"""
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
"""
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
inputs = {
|
| 15 |
-
"query": {"type": "string", "description": "
|
| 16 |
}
|
|
|
|
| 17 |
output_type = "string"
|
| 18 |
|
| 19 |
def __init__(self, **kwargs):
|
| 20 |
super().__init__(**kwargs)
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
self.
|
| 25 |
-
|
| 26 |
-
if not self.api_key or not self.cse_id:
|
| 27 |
-
raise ValueError("GOOGLE_API_KEY or GOOGLE_CSE_ID secret not found.")
|
| 28 |
-
|
| 29 |
-
# Initialize the Google Custom Search service
|
| 30 |
-
self.service = build(
|
| 31 |
-
"customsearch", "v1", developerKey=self.api_key
|
| 32 |
-
)
|
| 33 |
|
| 34 |
def forward(self, query: str) -> str:
|
| 35 |
-
"""
|
| 36 |
-
Executes a Google search query, restricting results to academic domains.
|
| 37 |
-
"""
|
| 38 |
-
print(f"Executing Google Academic search for: '{query}'")
|
| 39 |
-
|
| 40 |
-
# Modify the query to prioritize academic sources
|
| 41 |
-
# We use OR to check multiple popular domains for better coverage
|
| 42 |
-
academic_filter = "site:scholar.google.com OR site:researchgate.net OR site:pubmed.ncbi.nlm.nih.gov"
|
| 43 |
-
google_academic_query = f"{query} {academic_filter}"
|
| 44 |
-
|
| 45 |
try:
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
search_results = []
|
| 59 |
-
for i, item in enumerate(items):
|
| 60 |
-
title = item.get('title', 'N/A')
|
| 61 |
-
snippet = item.get('snippet', 'No snippet available.')
|
| 62 |
-
link = item.get('link', 'N/A')
|
| 63 |
-
|
| 64 |
-
# Format similar to the previous academic tool
|
| 65 |
-
search_results.append(
|
| 66 |
-
f"RESULT {i+1}: **{title}**\n"
|
| 67 |
-
f"CONTENT: {snippet}\n"
|
| 68 |
-
f"SOURCE: {link}"
|
| 69 |
)
|
| 70 |
-
|
| 71 |
-
return "\n\n---SEPARATOR---\n\n".join(search_results)
|
| 72 |
-
|
| 73 |
except Exception as e:
|
| 74 |
-
return f"
|
|
|
|
| 2 |
from smolagents import Tool
|
| 3 |
from googleapiclient.discovery import build
|
| 4 |
|
| 5 |
+
class TavilyResearchTool(Tool):
    """
    Tavily deep-research search tool.

    Use this when the question needs academic papers,
    scientific background or research-level accuracy.
    """

    name = "tavily_research"
    description = (
        "Use Tavily deep research mode to find academic-level content "
        "including papers, research summaries, and high-quality sources."
    )

    inputs = {
        "query": {"type": "string", "description": "Research topic to search"}
    }

    output_type = "string"

    def __init__(self, **kwargs):
        """Build the Tavily client from the ``TAVILY_API_KEY`` environment variable.

        Args:
            **kwargs: Forwarded unchanged to ``Tool.__init__``.

        Raises:
            ValueError: If ``TAVILY_API_KEY`` is unset or empty.
        """
        super().__init__(**kwargs)
        api_key = os.getenv("TAVILY_API_KEY")
        if not api_key:
            raise ValueError("Missing TAVILY_API_KEY.")

        # The module header only imports os, smolagents.Tool and the Google API
        # client, so TavilyClient would be an undefined name here. Import it
        # locally so the tool can actually be instantiated.
        from tavily import TavilyClient

        self.client = TavilyClient(api_key=api_key)

    def forward(self, query: str) -> str:
        """Run a Tavily search for *query* and return the hits as one string.

        Each hit is rendered as ``TITLE`` / ``CONTENT`` / ``SOURCE`` lines and
        hits are joined with ``---SEPARATOR---``.

        Args:
            query: Research topic to search.

        Returns:
            The formatted results, a "no results" message when Tavily returns
            nothing, or a ``Tavily research error: ...`` message when the
            search or the formatting raises.
        """
        try:
            response = self.client.search(
                query=query,
                # "advanced" selects Tavily's deeper search depth; it is a
                # depth setting, not a dedicated academic mode.
                search_depth="advanced",
                max_results=5,
            )
            results = response.get("results") or []
            if not results:
                # An empty string gives the calling agent nothing to act on.
                return f"No Tavily results found for: '{query}'"

            # Fall back to placeholders so missing or null fields are not
            # rendered as the literal text "None".
            formatted = [
                f"TITLE: {r.get('title') or 'N/A'}\n"
                f"CONTENT: {r.get('content') or 'No content available.'}\n"
                f"SOURCE: {r.get('url') or 'N/A'}"
                for r in results
            ]
            return "\n\n---SEPARATOR---\n\n".join(formatted)
        except Exception as e:
            # Tool boundary: report the failure as text instead of raising so
            # the agent loop can continue.
            return f"Tavily research error: {e}"
|