# Codemaster67's picture
# Update app.py
# 2a494db verified
import os
from urllib.parse import urlparse

import requests
from fastmcp import FastMCP
# --- CONFIG ---
# Base endpoints of the external services queried by the tools in this file.
SERP_URL = "https://serpapi.com/search"
SEMANTIC_SCHOLAR_URL = "https://api.semanticscholar.org/graph/v1"
OPEN_ALEX_URL = "https://api.openalex.org"

# Fetch Keys from Hugging Face Secrets
# Each key is read from the process environment and is None when unset.
SERP_API_KEY = os.environ.get("SERP_API_KEY")
JINA_API_KEY = os.environ.get("JINA_API_KEY")
OPEN_ALEX_API_KEY = os.environ.get("OPEN_ALEX_API_KEY")
# --- HELPER ---
def reconstruct_abstract(abstract_inverted_index):
    """Rebuild plain abstract text from an inverted index.

    The index maps each word to the list of positions at which it occurs.
    Words are placed at their positions and joined with single spaces in
    ascending position order.

    Returns:
        The reconstructed text, "Abstract not available." when the index is
        empty or missing, or "Abstract reconstruction failed." when the
        index cannot be processed.
    """
    if not abstract_inverted_index:
        return "Abstract not available."
    try:
        # If two words claim the same position, the one iterated last wins.
        token_at = {
            position: token
            for token, positions in abstract_inverted_index.items()
            for position in positions
        }
        ordered_tokens = (token_at[position] for position in sorted(token_at))
        return " ".join(ordered_tokens)
    except Exception:
        # Best effort: a malformed index must not break the caller.
        return "Abstract reconstruction failed."
def _openalex_search(query: str, limit: int):
    """Internal helper: search OpenAlex and return normalized paper list.

    Args:
        query: Free-text search string, sent as the ``search`` parameter.
        limit: Number of results requested, sent as ``per_page``.

    Returns:
        A list of dicts with the keys ``paperId``, ``title``, ``authors``,
        ``year``, ``citationCount``, ``url``, ``openAccessPdf``,
        ``abstract``, ``externalIds`` and ``source`` (always ``"openalex"``).

    Raises:
        requests.RequestException: On network failure or a non-2xx response.
    """
    oa_params = {"search": query, "per_page": limit}
    headers = {"api-key": OPEN_ALEX_API_KEY} if OPEN_ALEX_API_KEY else {}
    res = requests.get(f"{OPEN_ALEX_URL}/works", params=oa_params, headers=headers, timeout=10)
    res.raise_for_status()
    results = res.json().get("results") or []

    normalized = []
    for work in results:
        # `.get(key, {})` only covers a missing key; a JSON null still comes
        # back as None, so fall back with `or` before chaining lookups.
        oa_url = (work.get("open_access") or {}).get("oa_url")
        authors = [
            {"name": (authorship.get("author") or {}).get("display_name")}
            for authorship in work.get("authorships") or []
        ]
        normalized.append({
            "paperId": work.get("id"),
            "title": work.get("title"),
            "authors": authors,
            "year": work.get("publication_year"),
            "citationCount": work.get("cited_by_count"),
            "url": work.get("doi"),
            "openAccessPdf": {"url": oa_url} if oa_url else None,
            "abstract": reconstruct_abstract(work.get("abstract_inverted_index")),
            "externalIds": work.get("ids") or {},
            "source": "openalex",
        })
    return normalized
# Server instance named "ResearchAgent"; the @mcp.tool() decorators in this
# file attach their functions to it.
mcp = FastMCP("ResearchAgent")
# --- 1. CONSOLIDATED SEARCH (Web & YouTube) ---
@mcp.tool()
def search_web(query: str, required_links: int = 10):
    """General search for websites, articles, and YouTube videos.

    Args:
        query: Search terms passed to the Google engine via SerpAPI.
        required_links: Number of results wanted; capped at 20.

    Returns:
        A list of ``{"title", "link", "snippet"}`` dicts (at most
        ``required_links`` long), or ``{"error": "..."}`` if the search
        could not be performed.
    """
    required_links = max(0, min(required_links, 20))
    if required_links == 0:
        return []
    if not SERP_API_KEY:
        # Fail fast with a clear message instead of a remote auth error.
        return {"error": "Search failed: SERP_API_KEY is not configured."}

    results = []
    start = 0
    while len(results) < required_links:
        params = {
            "engine": "google",
            "q": query,
            "api_key": SERP_API_KEY,
            "start": start,
        }
        try:
            # A timeout keeps a stalled connection from hanging the tool
            # forever, matching the other HTTP calls in this file.
            res = requests.get(SERP_URL, params=params, timeout=15)
            res.raise_for_status()
            organic = res.json().get("organic_results") or []
            if not organic:
                # No further pages: return whatever was collected so far.
                break
            for item in organic:
                results.append({
                    "title": item.get("title"),
                    "link": item.get("link"),
                    "snippet": item.get("snippet"),
                })
            start += 10  # advance the result offset to the next page
        except Exception as e:
            return {"error": f"Search failed: {e}"}
    return results[:required_links]
# --- 2. WEB CONTENT READER ---
@mcp.tool()
def fetch_web_content(url: str) -> str:
    """Extracts Markdown text from a URL. Does NOT work for YouTube links.

    Args:
        url: Address of the page to read.

    Returns:
        The page content as returned by the Jina reader endpoint, or a
        string starting with "Error" when the page cannot be read.
    """
    url = (url or "").strip()
    if not url:
        return "Error accessing page: no URL provided."

    # Decide on the host name rather than a substring of the whole URL, so
    # a page that merely mentions youtube.com in its path or query string is
    # not rejected. Scheme-less input gets "//" so urlparse sees a host.
    host = (urlparse(url if "://" in url else f"//{url}").hostname or "").lower()
    if host in ("youtube.com", "youtu.be") or host.endswith((".youtube.com", ".youtu.be")):
        return "Error: This tool cannot read YouTube videos. Please use a YouTube Transcript tool or summarize based on search snippets."

    reader_url = f"https://r.jina.ai/{url}"
    # The key is optional; without it the request is sent unauthenticated.
    headers = {"Authorization": f"Bearer {JINA_API_KEY}"} if JINA_API_KEY else {}
    try:
        response = requests.get(reader_url, headers=headers, timeout=15)
        response.raise_for_status()
        return response.text
    except Exception as e:
        return f"Error accessing page: {str(e)}"
# --- 3. ACADEMIC ENGINE ---
def _search_papers(query: str, limit: int):
    """Search Semantic Scholar for papers, falling back to OpenAlex.

    Kept as a plain, undecorated function so other tools can call it
    directly: depending on the FastMCP version, ``@mcp.tool()`` may return a
    tool object rather than the original callable.

    Returns:
        A non-empty list of Semantic Scholar paper dicts when that search
        succeeds with results; otherwise the OpenAlex result list; or an
        error string if the OpenAlex fallback raises.
    """
    search_url = f"{SEMANTIC_SCHOLAR_URL}/paper/search"
    params = {
        "query": query,
        "limit": limit,
        "fields": "paperId,title,authors,year,citationCount,url,openAccessPdf,abstract,externalIds",
    }
    try:
        res = requests.get(search_url, params=params, timeout=10)
        res.raise_for_status()
        data = res.json().get("data") or []
        if data:
            return data
        # An empty result set also falls through to the OpenAlex search.
    except Exception as e:
        print(f"[academic_research] Semantic Scholar failed: {e}. Falling back to OpenAlex...")
    try:
        return _openalex_search(query, limit)
    except Exception as e:
        return f"Academic search failed (both Semantic Scholar and OpenAlex): {e}"


@mcp.tool()
def academic_research(query: str, limit: int = 5):
    """Finds research papers, citation counts, and direct PDF links.

    Args:
        query: Title or keywords to search for.
        limit: Maximum number of papers to request.

    Returns:
        A list of paper dicts, or an error string if both Semantic Scholar
        and OpenAlex fail.
    """
    return _search_papers(query, limit)


# --- 4. GET PAPER ID ---
@mcp.tool()
def get_paper_id(query: str):
    """Search for a paper by title/keywords and return all available IDs.

    Args:
        query: Title or keywords identifying the paper.

    Returns:
        A dict with ``title``, ``paperId``, ``doi``, ``openalex``, ``arxiv``
        and ``source`` for the top hit, or an explanatory string when no
        paper is found or the search fails.
    """
    # Call the undecorated helper, not the academic_research tool, so this
    # works whether or not the decorator preserves the callable.
    results = _search_papers(query, 1)
    if isinstance(results, list) and results:
        paper = results[0]
        # A JSON null arrives as None, so `or` is needed, not a .get default.
        ext_ids = paper.get("externalIds") or {}
        paper_id = paper.get("paperId") or ""
        return {
            "title": paper.get("title"),
            "paperId": paper_id,
            # Both spellings of each key are accepted because the two
            # upstream sources are not normalized to one casing here.
            "doi": ext_ids.get("DOI") or ext_ids.get("doi"),
            "openalex": ext_ids.get("openalex") or (paper_id if "openalex.org" in str(paper_id) else None),
            "arxiv": ext_ids.get("ArXiv") or ext_ids.get("arxiv"),
            # Semantic Scholar hits carry no "source" key; only the
            # OpenAlex helper sets one.
            "source": paper.get("source", "semantic_scholar"),
        }
    return "No paper found or an error occurred during ID lookup."
# --- 5. FIND RELATED PAPERS ---
@mcp.tool()
def find_related_papers(paper_id: str, limit: int = 5):
    """Finds similar or recommended papers based on a Paper ID.

    Args:
        paper_id: A Semantic Scholar paper ID, an OpenAlex work URL, or a
            DOI (bare ``10.x/...``, ``DOI:``-prefixed, or a doi.org URL).
        limit: Maximum number of related papers to request.

    Returns:
        A list of paper dicts with ``paperId``, ``title``, ``authors``,
        ``year``, ``citationCount`` and ``url``, or an error string when no
        lookup succeeds.
    """
    paper_id = (paper_id or "").strip()
    if not paper_id:
        return "Could not find related papers: no paper ID provided."

    is_openalex = "openalex.org" in paper_id

    # Extract a bare DOI if one was supplied in any of the accepted forms.
    doi = None
    if paper_id.lower().startswith("doi:"):
        doi = paper_id[4:].strip()
    elif paper_id.startswith("10.") or "doi.org" in paper_id:
        doi = paper_id.replace("https://doi.org/", "").replace("http://doi.org/", "")

    if not is_openalex:
        # Recommendations are served by a separate API root, not the
        # /graph/v1 root held in SEMANTIC_SCHOLAR_URL.
        s2_id = f"DOI:{doi}" if doi else paper_id  # DOIs need the "DOI:" prefix
        rec_url = f"https://api.semanticscholar.org/recommendations/v1/papers/forpaper/{s2_id}"
        params = {"limit": limit, "fields": "paperId,title,authors,year,citationCount,url"}
        try:
            res = requests.get(rec_url, params=params, timeout=10)
            res.raise_for_status()
            return res.json().get("recommendedPapers", [])
        except Exception as e:
            print(f"[find_related_papers] Semantic Scholar failed: {e}. Falling back to OpenAlex...")

    if is_openalex:
        oa_filter = f"related_to:{paper_id}"
    elif doi:
        # NOTE(review): confirm OpenAlex accepts the `doi:` form inside a
        # related_to filter; kept as in the original implementation.
        oa_filter = f"related_to:doi:{doi}"
    else:
        return "Could not find related papers: provide an OpenAlex ID or DOI for the OpenAlex fallback."

    try:
        oa_url = f"{OPEN_ALEX_URL}/works"
        oa_params = {"filter": oa_filter, "per_page": limit}
        headers = {"api-key": OPEN_ALEX_API_KEY} if OPEN_ALEX_API_KEY else {}
        res = requests.get(oa_url, params=oa_params, headers=headers, timeout=10)
        res.raise_for_status()
        results = res.json().get("results") or []
        return [{
            "paperId": work.get("id"),
            "title": work.get("title"),
            # `or {}` guards against an explicit null author object.
            "authors": [
                {"name": (authorship.get("author") or {}).get("display_name")}
                for authorship in work.get("authorships") or []
            ],
            "year": work.get("publication_year"),
            "citationCount": work.get("cited_by_count"),
            "url": work.get("doi"),
        } for work in results]
    except Exception as e:
        return f"Could not find related papers: {e}"
# Start the server only when this file is executed directly, not on import.
if __name__ == "__main__":
    # Serve over HTTP, bound to every network interface, on port 7860.
    mcp.run(transport="http", host="0.0.0.0", port=7860)