# snowman-ai / mcp_tools.py
# feat: Snowman AI - MCP Literature Review Assistant for Hackathon (commit 04fa10d, author: nextmarte)
"""
MCP Tools for Snowman AI Agent.
Estas funções são expostas como ferramentas MCP que podem ser usadas
por LLMs como Claude Desktop, Cursor, etc.
"""
import os
import json
from typing import Optional, List
from search_services import CascadeSearcher, SearchResult, classify_reference
from cache import get_cache
def search_academic_reference(
    reference: str,
    include_abstract: bool = True
) -> str:
    """
    Search for an academic reference and retrieve its metadata and abstract.

    This tool searches multiple academic databases (CrossRef, Semantic Scholar,
    OpenAlex, DuckDuckGo) to find information about a bibliographic reference.

    Args:
        reference: The bibliographic reference text to search for. Can be a full
            citation, paper title, or DOI (e.g., "10.1000/xyz123").
        include_abstract: Whether to include the abstract in the response.

    Returns:
        JSON string containing the search result with title, authors, year,
        abstract, DOI, and URL.

    Example:
        >>> search_academic_reference("Smith et al. 2020 Machine Learning in Healthcare")
        {"title": "Machine Learning Applications in Healthcare", "authors": "John Smith, ...", ...}
    """
    # Imported lazily, as in the original code — presumably to avoid a
    # circular import with the cache module; TODO confirm.
    from cache import CACHE_NOT_FOUND

    def _serialize(res, status: str, source: str) -> str:
        # Shared serializer for cache hits and fresh searches; removes the
        # duplicated dict-building the two paths previously carried.
        payload = {
            "status": status,
            "source": source,
            "title": res.title,
            "authors": ", ".join(res.authors or []),
            "year": res.year,
            "doi": res.doi or "N/A",
            "url": res.url,
        }
        if include_abstract:
            payload["abstract"] = res.abstract
        return json.dumps(payload, ensure_ascii=False, indent=2)

    # Check the cache first to avoid repeated network searches.
    cache = get_cache()
    cached = cache.get(reference)
    if cached is not None:
        if cached == CACHE_NOT_FOUND:
            # Negative cache hit: this reference was searched before and missed.
            return json.dumps({
                "status": "not_found",
                "message": "Reference not found in academic databases",
                "query": reference[:100]
            }, ensure_ascii=False)
        return _serialize(cached, "found", f"cache ({cached.source})")

    # Cache miss: run the cascade search (Tavily only when an API key is set).
    searcher = CascadeSearcher(
        log_callback=None, use_tavily=bool(os.getenv("TAVILY_API_KEY")))
    try:
        search_result = searcher.search(reference)
        cache.set(reference, search_result)
        # "Não encontrado" is the searcher's sentinel title for a miss.
        status = "found" if search_result.title != "Não encontrado" else "not_found"
        return _serialize(search_result, status, search_result.source)
    except Exception as e:
        # Broad catch is deliberate: an MCP tool response must always be
        # valid JSON, never a raw traceback.
        return json.dumps({
            "status": "error",
            "message": str(e),
            "query": reference[:100]
        }, ensure_ascii=False)
    finally:
        searcher.close()
def get_abstract_by_doi(doi: str) -> str:
    """
    Retrieve the abstract of an academic paper using its DOI.

    This tool fetches paper metadata and abstract from academic databases
    using the DOI (Digital Object Identifier).

    Args:
        doi: The DOI of the paper (e.g., "10.1000/xyz123" or full URL
            "https://doi.org/10.1000/xyz123").

    Returns:
        JSON string containing the paper's title, authors, abstract, and URL.

    Example:
        >>> get_abstract_by_doi("10.1038/nature12373")
        {"title": "...", "abstract": "...", "authors": "...", ...}
    """
    # Normalize: strip surrounding whitespace, then remove a single *leading*
    # resolver prefix.  The previous str.replace() approach would also mangle
    # any occurrence of the prefix inside the identifier itself.
    doi = doi.strip()
    prefixes = (
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",  # legacy resolver host, still common in citations
        "http://dx.doi.org/",
        "doi:",
    )
    for prefix in prefixes:
        if doi.startswith(prefix):
            doi = doi[len(prefix):]
            break
    # Delegate to the generic search tool with a DOI-tagged query.
    return search_academic_reference(f"DOI: {doi}", include_abstract=True)
def classify_reference_type(reference: str) -> str:
    """
    Classify the type of a bibliographic reference.

    This tool analyzes a reference text and determines its type (article,
    book, chapter, website, thesis, report, or legislation).

    Args:
        reference: The bibliographic reference text to classify.

    Returns:
        JSON string with the classification result and explanation.

    Example:
        >>> classify_reference_type("Smith, J. (2020). Title. Journal of Science, 10(2), 100-120.")
        {"type": "article", "description": "Scientific journal article"}
    """
    # Human-readable labels for each classifier output.
    descriptions = {
        "article": "Scientific journal article, conference paper, or proceedings",
        "book": "Complete book or monograph",
        "chapter": "Book chapter with editors",
        "website": "Web page, blog post, or online resource",
        "thesis": "PhD dissertation or Master's thesis",
        "report": "Technical report, working paper, or white paper",
        "legislation": "Law, regulation, decree, or legal document",
        "other": "Other type of reference",
    }
    kind = classify_reference(reference)
    payload = {
        "type": kind,
        "description": descriptions.get(kind, "Unknown type"),
        # Only journal-article references are worth sending to the
        # academic-database search cascade.
        "searchable": kind == "article",
    }
    return json.dumps(payload, ensure_ascii=False, indent=2)
def evaluate_paper_relevance(
    paper_title: str,
    paper_abstract: str,
    research_topic: str,
    inclusion_criteria: str = "",
    exclusion_criteria: str = ""
) -> str:
    """
    Evaluate if a paper is relevant for a systematic literature review.

    This tool uses AI to analyze whether a paper should be included or
    excluded from a systematic review based on the provided criteria.

    Args:
        paper_title: The title of the paper to evaluate.
        paper_abstract: The abstract of the paper.
        research_topic: The main topic/objective of your literature review.
        inclusion_criteria: Criteria for including papers (one per line).
        exclusion_criteria: Criteria for excluding papers (one per line).

    Returns:
        JSON string with the evaluation decision and justification.

    Example:
        >>> evaluate_paper_relevance(
        ...     "AI in Healthcare",
        ...     "This paper presents...",
        ...     "Machine learning applications in medicine"
        ... )
        {"decision": "INCLUDE", "reason": "Directly addresses the research topic..."}
    """
    # Imported lazily so the graph (LLM pipeline) module is only loaded
    # when this tool is actually invoked.
    from graph import evaluate_paper_for_rsl

    # The research topic doubles as both the review title and its objectives.
    verdict = evaluate_paper_for_rsl(
        paper_title=paper_title,
        paper_abstract=paper_abstract,
        review_title=research_topic,
        review_objectives=research_topic,
        inclusion_criteria=inclusion_criteria,
        exclusion_criteria=exclusion_criteria,
    )
    # NOTE(review): the evaluator returns Portuguese keys ("decisao"/"motivo");
    # a missing decision defaults to "INCLUDE".
    response = {
        "decision": verdict.get("decisao", "INCLUDE"),
        "reason": verdict.get("motivo", ""),
        "paper_title": paper_title[:100],
    }
    return json.dumps(response, ensure_ascii=False, indent=2)
def batch_search_references(references_json: str) -> str:
    """
    Search for multiple academic references in batch.

    This tool performs searches for multiple references, returning
    metadata and abstracts for each.

    Args:
        references_json: A JSON array of reference strings to search.
            Example: '["Smith 2020 Title...", "Jones 2019 Another..."]'

    Returns:
        JSON string containing an array of search results.

    Example:
        >>> batch_search_references('["Smith 2020 Machine Learning", "Jones 2019 Deep Learning"]')
        {"total": 2, "found": 2, "results": [...]}
    """
    # Parse and validate the input: it must be a JSON array of strings.
    try:
        references = json.loads(references_json)
    except json.JSONDecodeError as e:
        return json.dumps({"error": f"Invalid JSON: {str(e)}"})
    if not isinstance(references, list):
        return json.dumps({"error": "Input must be a JSON array of strings"})
    # Reject non-string entries up front; previously they were passed through
    # to the search layer, where they could raise an unhandled exception.
    if not all(isinstance(ref, str) for ref in references):
        return json.dumps({"error": "Input must be a JSON array of strings"})

    results = []
    found_count = 0
    # Cap at 20 searches to avoid overloading the upstream services; the
    # "total" vs "searched" fields in the response reveal any truncation.
    for ref in references[:20]:
        result = json.loads(search_academic_reference(ref, include_abstract=True))
        results.append(result)
        if result.get("status") == "found":
            found_count += 1
    return json.dumps({
        "total": len(references),
        "searched": len(results),
        "found": found_count,
        "results": results
    }, ensure_ascii=False, indent=2)
def get_cache_statistics() -> str:
    """
    Get statistics about the reference cache.

    This tool returns information about cached search results,
    including hit rates and storage statistics.

    Returns:
        JSON string with cache statistics.
    """
    cache = get_cache()
    search_stats = cache.get_stats()
    pdf_stats = cache.get_pdf_cache_stats()
    # Every field falls back to a zero value so the report is complete even
    # when the underlying stats dicts omit a key.
    report = {
        "search_cache": {
            "total_entries": search_stats.get("total_entries", 0),
            "found_references": search_stats.get("found", 0),
            "not_found": search_stats.get("not_found", 0),
            "total_hits": search_stats.get("total_hits", 0),
            "by_source": search_stats.get("by_source", {}),
        },
        "pdf_cache": {
            key: pdf_stats.get(key, 0)
            for key in ("pdfs_cached", "total_refs_cached", "total_hits")
        },
    }
    return json.dumps(report, ensure_ascii=False, indent=2)