Spaces:
Sleeping
Sleeping
"""
MCP Tools for the Snowman AI Agent.

These functions are exposed as MCP tools that can be used
by LLMs such as Claude Desktop, Cursor, etc.
"""
import os
import json
from typing import Optional, List
from search_services import CascadeSearcher, SearchResult, classify_reference
from cache import get_cache
def search_academic_reference(
    reference: str,
    include_abstract: bool = True
) -> str:
    """
    Search for an academic reference and retrieve its metadata and abstract.

    This tool searches multiple academic databases (CrossRef, Semantic Scholar,
    OpenAlex, DuckDuckGo) to find information about a bibliographic reference.

    Args:
        reference: The bibliographic reference text to search for. Can be a full
            citation, paper title, or DOI (e.g., "10.1000/xyz123").
        include_abstract: Whether to include the abstract in the response.

    Returns:
        JSON string containing the search result with title, authors, year,
        abstract, DOI, and URL.

    Example:
        >>> search_academic_reference("Smith et al. 2020 Machine Learning in Healthcare")
        {"title": "Machine Learning Applications in Healthcare", "authors": "John Smith, ...", ...}
    """
    # Hoisted out of the branch so it is imported once per call, not per hit.
    from cache import CACHE_NOT_FOUND

    def _not_found_json() -> str:
        # Compact (non-indented) payload, matching the original contract
        # for cache-negative answers.
        return json.dumps({
            "status": "not_found",
            "message": "Reference not found in academic databases",
            "query": reference[:100]
        }, ensure_ascii=False)

    def _result_json(res, source: str, status: str) -> str:
        # Serialize a SearchResult into the tool's JSON contract.
        # Shared by the cache-hit and fresh-search paths (was duplicated).
        payload = {
            "status": status,
            "source": source,
            "title": res.title,
            "authors": ", ".join(res.authors or []),
            "year": res.year,
            "doi": res.doi or "N/A",
            "url": res.url,
        }
        if include_abstract:
            payload["abstract"] = res.abstract
        return json.dumps(payload, ensure_ascii=False, indent=2)

    # Check cache first.
    cache = get_cache()
    cached = cache.get(reference)
    if cached is not None:
        if cached == CACHE_NOT_FOUND:
            return _not_found_json()
        return _result_json(cached, f"cache ({cached.source})", "found")

    # Cache miss: run the cascade search across backends.
    searcher = CascadeSearcher(
        log_callback=None, use_tavily=bool(os.getenv("TAVILY_API_KEY")))
    try:
        search_result = searcher.search(reference)
        cache.set(reference, search_result)
        # "Não encontrado" is the sentinel title the searcher uses for misses.
        status = ("found" if search_result.title != "Não encontrado"
                  else "not_found")
        return _result_json(search_result, search_result.source, status)
    except Exception as e:
        # Surface the failure to the LLM caller as structured JSON
        # rather than raising through the MCP boundary.
        return json.dumps({
            "status": "error",
            "message": str(e),
            "query": reference[:100]
        }, ensure_ascii=False)
    finally:
        searcher.close()
| def get_abstract_by_doi(doi: str) -> str: | |
| """ | |
| Retrieve the abstract of an academic paper using its DOI. | |
| This tool fetches paper metadata and abstract from academic databases | |
| using the DOI (Digital Object Identifier). | |
| Args: | |
| doi: The DOI of the paper (e.g., "10.1000/xyz123" or full URL | |
| "https://doi.org/10.1000/xyz123"). | |
| Returns: | |
| JSON string containing the paper's title, authors, abstract, and URL. | |
| Example: | |
| >>> get_abstract_by_doi("10.1038/nature12373") | |
| {"title": "...", "abstract": "...", "authors": "...", ...} | |
| """ | |
| # Clean DOI | |
| doi = doi.strip() | |
| if doi.startswith("https://doi.org/"): | |
| doi = doi.replace("https://doi.org/", "") | |
| elif doi.startswith("http://doi.org/"): | |
| doi = doi.replace("http://doi.org/", "") | |
| elif doi.startswith("doi:"): | |
| doi = doi.replace("doi:", "") | |
| return search_academic_reference(f"DOI: {doi}", include_abstract=True) | |
def classify_reference_type(reference: str) -> str:
    """
    Classify the type of a bibliographic reference.

    Analyzes a reference text and determines its type (article, book,
    chapter, website, thesis, report, or legislation).

    Args:
        reference: The bibliographic reference text to classify.

    Returns:
        JSON string with the classification result and explanation.

    Example:
        >>> classify_reference_type("Smith, J. (2020). Title. Journal of Science, 10(2), 100-120.")
        {"type": "article", "description": "Scientific journal article"}
    """
    descriptions = {
        "article": "Scientific journal article, conference paper, or proceedings",
        "book": "Complete book or monograph",
        "chapter": "Book chapter with editors",
        "website": "Web page, blog post, or online resource",
        "thesis": "PhD dissertation or Master's thesis",
        "report": "Technical report, working paper, or white paper",
        "legislation": "Law, regulation, decree, or legal document",
        "other": "Other type of reference",
    }
    kind = classify_reference(reference)
    payload = {
        "type": kind,
        "description": descriptions.get(kind, "Unknown type"),
        # Only plain articles are worth sending to the academic-search cascade.
        "searchable": kind == "article",
    }
    return json.dumps(payload, ensure_ascii=False, indent=2)
def evaluate_paper_relevance(
    paper_title: str,
    paper_abstract: str,
    research_topic: str,
    inclusion_criteria: str = "",
    exclusion_criteria: str = ""
) -> str:
    """
    Evaluate if a paper is relevant for a systematic literature review.

    Uses AI to analyze whether a paper should be included or excluded
    from a systematic review based on the provided criteria.

    Args:
        paper_title: The title of the paper to evaluate.
        paper_abstract: The abstract of the paper.
        research_topic: The main topic/objective of your literature review.
        inclusion_criteria: Criteria for including papers (one per line).
        exclusion_criteria: Criteria for excluding papers (one per line).

    Returns:
        JSON string with the evaluation decision and justification.

    Example:
        >>> evaluate_paper_relevance(
        ...     "AI in Healthcare",
        ...     "This paper presents...",
        ...     "Machine learning applications in medicine"
        ... )
        {"decision": "INCLUDE", "reason": "Directly addresses the research topic..."}
    """
    # Imported lazily to keep the heavy graph/LLM stack out of module load.
    from graph import evaluate_paper_for_rsl

    # The research topic doubles as both title and objectives for the
    # RSL evaluator.
    evaluation = evaluate_paper_for_rsl(
        paper_title=paper_title,
        paper_abstract=paper_abstract,
        review_title=research_topic,
        review_objectives=research_topic,
        inclusion_criteria=inclusion_criteria,
        exclusion_criteria=exclusion_criteria,
    )
    response = {
        # "decisao"/"motivo" are the Portuguese keys used by the evaluator.
        "decision": evaluation.get("decisao", "INCLUDE"),
        "reason": evaluation.get("motivo", ""),
        "paper_title": paper_title[:100],
    }
    return json.dumps(response, ensure_ascii=False, indent=2)
def batch_search_references(references_json: str) -> str:
    """
    Search for multiple academic references in batch.

    Performs searches for multiple references, returning metadata and
    abstracts for each. At most 20 references are searched per call to
    avoid overloading the backends.

    Args:
        references_json: A JSON array of reference strings to search.
            Example: '["Smith 2020 Title...", "Jones 2019 Another..."]'

    Returns:
        JSON string containing an array of search results.

    Example:
        >>> batch_search_references('["Smith 2020 Machine Learning", "Jones 2019 Deep Learning"]')
        {"total": 2, "found": 2, "results": [...]}
    """
    try:
        references = json.loads(references_json)
    except json.JSONDecodeError as e:
        return json.dumps({"error": f"Invalid JSON: {str(e)}"},
                          ensure_ascii=False)
    # Reject non-lists and lists with non-string items: the error message
    # already promises "array of strings", so enforce it.
    if not isinstance(references, list) or not all(
            isinstance(ref, str) for ref in references):
        return json.dumps({"error": "Input must be a JSON array of strings"},
                          ensure_ascii=False)

    max_refs = 20  # cap per call to avoid overloading the search backends
    results = []
    found_count = 0
    for ref in references[:max_refs]:
        result = json.loads(search_academic_reference(ref, include_abstract=True))
        results.append(result)
        if result.get("status") == "found":
            found_count += 1
    return json.dumps({
        "total": len(references),
        "searched": len(results),
        "found": found_count,
        "results": results
    }, ensure_ascii=False, indent=2)
def get_cache_statistics() -> str:
    """
    Get statistics about the reference cache.

    Returns information about cached search results, including hit
    rates and storage statistics.

    Returns:
        JSON string with cache statistics.
    """
    cache = get_cache()
    search_stats = cache.get_stats()
    pdf_stats = cache.get_pdf_cache_stats()

    report = {
        "search_cache": {
            # Note: internal key "found" is surfaced as "found_references".
            "total_entries": search_stats.get("total_entries", 0),
            "found_references": search_stats.get("found", 0),
            "not_found": search_stats.get("not_found", 0),
            "total_hits": search_stats.get("total_hits", 0),
            "by_source": search_stats.get("by_source", {}),
        },
        "pdf_cache": {
            key: pdf_stats.get(key, 0)
            for key in ("pdfs_cached", "total_refs_cached", "total_hits")
        },
    }
    return json.dumps(report, ensure_ascii=False, indent=2)