|
|
import arxiv |
|
|
import json |
|
|
import os |
|
|
from typing import List |
|
|
from mcp.server.fastmcp import FastMCP |
|
|
|
|
|
# Root directory under which every topic gets its own sub-folder of saved results.
PAPER_DIR = "papers"

# Shared FastMCP server instance; the tools, resources and prompt in this file
# register themselves on it through its decorators.
# NOTE(review): port=8001 presumably only matters for network transports, while
# the entry point of this file runs the server over stdio — confirm it is needed.
mcp = FastMCP("research", port=8001)
|
|
|
|
|
@mcp.tool()
def search_papers(topic: str, max_results: int = 5) -> List[str]:
    """
    Search for papers on arXiv based on a topic and store their information.

    Results are merged into ``<PAPER_DIR>/<topic_dir>/papers_info.json``, where
    ``topic_dir`` is the lower-cased topic with spaces replaced by underscores.
    Entries already present in that file are kept; an entry is overwritten only
    when the same paper ID is returned again.

    Args:
        topic: The topic to search for
        max_results: Maximum number of results to retrieve (default: 5)

    Returns:
        List of paper IDs found in the search

    Raises:
        ValueError: If the topic would resolve to a directory outside PAPER_DIR.
    """
    import sys  # local import: only needed for the stderr diagnostic below

    # The topic comes from the caller and becomes part of a filesystem path, so
    # refuse anything (e.g. "../x" or an absolute path) that escapes PAPER_DIR
    # before touching the network or the disk.
    base_dir = os.path.abspath(PAPER_DIR)
    path = os.path.join(PAPER_DIR, topic.lower().replace(" ", "_"))
    resolved = os.path.abspath(path)
    if resolved != base_dir and not resolved.startswith(base_dir + os.sep):
        raise ValueError(f"Invalid topic: {topic!r}")

    client = arxiv.Client()
    search = arxiv.Search(
        query=topic,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance,
    )

    os.makedirs(path, exist_ok=True)
    file_path = os.path.join(path, "papers_info.json")

    # Start from whatever was saved by earlier searches on this topic.
    try:
        with open(file_path, "r", encoding="utf-8") as json_file:
            papers_info = json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        papers_info = {}
    if not isinstance(papers_info, dict):
        # Valid JSON of the wrong shape is as unusable as a corrupt file.
        papers_info = {}

    paper_ids = []
    for paper in client.results(search):
        paper_id = paper.get_short_id()
        paper_ids.append(paper_id)
        papers_info[paper_id] = {
            'title': paper.title,
            'authors': [author.name for author in paper.authors],
            'summary': paper.summary,
            'pdf_url': paper.pdf_url,
            'published': str(paper.published.date()),
        }

    with open(file_path, "w", encoding="utf-8") as json_file:
        json.dump(papers_info, json_file, indent=2)

    # stdout carries the protocol messages when the server runs over stdio, so
    # diagnostics must go to stderr instead.
    print(f"Results are saved in: {file_path}", file=sys.stderr)

    return paper_ids
|
|
|
|
|
@mcp.tool()
def extract_info(paper_id: str) -> str:
    """
    Search for information about a specific paper across all topic directories.

    Every ``<PAPER_DIR>/<topic>/papers_info.json`` file is inspected in the
    order returned by ``os.listdir``; the first one containing ``paper_id``
    wins. Unreadable or malformed files are reported on stderr and skipped.

    Args:
        paper_id: The ID of the paper to look for

    Returns:
        JSON string with paper information if found, error message if not found
    """
    import sys  # local import: only needed for the stderr diagnostic below

    not_found = f"There's no saved information related to paper {paper_id}."

    try:
        entries = os.listdir(PAPER_DIR)
    except (FileNotFoundError, NotADirectoryError):
        # Nothing has been saved yet, so there is nothing to look through.
        return not_found

    for item in entries:
        # isfile() on the nested path is only true when `item` is a directory
        # holding the file, so no separate isdir() check is required.
        file_path = os.path.join(PAPER_DIR, item, "papers_info.json")
        if not os.path.isfile(file_path):
            continue

        try:
            with open(file_path, "r", encoding="utf-8") as json_file:
                papers_info = json.load(json_file)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            # stdout is reserved for protocol traffic under the stdio transport.
            print(f"Error reading {file_path}: {str(e)}", file=sys.stderr)
            continue

        if isinstance(papers_info, dict) and paper_id in papers_info:
            return json.dumps(papers_info[paper_id], indent=2)

    return not_found
|
|
|
|
|
|
|
|
|
|
|
@mcp.resource("papers://folders")
def get_available_folders() -> str:
    """
    Return a Markdown list of the topic folders that hold saved papers.

    A sub-directory of PAPER_DIR counts as a topic only when it contains a
    ``papers_info.json`` file. When none qualify (or PAPER_DIR does not exist)
    the listing says that no topics were found.
    """
    header = "# Available Topics\n\n"

    topics = []
    if os.path.exists(PAPER_DIR):
        topics = [
            entry
            for entry in os.listdir(PAPER_DIR)
            if os.path.isdir(os.path.join(PAPER_DIR, entry))
            and os.path.exists(os.path.join(PAPER_DIR, entry, "papers_info.json"))
        ]

    if not topics:
        return header + "No topics found.\n"

    bullets = "".join(f"- {name}\n" for name in topics)
    # The usage hint names the last folder listed, as an example.
    hint = f"\nUse @{topics[-1]} to access papers in that topic.\n"
    return header + bullets + hint
|
|
|
|
|
@mcp.resource("papers://{topic}")
def get_topic_papers(topic: str) -> str:
    """
    Get detailed information about papers on a specific topic.

    Reads ``<PAPER_DIR>/<topic_dir>/papers_info.json`` (``topic_dir`` being the
    lower-cased topic with spaces replaced by underscores) and renders each
    saved paper as a Markdown section.

    Args:
        topic: The research topic to retrieve papers for

    Returns:
        Markdown describing every saved paper, or a short Markdown message when
        no data exists for the topic or the data file is not valid JSON.
    """
    topic_dir = topic.lower().replace(" ", "_")
    papers_file = os.path.join(PAPER_DIR, topic_dir, "papers_info.json")

    # Open directly instead of checking existence first: the file may vanish
    # between the check and the read.
    try:
        with open(papers_file, 'r', encoding="utf-8") as f:
            papers_data = json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        return f"# No papers found for topic: {topic}\n\nTry searching for papers on this topic first."
    except json.JSONDecodeError:
        return f"# Error reading papers data for {topic}\n\nThe papers data file is corrupted."

    max_summary_chars = 500
    parts = [
        f"# Papers on {topic.replace('_', ' ').title()}\n\n",
        f"Total papers: {len(papers_data)}\n\n",
    ]

    for paper_id, paper_info in papers_data.items():
        summary = paper_info['summary']
        # Only mark the summary as cut when it actually was.
        if len(summary) > max_summary_chars:
            summary = summary[:max_summary_chars] + "..."

        parts.append(f"## {paper_info['title']}\n")
        parts.append(f"- **Paper ID**: {paper_id}\n")
        parts.append(f"- **Authors**: {', '.join(paper_info['authors'])}\n")
        parts.append(f"- **Published**: {paper_info['published']}\n")
        parts.append(f"- **PDF URL**: [{paper_info['pdf_url']}]({paper_info['pdf_url']})\n\n")
        parts.append(f"### Summary\n{summary}\n\n")
        parts.append("---\n\n")

    return "".join(parts)
|
|
|
|
|
@mcp.prompt()
def generate_search_prompt(topic: str, num_papers: int = 5) -> str:
    """Build the instruction text asking Claude to search for and discuss papers on a topic.

    Args:
        topic: Research topic interpolated throughout the prompt.
        num_papers: Number of papers the prompt asks to search for (default: 5).

    Returns:
        The prompt text, ending without a trailing newline.
    """
    prompt_lines = [
        f"Search for {num_papers} academic papers about '{topic}' using the search_papers tool.",
        "",
        "Follow these instructions:",
        f"1. First, search for papers using search_papers(topic='{topic}', max_results={num_papers})",
        "2. For each paper found, extract and organize the following information:",
        "- Paper title",
        "- Authors",
        "- Publication date",
        "- Brief summary of the key findings",
        "- Main contributions or innovations",
        "- Methodologies used",
        f"- Relevance to the topic '{topic}'",
        "",
        "3. Provide a comprehensive summary that includes:",
        f"- Overview of the current state of research in '{topic}'",
        "- Common themes and trends across the papers",
        "- Key research gaps or areas for future investigation",
        "- Most impactful or influential papers in this area",
        "",
        "4. Organize your findings in a clear, structured format with headings and bullet points for easy readability.",
        "",
        f"Please present both detailed information about each paper and a high-level synthesis of the research landscape in {topic}.",
    ]
    return "\n".join(prompt_lines)
|
|
|
|
|
import gradio as gr |
|
|
|
|
|
def gradio_search(topic: str, num_papers: int = 5):
    """Run an arXiv search from the Gradio UI and return the topic's saved papers.

    Calls ``search_papers`` to fetch and store results, then renders everything
    stored for the topic with ``get_topic_papers``.

    Args:
        topic: Research topic typed by the user; surrounding whitespace is ignored.
        num_papers: Number of papers to request (default: 5).

    Returns:
        Markdown text for the results pane, or a short message asking for a
        topic when none was entered.
    """
    topic = topic.strip() if topic else ""
    if not topic:
        # Don't send an empty query to arXiv or create a nameless topic folder.
        return "Please enter a research topic."

    # A slider widget may deliver its value as a float; the search expects a count.
    search_papers(topic, max_results=int(num_papers))
    return get_topic_papers(topic)
|
|
|
|
|
# Gradio front end: a topic box and a paper-count slider feed gradio_search,
# and the Markdown it returns is rendered in the results pane.
demo = gr.Interface(
    title="Academic Paper Search (arXiv)",
    description="Enter a research topic to search for recent academic papers and get a summary.",
    fn=gradio_search,
    inputs=[
        gr.Textbox(placeholder="e.g. Large Language Models", label="Research Topic"),
        gr.Slider(1, 10, step=1, value=5, label="Number of Papers"),
    ],
    outputs=gr.Markdown(label="Results"),
)
|
|
|
|
|
if __name__ == "__main__":
    import threading

    # Serve the Gradio UI from a daemon thread so it runs alongside the MCP
    # server and does not keep the process alive once mcp.run() returns.
    # NOTE(review): share=True asks Gradio for a public link, and launch()
    # presumably prints startup messages to stdout, which the stdio transport
    # below also uses — confirm neither is a problem for this deployment.
    ui_thread = threading.Thread(
        target=demo.launch,
        kwargs={"share": True},
        daemon=True,
    )
    ui_thread.start()

    # Blocks the main thread, serving MCP requests over stdin/stdout.
    mcp.run(transport='stdio')