import arxiv
import json
import os
from typing import List
from mcp.server.fastmcp import FastMCP
# Root directory where per-topic paper metadata is persisted on disk
PAPER_DIR = "papers"
# Initialize FastMCP server
# NOTE(review): port 8001 presumably matters only for a network transport
# (e.g. sse); __main__ below runs stdio — confirm the port is intentional.
mcp = FastMCP("research", port=8001)
@mcp.tool()
def search_papers(topic: str, max_results: int = 5) -> List[str]:
    """
    Search for papers on arXiv based on a topic and store their information.

    Results are merged into <PAPER_DIR>/<topic_slug>/papers_info.json, so
    repeated searches on the same topic accumulate papers.

    Args:
        topic: The topic to search for
        max_results: Maximum number of results to retrieve (default: 5)

    Returns:
        List of paper IDs found in the search
    """
    # Use arxiv to find the papers
    client = arxiv.Client()

    # Search for the most relevant articles matching the queried topic
    search = arxiv.Search(
        query=topic,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance,
    )
    papers = client.results(search)

    # Create directory for this topic (slug: lowercase, spaces -> underscores)
    path = os.path.join(PAPER_DIR, topic.lower().replace(" ", "_"))
    os.makedirs(path, exist_ok=True)

    file_path = os.path.join(path, "papers_info.json")

    # Load previously saved info so this search extends, not overwrites, it
    try:
        with open(file_path, "r") as json_file:
            papers_info = json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        papers_info = {}

    # Process each paper and add to papers_info
    paper_ids = []
    for paper in papers:
        # Hoisted: get_short_id() was previously called twice per paper
        paper_id = paper.get_short_id()
        paper_ids.append(paper_id)
        papers_info[paper_id] = {
            'title': paper.title,
            'authors': [author.name for author in paper.authors],
            'summary': paper.summary,
            'pdf_url': paper.pdf_url,
            'published': str(paper.published.date()),
        }

    # Save updated papers_info to json file
    with open(file_path, "w") as json_file:
        json.dump(papers_info, json_file, indent=2)

    print(f"Results are saved in: {file_path}")

    return paper_ids
@mcp.tool()
def extract_info(paper_id: str) -> str:
    """
    Search for information about a specific paper across all topic directories.

    Args:
        paper_id: The ID of the paper to look for

    Returns:
        JSON string with paper information if found, error message if not found
    """
    # Guard: before any search has run, PAPER_DIR may not exist yet and
    # os.listdir would raise FileNotFoundError.
    if not os.path.isdir(PAPER_DIR):
        return f"There's no saved information related to paper {paper_id}."

    for item in os.listdir(PAPER_DIR):
        item_path = os.path.join(PAPER_DIR, item)
        if not os.path.isdir(item_path):
            continue
        file_path = os.path.join(item_path, "papers_info.json")
        if not os.path.isfile(file_path):
            continue
        try:
            with open(file_path, "r") as json_file:
                papers_info = json.load(json_file)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            # Best-effort: skip unreadable topic files rather than failing
            print(f"Error reading {file_path}: {str(e)}")
            continue
        if paper_id in papers_info:
            return json.dumps(papers_info[paper_id], indent=2)

    return f"There's no saved information related to paper {paper_id}."
@mcp.resource("papers://folders")
def get_available_folders() -> str:
    """
    List all available topic folders in the papers directory.

    This resource provides a simple list of all available topic folders.
    """
    folders = []

    # Collect every topic directory that actually has saved paper data
    if os.path.exists(PAPER_DIR):
        for topic_dir in os.listdir(PAPER_DIR):
            topic_path = os.path.join(PAPER_DIR, topic_dir)
            if os.path.isdir(topic_path):
                papers_file = os.path.join(topic_path, "papers_info.json")
                if os.path.exists(papers_file):
                    folders.append(topic_dir)

    # Create a simple markdown list
    content = "# Available Topics\n\n"
    if folders:
        for folder in folders:
            content += f"- {folder}\n"
        # BUG FIX: the hint previously interpolated the leaked loop variable,
        # so it only ever named the LAST folder; use a generic hint instead.
        content += "\nUse @<topic> to access papers in that topic.\n"
    else:
        content += "No topics found.\n"

    return content
@mcp.resource("papers://{topic}")
def get_topic_papers(topic: str) -> str:
    """
    Get detailed information about papers on a specific topic.

    Args:
        topic: The research topic to retrieve papers for
    """
    topic_dir = topic.lower().replace(" ", "_")
    papers_file = os.path.join(PAPER_DIR, topic_dir, "papers_info.json")

    # Nothing saved yet for this topic
    if not os.path.exists(papers_file):
        return f"# No papers found for topic: {topic}\n\nTry searching for papers on this topic first."

    try:
        with open(papers_file, 'r') as f:
            papers_data = json.load(f)
    except json.JSONDecodeError:
        return f"# Error reading papers data for {topic}\n\nThe papers data file is corrupted."

    # Assemble the markdown report from per-paper sections
    parts = [
        f"# Papers on {topic.replace('_', ' ').title()}\n\n",
        f"Total papers: {len(papers_data)}\n\n",
    ]
    for paper_id, info in papers_data.items():
        parts.append(f"## {info['title']}\n")
        parts.append(f"- **Paper ID**: {paper_id}\n")
        parts.append(f"- **Authors**: {', '.join(info['authors'])}\n")
        parts.append(f"- **Published**: {info['published']}\n")
        parts.append(f"- **PDF URL**: [{info['pdf_url']}]({info['pdf_url']})\n\n")
        parts.append(f"### Summary\n{info['summary'][:500]}...\n\n")
        parts.append("---\n\n")

    return "".join(parts)
@mcp.prompt()
def generate_search_prompt(topic: str, num_papers: int = 5) -> str:
    """Generate a prompt for Claude to find and discuss academic papers on a specific topic.

    Args:
        topic: The research topic the generated prompt should cover
        num_papers: Number of papers the prompt asks Claude to find (default: 5)

    Returns:
        An instruction prompt that tells Claude to call the search_papers tool
        and synthesize the results.
    """
    # The triple-quoted text below is runtime output (the prompt itself),
    # not documentation — its exact wording and whitespace are behavior.
    return f"""Search for {num_papers} academic papers about '{topic}' using the search_papers tool.
Follow these instructions:
1. First, search for papers using search_papers(topic='{topic}', max_results={num_papers})
2. For each paper found, extract and organize the following information:
- Paper title
- Authors
- Publication date
- Brief summary of the key findings
- Main contributions or innovations
- Methodologies used
- Relevance to the topic '{topic}'
3. Provide a comprehensive summary that includes:
- Overview of the current state of research in '{topic}'
- Common themes and trends across the papers
- Key research gaps or areas for future investigation
- Most impactful or influential papers in this area
4. Organize your findings in a clear, structured format with headings and bullet points for easy readability.
Please present both detailed information about each paper and a high-level synthesis of the research landscape in {topic}."""
import gradio as gr

def gradio_search(topic: str, num_papers: int = 5):
    """
    Gradio callback: search arXiv for a topic and render the stored results.

    Args:
        topic: The research topic to search for
        num_papers: How many papers to fetch; coerced to int because Gradio
            sliders may deliver the value as a float.

    Returns:
        Markdown summary of the saved papers for the topic (from
        get_topic_papers).
    """
    # Search (and persist) first, then render what was saved for the topic
    search_papers(topic, max_results=int(num_papers))
    return get_topic_papers(topic)
# Gradio UI: free-text topic input plus a paper-count slider, rendering the
# markdown produced by gradio_search.
demo = gr.Interface(
    fn=gradio_search,
    inputs=[
        gr.Textbox(label="Research Topic", placeholder="e.g. Large Language Models"),
        gr.Slider(1, 10, value=5, step=1, label="Number of Papers")
    ],
    outputs=gr.Markdown(label="Results"),
    title="Academic Paper Search (arXiv)",
    description="Enter a research topic to search for recent academic papers and get a summary."
)
if __name__ == "__main__":
    # Launch the Gradio UI on a daemon thread so it doesn't block the MCP
    # server below; share=True asks Gradio for a public share link.
    import threading
    threading.Thread(target=demo.launch, kwargs={"share": True}, daemon=True).start()
    # Alternative: mcp.run(transport='sse') serves the MCP server over HTTP
    # instead of stdio (this is where the port=8001 above would apply).
    mcp.run(transport='stdio')