Spaces:

selinazarzour
/

MCP_Research_Server

Paused

App Files Files Community

MCP_Research_Server / research_server.py

selinazarzour

Upload folder using huggingface_hub

7875068 verified 9 months ago

raw

history blame contribute delete

7.52 kB

	import arxiv
	import json
	import os
	from typing import List
	from mcp.server.fastmcp import FastMCP

	# Root directory for cached search results. Each topic gets its own
	# sub-directory containing a papers_info.json file.
	PAPER_DIR = "papers"

	# FastMCP server instance named "research"; the tool, resource and prompt
	# decorators in this file register against it.
	# NOTE(review): port=8001 presumably only matters for network transports, while
	# the entry point of this file runs with transport='stdio' — confirm.
	mcp = FastMCP("research", port=8001)

	@mcp.tool()
	def search_papers(topic: str, max_results: int = 5) -> List[str]:
	    """
	    Search for papers on arXiv based on a topic and store their information.

	    Results are merged into ``<PAPER_DIR>/<topic>/papers_info.json`` (topic
	    lower-cased, spaces replaced by underscores) keyed by arXiv short ID, so
	    repeated searches on the same topic accumulate instead of overwriting.

	    Args:
	        topic: The topic to search for
	        max_results: Maximum number of results to retrieve (default: 5)

	    Returns:
	        List of paper IDs found in the search
	    """
	    import sys  # local import: only needed for the stderr status message

	    # Search arXiv for the most relevant articles matching the queried topic.
	    client = arxiv.Client()
	    search = arxiv.Search(
	        query=topic,
	        max_results=max_results,
	        sort_by=arxiv.SortCriterion.Relevance,
	    )
	    papers = client.results(search)

	    # Create directory for this topic
	    path = os.path.join(PAPER_DIR, topic.lower().replace(" ", "_"))
	    os.makedirs(path, exist_ok=True)
	    file_path = os.path.join(path, "papers_info.json")

	    # Load previously saved results; a missing or corrupt file starts fresh.
	    try:
	        with open(file_path, "r", encoding="utf-8") as json_file:
	            papers_info = json.load(json_file)
	    except (FileNotFoundError, json.JSONDecodeError):
	        papers_info = {}
	    if not isinstance(papers_info, dict):
	        # Valid JSON but not the expected mapping: treat it like a corrupt file.
	        papers_info = {}

	    # Process each paper and add it to papers_info
	    paper_ids = []
	    for paper in papers:
	        short_id = paper.get_short_id()
	        paper_ids.append(short_id)
	        papers_info[short_id] = {
	            'title': paper.title,
	            'authors': [author.name for author in paper.authors],
	            'summary': paper.summary,
	            'pdf_url': paper.pdf_url,
	            'published': str(paper.published.date()),
	        }

	    # Save updated papers_info to the json file
	    with open(file_path, "w", encoding="utf-8") as json_file:
	        json.dump(papers_info, json_file, indent=2)

	    # The server runs over the stdio transport, where stdout carries the
	    # protocol messages; status output therefore goes to stderr.
	    print(f"Results are saved in: {file_path}", file=sys.stderr)

	    return paper_ids

	@mcp.tool()
	def extract_info(paper_id: str) -> str:
	    """
	    Search for information about a specific paper across all topic directories.

	    Args:
	        paper_id: The ID of the paper to look for

	    Returns:
	        JSON string with paper information if found, error message if not found
	    """
	    import sys  # local import: only needed for the stderr diagnostics

	    not_found = f"There's no saved information related to paper {paper_id}."

	    # No search has been saved yet: report "not found" instead of letting
	    # os.listdir raise on the missing directory.
	    if not os.path.isdir(PAPER_DIR):
	        return not_found

	    for item in os.listdir(PAPER_DIR):
	        # isfile() is False for plain files in PAPER_DIR and for topic
	        # directories without a saved papers_info.json.
	        file_path = os.path.join(PAPER_DIR, item, "papers_info.json")
	        if not os.path.isfile(file_path):
	            continue

	        try:
	            with open(file_path, "r", encoding="utf-8") as json_file:
	                papers_info = json.load(json_file)
	        except (OSError, json.JSONDecodeError) as e:
	            # Best-effort scan: skip unreadable files. Diagnostics go to
	            # stderr because stdout carries the stdio transport's messages.
	            print(f"Error reading {file_path}: {str(e)}", file=sys.stderr)
	            continue

	        if isinstance(papers_info, dict) and paper_id in papers_info:
	            return json.dumps(papers_info[paper_id], indent=2)

	    return not_found



	@mcp.resource("papers://folders")
	def get_available_folders() -> str:
	    """
	    List all available topic folders in the papers directory.

	    This resource provides a simple list of all available topic folders.
	    A folder counts as available when it contains a papers_info.json file.
	    Folders are listed in sorted order so the output is deterministic.
	    """
	    folders = []

	    # Collect every topic directory that holds saved paper information.
	    if os.path.isdir(PAPER_DIR):
	        folders = sorted(
	            entry
	            for entry in os.listdir(PAPER_DIR)
	            if os.path.isfile(os.path.join(PAPER_DIR, entry, "papers_info.json"))
	        )

	    # Create a simple markdown list
	    lines = ["# Available Topics", ""]
	    if folders:
	        lines.extend(f"- {folder}" for folder in folders)
	        # Usage hint, using the last listed topic as the example.
	        lines.append("")
	        lines.append(f"Use @{folders[-1]} to access papers in that topic.")
	    else:
	        lines.append("No topics found.")

	    return "\n".join(lines) + "\n"

	@mcp.resource("papers://{topic}")
	def get_topic_papers(topic: str) -> str:
	    """
	    Get detailed information about papers on a specific topic.

	    Args:
	        topic: The research topic to retrieve papers for

	    Returns:
	        Markdown text with one section per stored paper, or a short Markdown
	        message when nothing is stored for the topic or the file is corrupted.
	    """
	    topic_dir = topic.lower().replace(" ", "_")
	    papers_file = os.path.join(PAPER_DIR, topic_dir, "papers_info.json")

	    if not os.path.exists(papers_file):
	        return f"# No papers found for topic: {topic}\n\nTry searching for papers on this topic first."

	    # Only the JSON parse can raise JSONDecodeError, so keep the try minimal.
	    try:
	        with open(papers_file, 'r', encoding="utf-8") as f:
	            papers_data = json.load(f)
	    except json.JSONDecodeError:
	        return f"# Error reading papers data for {topic}\n\nThe papers data file is corrupted."

	    max_summary_chars = 500

	    # Create markdown content with paper details
	    parts = [
	        f"# Papers on {topic.replace('_', ' ').title()}\n\n",
	        f"Total papers: {len(papers_data)}\n\n",
	    ]
	    for paper_id, paper_info in papers_data.items():
	        summary = paper_info['summary']
	        # Add the ellipsis only when the summary was actually cut short.
	        if len(summary) > max_summary_chars:
	            summary = summary[:max_summary_chars] + "..."

	        parts.append(f"## {paper_info['title']}\n")
	        parts.append(f"- Paper ID: {paper_id}\n")
	        parts.append(f"- Authors: {', '.join(paper_info['authors'])}\n")
	        parts.append(f"- Published: {paper_info['published']}\n")
	        parts.append(f"- PDF URL: [{paper_info['pdf_url']}]({paper_info['pdf_url']})\n\n")
	        parts.append(f"### Summary\n{summary}\n\n")
	        parts.append("---\n\n")

	    return "".join(parts)

	@mcp.prompt()
	def generate_search_prompt(topic: str, num_papers: int = 5) -> str:
	    """Build a prompt asking Claude to find and discuss academic papers on a topic.

	    Args:
	        topic: Research topic interpolated into the instructions.
	        num_papers: Number of papers the prompt asks search_papers to fetch (default: 5).

	    Returns:
	        The formatted instruction text.
	    """
	    prompt_text = f"""Search for {num_papers} academic papers about '{topic}' using the search_papers tool.

	Follow these instructions:
	1. First, search for papers using search_papers(topic='{topic}', max_results={num_papers})
	2. For each paper found, extract and organize the following information:
	- Paper title
	- Authors
	- Publication date
	- Brief summary of the key findings
	- Main contributions or innovations
	- Methodologies used
	- Relevance to the topic '{topic}'

	3. Provide a comprehensive summary that includes:
	- Overview of the current state of research in '{topic}'
	- Common themes and trends across the papers
	- Key research gaps or areas for future investigation
	- Most impactful or influential papers in this area

	4. Organize your findings in a clear, structured format with headings and bullet points for easy readability.

	Please present both detailed information about each paper and a high-level synthesis of the research landscape in {topic}."""
	    return prompt_text

	import gradio as gr

	def gradio_search(topic: str, num_papers: int = 5):
	    """Search arXiv for a topic and return the stored results as Markdown.

	    Args:
	        topic: Research topic typed into the UI; surrounding whitespace is ignored.
	        num_papers: Number of papers to request (default: 5). Coerced to int
	            because UI widgets may deliver the value as a float.

	    Returns:
	        Markdown text describing every paper stored for the topic, or a short
	        notice when no topic was entered.
	    """
	    topic = (topic or "").strip()
	    if not topic:
	        # A blank topic would issue an empty arXiv query and write its results
	        # directly into the root papers directory, so stop here.
	        return "# No topic provided\n\nEnter a research topic to search for papers."

	    # Run the search (which persists the results), then render the stored data.
	    search_papers(topic, max_results=int(num_papers))
	    return get_topic_papers(topic)

	# Gradio front end: a topic text box and a paper-count slider feed
	# gradio_search, whose Markdown return value is rendered as the result.
	demo = gr.Interface(
	    title="Academic Paper Search (arXiv)",
	    description="Enter a research topic to search for recent academic papers and get a summary.",
	    fn=gradio_search,
	    inputs=[
	        gr.Textbox(placeholder="e.g. Large Language Models", label="Research Topic"),
	        # Slider range 1-10 in steps of 1, starting at 5.
	        gr.Slider(1, 10, step=1, value=5, label="Number of Papers"),
	    ],
	    outputs=gr.Markdown(label="Results"),
	)

	if __name__ == "__main__":
	    import threading

	    # Start the Gradio UI on a daemon thread so it does not keep the process
	    # alive once the MCP server in the main thread stops.
	    # NOTE(review): Gradio's startup output presumably goes to stdout, which
	    # the stdio transport below also uses — confirm it cannot interfere.
	    ui_thread = threading.Thread(
	        target=demo.launch,
	        kwargs={"share": True},
	        daemon=True,
	    )
	    ui_thread.start()

	    # Run the MCP server in the main thread over stdio.
	    # (The 'sse' transport was the alternative left commented out here.)
	    mcp.run(transport='stdio')