Spaces:

SPerva
/

github-mcp-server

Sleeping

App Files Files Community

github-mcp-server / app.py

SPerva

Create app.py

57b8ecd verified 25 days ago

raw

history blame contribute delete

17.9 kB

	#!/usr/bin/env python3
	"""
	GitHub MCP Server
	Provides GitHub API access via Model Context Protocol using Gradio
	"""

	import base64
	import json
	import logging
	import os
	from typing import List, Dict, Any

	import aiohttp
	import gradio as gr

	# Runtime configuration, read once from the environment at import time.
	LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
	GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "")
	# A trailing slash would yield "//" in every URL built from this base.
	GITHUB_API_BASE = os.getenv("GITHUB_API_BASE_URL", "https://api.github.com").rstrip("/")

	# Resolve the level name to its numeric value; an unknown or mistyped
	# LOG_LEVEL falls back to INFO instead of crashing the import.
	_resolved_log_level = getattr(logging, LOG_LEVEL.upper(), None)
	if not isinstance(_resolved_log_level, int):
	    _resolved_log_level = logging.INFO

	logging.basicConfig(
	    level=_resolved_log_level,
	    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
	)
	logger = logging.getLogger(__name__)

	# API Constants
	RESULTS_PER_PAGE = 100  # "per_page" for repo listing and the repo-discovery search
	SEARCH_RESULTS_LIMIT = 50  # "per_page" for documentation searches


	# ============================================================================
	# Helper Functions
	# ============================================================================

	def create_headers() -> Dict[str, str]:
	    """
	    Build the HTTP headers sent with every GitHub API request

	    The Authorization header is only included when GITHUB_TOKEN is non-empty.

	    Returns:
	        Dictionary of HTTP headers for GitHub API requests
	    """
	    # Empty mapping when no token is configured, so the merge below adds nothing
	    auth_header = {"Authorization": f"token {GITHUB_TOKEN}"} if GITHUB_TOKEN else {}

	    return {
	        "Accept": "application/vnd.github.v3+json",
	        "User-Agent": "GitHub-MCP-Server/1.0",
	        **auth_header,
	    }


	async def check_doc_folder(
	    session: aiohttp.ClientSession,
	    org: str,
	    repo: str
	) -> bool:
	    """
	    Report whether a repository exposes a /doc path

	    Requests the repository's ``contents/doc`` endpoint and treats an
	    HTTP 200 answer as "exists". Any other status, and any exception raised
	    while making the request, is reported as False.

	    Args:
	        session: aiohttp ClientSession supplied by the caller
	        org: Organization name
	        repo: Repository name

	    Returns:
	        True if the request answered 200, False otherwise
	    """
	    request_headers = create_headers()
	    doc_url = f"{GITHUB_API_BASE}/repos/{org}/{repo}/contents/doc"

	    try:
	        async with session.get(doc_url, headers=request_headers) as response:
	            found = response.status == 200
	    except Exception as e:
	        # Best effort: a failed probe is logged at debug level and counts as "no /doc"
	        logger.debug(f"Error checking /doc folder for {org}/{repo}: {e}")
	        return False

	    return found


	def determine_content_type(filename: str) -> str:
	    """
	    Determine content type from filename

	    Matching is case-insensitive and based on the file extension. JSON files
	    are treated as Postman collections when the name starts with "postman"
	    or ends with ".postman_collection.json"; every other JSON file, and
	    every YAML file, is assumed to be OpenAPI.

	    Args:
	        filename: Name of the file

	    Returns:
	        Content type: 'markdown', 'mermaid', 'svg', 'openapi', 'postman', or 'unknown'
	    """
	    lower_name = filename.lower()

	    if lower_name.endswith(('.mmd', '.mermaid')):
	        return 'mermaid'
	    if lower_name.endswith('.md'):
	        return 'markdown'
	    if lower_name.endswith('.svg'):
	        return 'svg'
	    if lower_name.endswith(('.yml', '.yaml')):
	        return 'openapi'
	    if lower_name.endswith('.json'):
	        # Exported collections are usually named "<name>.postman_collection.json",
	        # so checking only the prefix would misclassify them as OpenAPI.
	        is_postman = (
	            lower_name.startswith('postman')
	            or lower_name.endswith('.postman_collection.json')
	        )
	        return 'postman' if is_postman else 'openapi'

	    return 'unknown'


	# ============================================================================
	# Business Logic Functions (testable)
	# ============================================================================

	async def get_org_repos(org: str) -> List[Dict[str, Any]]:
	    """
	    List an organization's repositories and flag which ones have a /doc folder

	    Two strategies are tried in order:

	    1. A single code-search request (``org:<org> path:/doc``). When it
	       answers 200, only the repositories that appear in the search results
	       are returned, each with ``hasDocFolder`` set to True.
	    2. If the search raises or answers with any other status, every
	       repository of the organization is listed page by page and probed
	       one at a time with ``check_doc_folder``. All repositories are
	       returned, with ``hasDocFolder`` True or False.

	    Args:
	        org: GitHub organization name

	    Returns:
	        List of repository dictionaries:
	        [
	            {
	                "id": "12345",
	                "name": "repo-name",
	                "description": "...",
	                "url": "https://github.com/org/repo-name",
	                "hasDocFolder": True
	            },
	            ...
	        ]

	    Raises:
	        Exception: If a repository listing request (strategy 2) does not answer 200
	    """
	    async with aiohttp.ClientSession() as session:
	        headers = create_headers()

	        # Strategy 1: Use GitHub Search API (efficient - one request)
	        search_url = f"{GITHUB_API_BASE}/search/code"
	        params = {
	            "q": f"org:{org} path:/doc",
	            "per_page": RESULTS_PER_PAGE
	        }

	        try:
	            async with session.get(search_url, headers=headers, params=params) as response:
	                if response.status == 200:
	                    data = await response.json()

	                    # Extract unique repositories from search results
	                    # (several matching files can belong to the same repo)
	                    repos_with_docs = {}
	                    for item in data.get("items", []):
	                        repo_info = item.get("repository", {})
	                        repo_name = repo_info.get("name")

	                        if repo_name and repo_name not in repos_with_docs:
	                            repos_with_docs[repo_name] = {
	                                "id": str(repo_info.get("id", "")),
	                                "name": repo_name,
	                                "description": repo_info.get("description") or "",
	                                "url": repo_info.get("html_url", ""),
	                                "hasDocFolder": True
	                            }

	                    logger.info(f"Found {len(repos_with_docs)} repos with /doc via search")
	                    return list(repos_with_docs.values())

	                # A non-200 answer used to fall through silently; record why
	                # the slower per-repository strategy is about to run.
	                logger.warning(
	                    f"Search API returned status {response.status}, "
	                    "falling back to list all repos"
	                )

	        except Exception as e:
	            logger.warning(f"Search API failed: {e}, falling back to list all repos")

	        # Strategy 2: Fallback - List all repos and check each one
	        repos_url = f"{GITHUB_API_BASE}/orgs/{org}/repos"
	        all_repos = []
	        page = 1

	        logger.info(f"Fetching repos for organization: {org}")

	        while True:
	            async with session.get(
	                repos_url,
	                headers=headers,
	                params={"per_page": RESULTS_PER_PAGE, "page": page, "sort": "updated"}
	            ) as response:
	                if response.status != 200:
	                    error_text = await response.text()
	                    raise Exception(f"GitHub API error {response.status}: {error_text}")

	                repos = await response.json()

	            if not repos:
	                break

	            all_repos.extend(repos)
	            logger.info(f"Fetched page {page} ({len(repos)} repos)")
	            page += 1

	            # Stop if we got less than full page (last page)
	            if len(repos) < RESULTS_PER_PAGE:
	                break

	        logger.info(f"Total repos fetched: {len(all_repos)}")

	        # Check each repo for /doc folder (one request per repository)
	        result = []
	        for idx, repo in enumerate(all_repos, 1):
	            logger.info(f"Checking {idx}/{len(all_repos)}: {repo['name']}")
	            has_doc = await check_doc_folder(session, org, repo["name"])

	            result.append({
	                "id": str(repo["id"]),
	                "name": repo["name"],
	                "description": repo.get("description") or "",
	                "url": repo["html_url"],
	                "hasDocFolder": has_doc
	            })

	        repos_with_docs_count = sum(1 for r in result if r["hasDocFolder"])
	        logger.info(f"Found {repos_with_docs_count} repos with /doc folder")

	        return result


	async def get_repo_docs(org: str, repo: str) -> List[Dict[str, Any]]:
	    """
	    Get all documentation files from a repository's /doc folder

	    Filters for supported file types: Markdown, Mermaid, SVG, OpenAPI, Postman.
	    Only direct children of /doc that are files are considered; entries of
	    any other type are ignored.

	    Args:
	        org: GitHub organization name
	        repo: Repository name

	    Returns:
	        List of documentation file dictionaries (empty when /doc is missing
	        or is not a directory):
	        [
	            {
	                "id": "abc123...",
	                "name": "README.md",
	                "path": "doc/README.md",
	                "type": "markdown",
	                "url": "https://github.com/org/repo/blob/main/doc/README.md",
	                "download_url": "https://raw.githubusercontent.com/.../README.md",
	            },
	            ...
	        ]

	    Raises:
	        Exception: If the GitHub API answers with a status other than 200 or 404

	    Example:
	        docs = await get_repo_docs("anthropics", "anthropic-sdk-python")
	    """
	    # Filter for supported file types (tuple so str.endswith can test all at once)
	    supported_extensions = (
	        '.md',       # Markdown
	        '.mmd',      # Mermaid
	        '.mermaid',  # Mermaid
	        '.svg',      # SVG images
	        '.yml',      # YAML (OpenAPI)
	        '.yaml',     # YAML (OpenAPI)
	        '.json',     # JSON (OpenAPI/Postman)
	    )

	    async with aiohttp.ClientSession() as session:
	        headers = create_headers()
	        url = f"{GITHUB_API_BASE}/repos/{org}/{repo}/contents/doc"

	        logger.info(f"Fetching docs from: {org}/{repo}/doc")

	        async with session.get(url, headers=headers) as response:
	            if response.status == 404:
	                logger.warning(f"No /doc folder found in {org}/{repo}")
	                return []

	            if response.status != 200:
	                error_text = await response.text()
	                raise Exception(f"GitHub API error {response.status}: {error_text}")

	            contents = await response.json()

	    # The contents endpoint answers with a JSON array only for directories;
	    # a single object means "doc" is a file, symlink or submodule.
	    if not isinstance(contents, list):
	        logger.warning(f"/doc in {org}/{repo} is not a directory")
	        return []

	    docs = []
	    skipped = 0

	    for item in contents:
	        # Only process files (not directories)
	        if item["type"] != "file":
	            continue

	        name = item["name"]

	        # Files with an unsupported extension are counted but not returned
	        if not name.lower().endswith(supported_extensions):
	            skipped += 1
	            continue

	        docs.append({
	            "id": item["sha"],
	            "name": name,
	            "path": item["path"],
	            "type": determine_content_type(name),
	            "url": item["html_url"],
	            # The key may be present with a null value; always return a string
	            "download_url": item.get("download_url") or "",
	        })

	    logger.info(f"Found {len(docs)} documentation files ({skipped} skipped)")
	    return docs


	async def get_file_content(org: str, repo: str, path: str) -> Dict[str, Any]:
	    """
	    Fetch content of a specific file from GitHub

	    Decodes base64-encoded content returned by GitHub API. If the payload
	    cannot be decoded as base64 + UTF-8, the raw "content" value from the
	    response is returned unchanged instead.

	    Args:
	        org: GitHub organization name
	        repo: Repository name
	        path: File path within repository (e.g., "doc/README.md")

	    Returns:
	        Dictionary with file metadata and content:
	        {
	            "name": "README.md",
	            "path": "doc/README.md",
	            "content": "# Documentation\\n\\nThis is...",
	            "encoding": "base64"
	        }
	        "encoding" is the value reported by the API (default "base64"), not
	        the encoding of the returned "content" string.

	    Raises:
	        Exception: If the file is not found (404), the API answers with
	            another non-200 status, or the path refers to a directory

	    Example:
	        content = await get_file_content("anthropics", "sdk", "doc/README.md")
	    """
	    async with aiohttp.ClientSession() as session:
	        headers = create_headers()
	        url = f"{GITHUB_API_BASE}/repos/{org}/{repo}/contents/{path}"

	        logger.info(f"Fetching content: {org}/{repo}/{path}")

	        async with session.get(url, headers=headers) as response:
	            if response.status == 404:
	                raise Exception(f"File not found: {path}")

	            if response.status != 200:
	                error_text = await response.text()
	                raise Exception(f"GitHub API error {response.status}: {error_text}")

	            data = await response.json()

	    # A directory path yields a JSON array; fail with a clear message rather
	    # than an opaque TypeError when indexing it like a file object below.
	    if isinstance(data, list):
	        raise Exception(f"Path is a directory, not a file: {path}")

	    # Decode base64 content if present
	    content = ""
	    if "content" in data and data["content"]:
	        try:
	            # GitHub returns base64-encoded content with newlines
	            encoded_content = data["content"].replace('\n', '')
	            content = base64.b64decode(encoded_content).decode('utf-8')
	            logger.info(f"Decoded content ({len(content)} characters)")
	        except Exception as e:
	            # Deliberate best effort (e.g. binary files): hand back the raw payload
	            logger.warning(f"Failed to decode content: {e}")
	            content = data.get("content", "")

	    return {
	        "name": data["name"],
	        "path": data["path"],
	        "content": content,
	        "encoding": data.get("encoding", "base64")
	    }


	async def search_documentation(org: str, query: str) -> List[Dict[str, Any]]:
	    """
	    Search an organization's /doc folders through the GitHub code search endpoint

	    Sends one request with the query ``org:<org> path:/doc <query>`` and
	    returns at most SEARCH_RESULTS_LIMIT hits (a single page).

	    Args:
	        org: GitHub organization name
	        query: Free-text search terms appended to the search query

	    Returns:
	        List of match dictionaries with the keys "name", "path",
	        "repository" and "url"

	    Raises:
	        Exception: On HTTP 403 (reported as rate limiting) or any other
	            non-200 status
	    """
	    endpoint = f"{GITHUB_API_BASE}/search/code"

	    async with aiohttp.ClientSession() as session:
	        request_headers = create_headers()
	        search_params = {
	            "q": f"org:{org} path:/doc {query}",
	            "per_page": SEARCH_RESULTS_LIMIT
	        }

	        logger.info(f"Searching for: '{query}' in {org}")

	        async with session.get(
	            endpoint,
	            headers=request_headers,
	            params=search_params,
	        ) as response:
	            status = response.status

	            # Every 403 is reported as rate limiting, whatever its actual cause
	            if status == 403:
	                raise Exception("Search API rate limit exceeded. Try again later.")

	            if status != 200:
	                error_text = await response.text()
	                raise Exception(f"GitHub API error {response.status}: {error_text}")

	            payload = await response.json()

	        matches = []
	        for hit in payload.get("items", []):
	            owner_repo = hit.get("repository", {})
	            matches.append({
	                "name": hit["name"],
	                "path": hit["path"],
	                "repository": owner_repo.get("name", ""),
	                "url": hit["html_url"],
	            })

	        logger.info(f"Found {len(matches)} matching files")
	        return matches


	# ============================================================================
	# Gradio MCP Tool Functions
	# ============================================================================

	async def get_org_repos_tool(org: str) -> str:
	    """
	    Fetch all repositories from a GitHub organization with /doc folder detection.

	    This tool uses the GitHub Search API to efficiently find repositories
	    that have a /doc folder, falling back to checking each repo individually
	    if the search API is unavailable.

	    Args:
	        org (str): GitHub organization name (e.g., "microsoft", "anthropics")

	    Returns:
	        str: JSON string containing list of repositories with their metadata
	    """
	    # NOTE: docstring text kept as-is; presumably Gradio surfaces it as the
	    # MCP tool description - confirm before rewording.
	    try:
	        rendered = json.dumps(await get_org_repos(org), indent=2)
	    except Exception as e:
	        # Failures are reported to the caller as a JSON object, never raised
	        rendered = json.dumps({"error": str(e)}, indent=2)
	    return rendered


	async def get_repo_docs_tool(org: str, repo: str) -> str:
	    """
	    Get all documentation files from a repository's /doc folder.

	    Filters for supported file types: Markdown, Mermaid, SVG, OpenAPI, Postman.

	    Args:
	        org (str): GitHub organization name
	        repo (str): Repository name

	    Returns:
	        str: JSON string containing list of documentation files with metadata
	    """
	    # NOTE: docstring text kept as-is; presumably Gradio surfaces it as the
	    # MCP tool description - confirm before rewording.
	    try:
	        rendered = json.dumps(await get_repo_docs(org, repo), indent=2)
	    except Exception as e:
	        # Failures are reported to the caller as a JSON object, never raised
	        rendered = json.dumps({"error": str(e)}, indent=2)
	    return rendered


	async def get_file_content_tool(org: str, repo: str, path: str) -> str:
	    """
	    Fetch and decode content of a specific file from GitHub.

	    Automatically decodes base64-encoded content returned by GitHub API.

	    Args:
	        org (str): GitHub organization name
	        repo (str): Repository name
	        path (str): File path within repository (e.g., "doc/README.md")

	    Returns:
	        str: JSON string containing file metadata and decoded content
	    """
	    # NOTE: docstring text kept as-is; presumably Gradio surfaces it as the
	    # MCP tool description - confirm before rewording.
	    try:
	        rendered = json.dumps(await get_file_content(org, repo, path), indent=2)
	    except Exception as e:
	        # Failures are reported to the caller as a JSON object, never raised
	        rendered = json.dumps({"error": str(e)}, indent=2)
	    return rendered


	async def search_documentation_tool(org: str, query: str) -> str:
	    """
	    Search for documentation files across all repositories in an organization.

	    Uses GitHub Code Search API to find matching files in /doc folders.

	    Args:
	        org (str): GitHub organization name
	        query (str): Search query string (e.g., "authentication", "API", "tutorial")

	    Returns:
	        str: JSON string containing list of matching files with their locations
	    """
	    # NOTE: docstring text kept as-is; presumably Gradio surfaces it as the
	    # MCP tool description - confirm before rewording.
	    try:
	        rendered = json.dumps(await search_documentation(org, query), indent=2)
	    except Exception as e:
	        # Failures are reported to the caller as a JSON object, never raised
	        rendered = json.dumps({"error": str(e)}, indent=2)
	    return rendered


	# ============================================================================
	# Gradio Interface
	# ============================================================================

	# Create individual interfaces for each tool
	# One gr.Interface per tool function; each takes plain text inputs and
	# shows the tool's JSON string in a 20-line text box.

	# Tab 1: repositories of an organization
	get_repos_interface = gr.Interface(
	    fn=get_org_repos_tool,
	    inputs=[
	        gr.Textbox(label="Organization", placeholder="e.g., anthropics"),
	    ],
	    outputs=[
	        gr.Textbox(label="Repositories (JSON)", lines=20),
	    ],
	    title="Get Organization Repos",
	    description="Fetch all repositories from a GitHub organization with /doc folder detection",
	)

	# Tab 2: documentation files inside one repository
	get_docs_interface = gr.Interface(
	    fn=get_repo_docs_tool,
	    inputs=[
	        gr.Textbox(label="Organization", placeholder="e.g., anthropics"),
	        gr.Textbox(label="Repository", placeholder="e.g., anthropic-sdk-python"),
	    ],
	    outputs=[
	        gr.Textbox(label="Documentation Files (JSON)", lines=20),
	    ],
	    title="Get Repository Docs",
	    description="Get all documentation files from a repository's /doc folder",
	)

	# Tab 3: decoded content of a single file
	get_content_interface = gr.Interface(
	    fn=get_file_content_tool,
	    inputs=[
	        gr.Textbox(label="Organization", placeholder="e.g., anthropics"),
	        gr.Textbox(label="Repository", placeholder="e.g., anthropic-sdk-python"),
	        gr.Textbox(label="File Path", placeholder="e.g., doc/README.md"),
	    ],
	    outputs=[
	        gr.Textbox(label="File Content (JSON)", lines=20),
	    ],
	    title="Get File Content",
	    description="Fetch and decode content of a specific file from GitHub",
	)

	# Tab 4: search across the organization's /doc folders
	search_docs_interface = gr.Interface(
	    fn=search_documentation_tool,
	    inputs=[
	        gr.Textbox(label="Organization", placeholder="e.g., anthropics"),
	        gr.Textbox(label="Search Query", placeholder="e.g., streaming"),
	    ],
	    outputs=[
	        gr.Textbox(label="Search Results (JSON)", lines=20),
	    ],
	    title="Search Documentation",
	    description="Search for documentation files across all repositories in an organization",
	)

	# Combine into tabbed interface; tab names are matched to interfaces by position
	demo = gr.TabbedInterface(
	    interface_list=[
	        get_repos_interface,
	        get_docs_interface,
	        get_content_interface,
	        search_docs_interface,
	    ],
	    tab_names=["Get Repos", "Get Docs", "Get Content", "Search"],
	    title="GitHub MCP Server",
	)


	# ============================================================================
	# Main Entry Point
	# ============================================================================

	if __name__ == "__main__":
	    # Serve the UI on all interfaces at port 7860 with the MCP server enabled
	    demo.launch(
	        mcp_server=True,
	        server_name="0.0.0.0",
	        server_port=7860,
	    )