SPerva's picture
Create app.py
57b8ecd verified
#!/usr/bin/env python3
"""
GitHub MCP Server
Provides GitHub API access via Model Context Protocol using Gradio
"""
import base64
import json
import logging
import os
from typing import List, Dict, Any
import aiohttp
import gradio as gr
# Runtime configuration, read once from the environment at import time.
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
# Secrets are often pasted or mounted with a trailing newline; stripping keeps
# the value usable inside an HTTP header.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "").strip()
# Fall back to the public API when the variable is unset OR empty, and drop
# trailing slashes so f"{GITHUB_API_BASE}/..." never produces "//".
GITHUB_API_BASE = (
    os.getenv("GITHUB_API_BASE_URL") or "https://api.github.com"
).rstrip("/")

# An unknown or empty LOG_LEVEL must not crash the import: resolve it
# defensively and fall back to INFO.
_log_level = getattr(logging, LOG_LEVEL.upper(), None)
_log_level_is_valid = isinstance(_log_level, int)
logging.basicConfig(
    level=_log_level if _log_level_is_valid else logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
if not _log_level_is_valid:
    logger.warning("Unknown LOG_LEVEL %r, falling back to INFO", LOG_LEVEL)

# API Constants
RESULTS_PER_PAGE = 100  # "per_page" used when listing repos / finding doc folders
SEARCH_RESULTS_LIMIT = 50  # "per_page" used by the documentation search
# ============================================================================
# Helper Functions
# ============================================================================
def create_headers() -> Dict[str, str]:
    """
    Build the HTTP headers sent with every GitHub API request

    The Accept and User-Agent headers are always present; an Authorization
    header is added only when GITHUB_TOKEN is non-empty.

    Returns:
        Dictionary of HTTP headers for GitHub API requests
    """
    base_headers = {
        "Accept": "application/vnd.github.v3+json",
        "User-Agent": "GitHub-MCP-Server/1.0",
    }
    # No token configured: send the request without credentials
    if not GITHUB_TOKEN:
        return base_headers
    return {**base_headers, "Authorization": f"token {GITHUB_TOKEN}"}
async def check_doc_folder(
    session: aiohttp.ClientSession,
    org: str,
    repo: str
) -> bool:
    """
    Probe a repository for a /doc folder

    Issues a GET on the repository's "contents/doc" endpoint and treats an
    HTTP 200 as "folder exists". Any other status, and any exception raised
    while performing the request, yields False.

    Args:
        session: aiohttp ClientSession supplied by the caller
        org: Organization name
        repo: Repository name

    Returns:
        True if the request answered 200, False otherwise
    """
    doc_url = f"{GITHUB_API_BASE}/repos/{org}/{repo}/contents/doc"
    request_headers = create_headers()
    try:
        async with session.get(doc_url, headers=request_headers) as resp:
            folder_found = resp.status == 200
    except Exception as exc:
        # Best-effort probe: a failed request is reported as "no folder"
        logger.debug(f"Error checking /doc folder for {org}/{repo}: {exc}")
        folder_found = False
    return folder_found
def determine_content_type(filename: str) -> str:
    """
    Determine content type from filename

    The decision is based on the (case-insensitive) file extension. JSON
    files are classified as Postman collections when their name contains
    "postman" anywhere, which covers both "postman_collection.json" and the
    "<Name>.postman_collection.json" form; every other JSON file is assumed
    to be OpenAPI.

    Args:
        filename: Name of the file

    Returns:
        Content type: 'markdown', 'mermaid', 'svg', 'openapi', 'postman', or 'unknown'
    """
    lower_name = filename.lower()
    if lower_name.endswith(('.mmd', '.mermaid')):
        return 'mermaid'
    if lower_name.endswith('.md'):
        return 'markdown'
    if lower_name.endswith('.svg'):
        return 'svg'
    if lower_name.endswith(('.yml', '.yaml')):
        return 'openapi'
    if lower_name.endswith('.json'):
        # A prefix-only check would miss "<Name>.postman_collection.json",
        # so look for the marker anywhere in the name.
        return 'postman' if 'postman' in lower_name else 'openapi'
    return 'unknown'
# ============================================================================
# Business Logic Functions (testable)
# ============================================================================
async def get_org_repos(org: str) -> List[Dict[str, Any]]:
    """
    List an organization's repositories and flag which ones have a /doc folder

    Two strategies are tried in order:

    1. One GitHub code-search request ("org:<org> path:/doc"). On HTTP 200
       the unique repositories found in the hits are returned, each with
       "hasDocFolder" set to True. Only the first page of hits
       (RESULTS_PER_PAGE items) is read.
    2. If the search raises or answers with any other status, every
       repository of the organization is listed page by page and each one
       is probed with check_doc_folder(). In this mode ALL repositories are
       returned and "hasDocFolder" may be False.

    Args:
        org: GitHub organization name

    Returns:
        List of repository dictionaries:
        [
            {
                "id": "12345",
                "name": "repo-name",
                "description": "...",
                "url": "https://github.com/org/repo-name",
                "hasDocFolder": True
            },
            ...
        ]

    Raises:
        ValueError: If org is empty or only whitespace
        Exception: If listing the organization's repositories fails
    """
    # Reject blank input up front instead of sending a malformed request
    org = (org or "").strip()
    if not org:
        raise ValueError("Organization name must not be empty")

    async with aiohttp.ClientSession() as session:
        headers = create_headers()

        # Strategy 1: Use GitHub Search API (efficient - one request)
        # NOTE(review): GitHub's code search documentation asks for at least
        # one search term; a qualifier-only query like this one may be
        # rejected, in which case the fallback below runs - confirm.
        search_url = f"{GITHUB_API_BASE}/search/code"
        params = {
            "q": f"org:{org} path:/doc",
            "per_page": RESULTS_PER_PAGE
        }
        try:
            async with session.get(search_url, headers=headers, params=params) as response:
                if response.status == 200:
                    data = await response.json()
                    items = data.get("items", [])

                    # Extract unique repositories from search results
                    # (hits are per file, so a repo can appear many times)
                    repos_with_docs: Dict[str, Dict[str, Any]] = {}
                    for item in items:
                        repo_info = item.get("repository", {})
                        repo_name = repo_info.get("name")
                        if repo_name and repo_name not in repos_with_docs:
                            repos_with_docs[repo_name] = {
                                "id": str(repo_info.get("id", "")),
                                "name": repo_name,
                                "description": repo_info.get("description") or "",
                                "url": repo_info.get("html_url", ""),
                                "hasDocFolder": True
                            }

                    # Only one page is read: make truncation visible
                    total_hits = data.get("total_count") or 0
                    if data.get("incomplete_results") or total_hits > len(items):
                        logger.warning(
                            f"Search returned {len(items)} of {total_hits} hits; "
                            "repository list may be incomplete"
                        )

                    logger.info(f"Found {len(repos_with_docs)} repos with /doc via search")
                    return list(repos_with_docs.values())

                # A non-200 answer used to fall through without a trace
                logger.warning(
                    f"Search API returned {response.status}, falling back to list all repos"
                )
        except Exception as e:
            # Best effort: any search failure just triggers the fallback
            logger.warning(f"Search API failed: {e}, falling back to list all repos")

        # Strategy 2: Fallback - List all repos and check each one
        repos_url = f"{GITHUB_API_BASE}/orgs/{org}/repos"
        all_repos = []
        page = 1
        logger.info(f"Fetching repos for organization: {org}")
        while True:
            async with session.get(
                repos_url,
                headers=headers,
                params={"per_page": RESULTS_PER_PAGE, "page": page, "sort": "updated"}
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise Exception(f"GitHub API error {response.status}: {error_text}")
                repos = await response.json()
                if not repos:
                    break
                all_repos.extend(repos)
                logger.info(f"Fetched page {page} ({len(repos)} repos)")
                page += 1
                # Stop if we got less than full page (last page)
                if len(repos) < RESULTS_PER_PAGE:
                    break
        logger.info(f"Total repos fetched: {len(all_repos)}")

        # Check each repo for /doc folder (one request per repo, sequential)
        result = []
        for idx, repo in enumerate(all_repos, 1):
            logger.info(f"Checking {idx}/{len(all_repos)}: {repo['name']}")
            has_doc = await check_doc_folder(session, org, repo["name"])
            result.append({
                "id": str(repo["id"]),
                "name": repo["name"],
                "description": repo.get("description") or "",
                "url": repo["html_url"],
                "hasDocFolder": has_doc
            })
        repos_with_docs_count = sum(1 for r in result if r["hasDocFolder"])
        logger.info(f"Found {repos_with_docs_count} repos with /doc folder")
        return result
async def get_repo_docs(org: str, repo: str) -> List[Dict[str, Any]]:
    """
    Get all documentation files from a repository's /doc folder
    Filters for supported file types: Markdown, Mermaid, SVG, OpenAPI, Postman

    Only direct children of /doc are considered; sub-directories are not
    descended into. An empty list is returned when /doc does not exist
    (HTTP 404) or when "doc" is not a directory.

    Args:
        org: GitHub organization name
        repo: Repository name

    Returns:
        List of documentation file dictionaries:
        [
            {
                "id": "abc123...",
                "name": "README.md",
                "path": "doc/README.md",
                "type": "markdown",
                "url": "https://github.com/org/repo/blob/main/doc/README.md",
                "download_url": "https://raw.githubusercontent.com/.../README.md",
            },
            ...
        ]

    Raises:
        ValueError: If org or repo is empty or only whitespace
        Exception: If GitHub answers with a status other than 200 or 404

    Example:
        docs = await get_repo_docs("anthropics", "anthropic-sdk-python")
    """
    # A blank repo would hit ".../repos/<org>//contents/doc", get a 404 and
    # be reported as "no docs" - fail loudly instead.
    org = (org or "").strip()
    repo = (repo or "").strip()
    if not org or not repo:
        raise ValueError("Organization and repository names must not be empty")

    # Supported file types (tuple so it can be passed straight to endswith)
    supported_extensions = (
        '.md',       # Markdown
        '.mmd',      # Mermaid
        '.mermaid',  # Mermaid
        '.svg',      # SVG images
        '.yml',      # YAML (OpenAPI)
        '.yaml',     # YAML (OpenAPI)
        '.json',     # JSON (OpenAPI/Postman)
    )

    async with aiohttp.ClientSession() as session:
        headers = create_headers()
        url = f"{GITHUB_API_BASE}/repos/{org}/{repo}/contents/doc"
        logger.info(f"Fetching docs from: {org}/{repo}/doc")
        async with session.get(url, headers=headers) as response:
            if response.status == 404:
                logger.warning(f"No /doc folder found in {org}/{repo}")
                return []
            if response.status != 200:
                error_text = await response.text()
                raise Exception(f"GitHub API error {response.status}: {error_text}")
            contents = await response.json()

    # The contents endpoint answers with a list only for directories; a
    # single object means "doc" is a file (or similar), so iterating it
    # would walk its keys and crash on item["type"].
    if not isinstance(contents, list):
        logger.warning(f"'doc' in {org}/{repo} is not a directory")
        return []

    docs = []
    skipped = 0
    for item in contents:
        # Only process files (not directories)
        if item["type"] != "file":
            continue
        name = item["name"]
        # Files with an unsupported extension are counted, not returned
        if not name.lower().endswith(supported_extensions):
            skipped += 1
            continue
        docs.append({
            "id": item["sha"],
            "name": name,
            "path": item["path"],
            "type": determine_content_type(name),
            "url": item["html_url"],
            # download_url may be present but null; always hand back a string
            "download_url": item.get("download_url") or "",
        })
    logger.info(f"Found {len(docs)} documentation files ({skipped} skipped)")
    return docs
async def get_file_content(org: str, repo: str, path: str) -> Dict[str, Any]:
    """
    Fetch content of a specific file from GitHub
    Decodes base64-encoded content returned by GitHub API

    When decoding succeeds, "content" holds the decoded UTF-8 text. When it
    fails (for example on binary data), "content" holds the raw value
    returned by the API. In both cases "encoding" simply mirrors the API's
    own "encoding" field (defaulting to "base64").

    Args:
        org: GitHub organization name
        repo: Repository name
        path: File path within repository (e.g., "doc/README.md")

    Returns:
        Dictionary with file metadata and content:
        {
            "name": "README.md",
            "path": "doc/README.md",
            "content": "# Documentation\\n\\nThis is...",
            "encoding": "base64"
        }

    Raises:
        ValueError: If org, repo or path is empty
        Exception: If the file does not exist, the path is a directory, or
            GitHub answers with an error status

    Example:
        content = await get_file_content("anthropics", "sdk", "doc/README.md")
    """
    org = (org or "").strip()
    repo = (repo or "").strip()
    # A leading "/" would produce ".../contents//doc/..." in the URL
    path = (path or "").strip().lstrip("/")
    if not org or not repo:
        raise ValueError("Organization and repository names must not be empty")
    if not path:
        raise ValueError("File path must not be empty")

    async with aiohttp.ClientSession() as session:
        headers = create_headers()
        url = f"{GITHUB_API_BASE}/repos/{org}/{repo}/contents/{path}"
        logger.info(f"Fetching content: {org}/{repo}/{path}")
        async with session.get(url, headers=headers) as response:
            if response.status == 404:
                raise Exception(f"File not found: {path}")
            if response.status != 200:
                error_text = await response.text()
                raise Exception(f"GitHub API error {response.status}: {error_text}")
            data = await response.json()

    # For a directory the contents endpoint returns a list of entries;
    # indexing it with data["name"] would raise an opaque TypeError.
    if isinstance(data, list):
        raise Exception(f"Path is a directory, not a file: {path}")

    # Decode base64 content if present
    raw_content = data.get("content") or ""
    content = ""
    if raw_content:
        try:
            # GitHub returns base64-encoded content with newlines
            encoded_content = raw_content.replace('\n', '')
            content = base64.b64decode(encoded_content).decode('utf-8')
            logger.info(f"Decoded content ({len(content)} characters)")
        except ValueError as e:
            # binascii.Error and UnicodeDecodeError are both ValueErrors;
            # hand back the undecoded payload rather than failing.
            logger.warning(f"Failed to decode content: {e}")
            content = raw_content
    return {
        "name": data["name"],
        "path": data["path"],
        "content": content,
        "encoding": data.get("encoding", "base64")
    }
async def search_documentation(org: str, query: str) -> List[Dict[str, Any]]:
    """
    Search for files in /doc folders across an organization's repositories

    Sends one GitHub code-search request ("org:<org> path:/doc <query>")
    and returns at most SEARCH_RESULTS_LIMIT hits (first page only).

    Args:
        org: GitHub organization name
        query: Search terms (e.g., "authentication")

    Returns:
        List of matching file dictionaries:
        [
            {
                "name": "README.md",
                "path": "doc/README.md",
                "repository": "repo-name",
                "url": "https://github.com/org/repo-name/blob/.../doc/README.md",
            },
            ...
        ]

    Raises:
        ValueError: If org or query is empty or only whitespace
        Exception: If the search is rate limited or GitHub answers with an
            error status

    Example:
        hits = await search_documentation("anthropics", "streaming")
    """
    # Code search needs an actual search term; reject blanks before calling
    org = (org or "").strip()
    query = (query or "").strip()
    if not org:
        raise ValueError("Organization name must not be empty")
    if not query:
        raise ValueError("Search query must not be empty")

    async with aiohttp.ClientSession() as session:
        headers = create_headers()
        search_url = f"{GITHUB_API_BASE}/search/code"
        params = {
            "q": f"org:{org} path:/doc {query}",
            "per_page": SEARCH_RESULTS_LIMIT
        }
        logger.info(f"Searching for: '{query}' in {org}")
        async with session.get(search_url, headers=headers, params=params) as response:
            # GitHub signals an exceeded rate limit with 403 or 429
            if response.status in (403, 429):
                raise Exception("Search API rate limit exceeded. Try again later.")
            if response.status != 200:
                error_text = await response.text()
                raise Exception(f"GitHub API error {response.status}: {error_text}")
            data = await response.json()

    results = []
    for item in data.get("items", []):
        repo_info = item.get("repository", {})
        results.append({
            "name": item["name"],
            "path": item["path"],
            "repository": repo_info.get("name", ""),
            "url": item["html_url"],
        })
    logger.info(f"Found {len(results)} matching files")
    return results
# ============================================================================
# Gradio MCP Tool Functions
# ============================================================================
async def get_org_repos_tool(org: str) -> str:
    """
    Fetch all repositories from a GitHub organization with /doc folder detection.
    This tool uses the GitHub Search API to efficiently find repositories
    that have a /doc folder, falling back to checking each repo individually
    if the search API is unavailable.
    Args:
        org (str): GitHub organization name (e.g., "microsoft", "anthropics")
    Returns:
        str: JSON string containing list of repositories with their metadata
    """
    # Tool boundary: never raise, always answer with a JSON document.
    try:
        repositories = await get_org_repos(org)
        response_json = json.dumps(repositories, indent=2)
    except Exception as exc:
        # Failures are reported to the caller as {"error": "<message>"}
        response_json = json.dumps({"error": str(exc)}, indent=2)
    return response_json
async def get_repo_docs_tool(org: str, repo: str) -> str:
    """
    Get all documentation files from a repository's /doc folder.
    Filters for supported file types: Markdown, Mermaid, SVG, OpenAPI, Postman.
    Args:
        org (str): GitHub organization name
        repo (str): Repository name
    Returns:
        str: JSON string containing list of documentation files with metadata
    """
    # Tool boundary: never raise, always answer with a JSON document.
    try:
        doc_files = await get_repo_docs(org, repo)
        response_json = json.dumps(doc_files, indent=2)
    except Exception as exc:
        # Failures are reported to the caller as {"error": "<message>"}
        response_json = json.dumps({"error": str(exc)}, indent=2)
    return response_json
async def get_file_content_tool(org: str, repo: str, path: str) -> str:
    """
    Fetch and decode content of a specific file from GitHub.
    Automatically decodes base64-encoded content returned by GitHub API.
    Args:
        org (str): GitHub organization name
        repo (str): Repository name
        path (str): File path within repository (e.g., "doc/README.md")
    Returns:
        str: JSON string containing file metadata and decoded content
    """
    # Tool boundary: never raise, always answer with a JSON document.
    try:
        file_info = await get_file_content(org, repo, path)
        response_json = json.dumps(file_info, indent=2)
    except Exception as exc:
        # Failures are reported to the caller as {"error": "<message>"}
        response_json = json.dumps({"error": str(exc)}, indent=2)
    return response_json
async def search_documentation_tool(org: str, query: str) -> str:
    """
    Search for documentation files across all repositories in an organization.
    Uses GitHub Code Search API to find matching files in /doc folders.
    Args:
        org (str): GitHub organization name
        query (str): Search query string (e.g., "authentication", "API", "tutorial")
    Returns:
        str: JSON string containing list of matching files with their locations
    """
    # Tool boundary: never raise, always answer with a JSON document.
    try:
        matches = await search_documentation(org, query)
        response_json = json.dumps(matches, indent=2)
    except Exception as exc:
        # Failures are reported to the caller as {"error": "<message>"}
        response_json = json.dumps({"error": str(exc)}, indent=2)
    return response_json
# ============================================================================
# Gradio Interface
# ============================================================================
# One gr.Interface per tool function. Each interface is given its own,
# freshly constructed Textbox components and a single JSON text output.

# Tab 1: repositories of an organization
get_repos_interface = gr.Interface(
    title="Get Organization Repos",
    description="Fetch all repositories from a GitHub organization with /doc folder detection",
    fn=get_org_repos_tool,
    inputs=[
        gr.Textbox(placeholder="e.g., anthropics", label="Organization"),
    ],
    outputs=[
        gr.Textbox(lines=20, label="Repositories (JSON)"),
    ],
)

# Tab 2: documentation files inside one repository
get_docs_interface = gr.Interface(
    title="Get Repository Docs",
    description="Get all documentation files from a repository's /doc folder",
    fn=get_repo_docs_tool,
    inputs=[
        gr.Textbox(placeholder="e.g., anthropics", label="Organization"),
        gr.Textbox(placeholder="e.g., anthropic-sdk-python", label="Repository"),
    ],
    outputs=[
        gr.Textbox(lines=20, label="Documentation Files (JSON)"),
    ],
)

# Tab 3: content of a single file
get_content_interface = gr.Interface(
    title="Get File Content",
    description="Fetch and decode content of a specific file from GitHub",
    fn=get_file_content_tool,
    inputs=[
        gr.Textbox(placeholder="e.g., anthropics", label="Organization"),
        gr.Textbox(placeholder="e.g., anthropic-sdk-python", label="Repository"),
        gr.Textbox(placeholder="e.g., doc/README.md", label="File Path"),
    ],
    outputs=[
        gr.Textbox(lines=20, label="File Content (JSON)"),
    ],
)

# Tab 4: search across an organization's documentation
search_docs_interface = gr.Interface(
    title="Search Documentation",
    description="Search for documentation files across all repositories in an organization",
    fn=search_documentation_tool,
    inputs=[
        gr.Textbox(placeholder="e.g., anthropics", label="Organization"),
        gr.Textbox(placeholder="e.g., streaming", label="Search Query"),
    ],
    outputs=[
        gr.Textbox(lines=20, label="Search Results (JSON)"),
    ],
)

# Combine into tabbed interface; tab names are listed in the same order as
# the interfaces they label.
demo = gr.TabbedInterface(
    [
        get_repos_interface,
        get_docs_interface,
        get_content_interface,
        search_docs_interface,
    ],
    ["Get Repos", "Get Docs", "Get Content", "Search"],
    title="GitHub MCP Server",
)
# ============================================================================
# Main Entry Point
# ============================================================================
if __name__ == "__main__":
    # Start the Gradio app with its MCP server enabled, listening on every
    # network interface on port 7860.
    demo.launch(
        mcp_server=True,
        server_name="0.0.0.0",
        server_port=7860,
    )