#!/usr/bin/env python3
"""
GitHub MCP Server
Provides GitHub API access via Model Context Protocol using Gradio
"""
import base64
import json
import logging
import os
from typing import List, Dict, Any

import aiohttp
import gradio as gr

LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "")
GITHUB_API_BASE = os.getenv("GITHUB_API_BASE_URL", "https://api.github.com")

logging.basicConfig(
    level=getattr(logging, LOG_LEVEL.upper()),
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# API Constants
RESULTS_PER_PAGE = 100
SEARCH_RESULTS_LIMIT = 50


# ============================================================================
# Helper Functions
# ============================================================================

def create_headers() -> Dict[str, str]:
    """
    Create GitHub API request headers with authentication

    Returns:
        Dictionary of HTTP headers for GitHub API requests
    """
    headers = {
        "Accept": "application/vnd.github.v3+json",
        "User-Agent": "GitHub-MCP-Server/1.0"
    }
    # Add authorization if token is available
    if GITHUB_TOKEN:
        headers["Authorization"] = f"token {GITHUB_TOKEN}"
    return headers


async def check_doc_folder(
    session: aiohttp.ClientSession, org: str, repo: str
) -> bool:
    """
    Check if a repository has a /doc folder

    Args:
        session: aiohttp ClientSession (reuse connection)
        org: Organization name
        repo: Repository name

    Returns:
        True if /doc folder exists, False otherwise
    """
    headers = create_headers()
    url = f"{GITHUB_API_BASE}/repos/{org}/{repo}/contents/doc"
    try:
        async with session.get(url, headers=headers) as response:
            # 200 means the path exists; 404 (or any error) means it does not
            return response.status == 200
    except Exception as e:
        # Network errors are treated as "no doc folder" — this is a best-effort probe
        logger.debug(f"Error checking /doc folder for {org}/{repo}: {e}")
        return False


def determine_content_type(filename: str) -> str:
    """
    Determine content type from filename

    Args:
        filename: Name of the file

    Returns:
        Content type: 'markdown', 'mermaid', 'svg', 'openapi', 'postman',
        or 'unknown'
    """
    lower_name = filename.lower()
    if lower_name.endswith(('.mmd', '.mermaid')):
        return 'mermaid'
    elif lower_name.endswith('.md'):
        return 'markdown'
    elif lower_name.endswith('.svg'):
        return 'svg'
    elif lower_name.endswith(('.yml', '.yaml')):
        return 'openapi'
    elif lower_name.endswith('.json'):
        # Check if it's a Postman collection first, otherwise assume OpenAPI.
        # Postman exports default to "<name>.postman_collection.json", so a
        # substring check catches them (a prefix check would miss that naming).
        return 'postman' if 'postman' in lower_name else 'openapi'
    else:
        return 'unknown'


# ============================================================================
# Business Logic Functions (testable)
# ============================================================================

async def get_org_repos(org: str) -> List[Dict[str, Any]]:
    """
    List repositories in a GitHub organization, flagging those with a /doc folder

    Tries the GitHub Code Search API first (one request); if that fails,
    falls back to listing every repo and probing each one for /doc.

    Args:
        org: GitHub organization name

    Returns:
        List of repo dictionaries with keys: id, name, description, url,
        hasDocFolder

    Raises:
        Exception: if the fallback repo-listing request returns a non-200 status
    """
    async with aiohttp.ClientSession() as session:
        headers = create_headers()

        # Strategy 1: Use GitHub Search API (efficient - one request)
        search_url = f"{GITHUB_API_BASE}/search/code"
        params = {
            "q": f"org:{org} path:/doc",
            "per_page": RESULTS_PER_PAGE
        }
        try:
            async with session.get(search_url, headers=headers, params=params) as response:
                if response.status == 200:
                    data = await response.json()
                    # Extract unique repositories from search results
                    repos_with_docs = {}
                    for item in data.get("items", []):
                        repo_info = item.get("repository", {})
                        repo_name = repo_info.get("name")
                        if repo_name and repo_name not in repos_with_docs:
                            repos_with_docs[repo_name] = {
                                "id": str(repo_info.get("id", "")),
                                "name": repo_name,
                                "description": repo_info.get("description") or "",
                                "url": repo_info.get("html_url", ""),
                                "hasDocFolder": True
                            }
                    logger.info(f"Found {len(repos_with_docs)} repos with /doc via search")
                    return list(repos_with_docs.values())
                # Non-200 (e.g. rate-limited search): fall through to Strategy 2
        except Exception as e:
            logger.warning(f"Search API failed: {e}, falling back to list all repos")

        # Strategy 2: Fallback - List all repos and check each one
        repos_url = f"{GITHUB_API_BASE}/orgs/{org}/repos"
        all_repos = []
        page = 1
        logger.info(f"Fetching repos for organization: {org}")
        while True:
            async with session.get(
                repos_url,
                headers=headers,
                params={"per_page": RESULTS_PER_PAGE, "page": page, "sort": "updated"}
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise Exception(f"GitHub API error {response.status}: {error_text}")
                repos = await response.json()
                if not repos:
                    break
                all_repos.extend(repos)
                logger.info(f"Fetched page {page} ({len(repos)} repos)")
                page += 1
                # Stop if we got less than full page (last page)
                if len(repos) < RESULTS_PER_PAGE:
                    break

        logger.info(f"Total repos fetched: {len(all_repos)}")

        # Check each repo for /doc folder (one probe request per repo)
        result = []
        for idx, repo in enumerate(all_repos, 1):
            logger.info(f"Checking {idx}/{len(all_repos)}: {repo['name']}")
            has_doc = await check_doc_folder(session, org, repo["name"])
            result.append({
                "id": str(repo["id"]),
                "name": repo["name"],
                "description": repo.get("description") or "",
                "url": repo["html_url"],
                "hasDocFolder": has_doc
            })

        repos_with_docs_count = sum(1 for r in result if r["hasDocFolder"])
        logger.info(f"Found {repos_with_docs_count} repos with /doc folder")
        return result


async def get_repo_docs(org: str, repo: str) -> List[Dict[str, Any]]:
    """
    Get all documentation files from a repository's /doc folder

    Filters for supported file types: Markdown, Mermaid, SVG, OpenAPI, Postman

    Args:
        org: GitHub organization name
        repo: Repository name

    Returns:
        List of documentation file dictionaries:
        [
            {
                "id": "abc123...",
                "name": "README.md",
                "path": "doc/README.md",
                "type": "markdown",
                "url": "https://github.com/org/repo/blob/main/doc/README.md",
                "download_url": "https://raw.githubusercontent.com/.../README.md",
            },
            ...
        ]

    Raises:
        Exception: if the GitHub API returns a non-200, non-404 status

    Example:
        docs = await get_repo_docs("anthropics", "anthropic-sdk-python")
    """
    async with aiohttp.ClientSession() as session:
        headers = create_headers()
        url = f"{GITHUB_API_BASE}/repos/{org}/{repo}/contents/doc"
        logger.info(f"Fetching docs from: {org}/{repo}/doc")

        async with session.get(url, headers=headers) as response:
            if response.status == 404:
                # Missing /doc folder is expected for many repos — not an error
                logger.warning(f"No /doc folder found in {org}/{repo}")
                return []
            if response.status != 200:
                error_text = await response.text()
                raise Exception(f"GitHub API error {response.status}: {error_text}")
            contents = await response.json()

        # Filter for supported file types
        supported_extensions = [
            '.md',       # Markdown
            '.mmd',      # Mermaid
            '.mermaid',  # Mermaid
            '.svg',      # SVG images
            '.yml',      # YAML (OpenAPI)
            '.yaml',     # YAML (OpenAPI)
            '.json'      # JSON (OpenAPI/Postman)
        ]

        docs = []
        skipped = 0
        for item in contents:
            # Only process files (not directories)
            if item["type"] == "file":
                name = item["name"]
                # Check if file extension is supported
                if any(name.lower().endswith(ext) for ext in supported_extensions):
                    content_type = determine_content_type(name)
                    docs.append({
                        "id": item["sha"],
                        "name": name,
                        "path": item["path"],
                        "type": content_type,
                        "url": item["html_url"],
                        "download_url": item.get("download_url", ""),
                    })
                else:
                    skipped += 1

        logger.info(f"Found {len(docs)} documentation files ({skipped} skipped)")
        return docs


async def get_file_content(org: str, repo: str, path: str) -> Dict[str, Any]:
    """
    Fetch content of a specific file from GitHub

    Decodes base64-encoded content returned by GitHub API

    Args:
        org: GitHub organization name
        repo: Repository name
        path: File path within repository (e.g., "doc/README.md")

    Returns:
        Dictionary with file metadata and content:
        {
            "name": "README.md",
            "path": "doc/README.md",
            "content": "# Documentation\\n\\nThis is...",
            "encoding": "base64"
        }

    Raises:
        Exception: if the file is not found (404) or the API returns an error

    Example:
        content = await get_file_content("anthropics", "sdk", "doc/README.md")
    """
    async with aiohttp.ClientSession() as session:
        headers = create_headers()
        url = f"{GITHUB_API_BASE}/repos/{org}/{repo}/contents/{path}"
        logger.info(f"Fetching content: {org}/{repo}/{path}")

        async with session.get(url, headers=headers) as response:
            if response.status == 404:
                raise Exception(f"File not found: {path}")
            if response.status != 200:
                error_text = await response.text()
                raise Exception(f"GitHub API error {response.status}: {error_text}")
            data = await response.json()

        # Decode base64 content if present
        content = ""
        if "content" in data and data["content"]:
            try:
                # GitHub returns base64-encoded content with newlines
                encoded_content = data["content"].replace('\n', '')
                content = base64.b64decode(encoded_content).decode('utf-8')
                logger.info(f"Decoded content ({len(content)} characters)")
            except Exception as e:
                # Fall back to the raw (still-encoded) payload rather than failing
                logger.warning(f"Failed to decode content: {e}")
                content = data.get("content", "")

        return {
            "name": data["name"],
            "path": data["path"],
            "content": content,
            "encoding": data.get("encoding", "base64")
        }


async def search_documentation(org: str, query: str) -> List[Dict[str, Any]]:
    """
    Search for documentation files across an organization's /doc folders

    Uses the GitHub Code Search API scoped to ``org`` and ``path:/doc``.

    Args:
        org: GitHub organization name
        query: Search query string

    Returns:
        List of match dictionaries with keys: name, path, repository, url

    Raises:
        Exception: on search rate limit (403) or any other non-200 status
    """
    async with aiohttp.ClientSession() as session:
        headers = create_headers()
        search_url = f"{GITHUB_API_BASE}/search/code"
        params = {
            "q": f"org:{org} path:/doc {query}",
            "per_page": SEARCH_RESULTS_LIMIT
        }
        logger.info(f"Searching for: '{query}' in {org}")

        async with session.get(search_url, headers=headers, params=params) as response:
            if response.status == 403:
                # The code-search endpoint has a much lower rate limit than REST
                raise Exception("Search API rate limit exceeded. Try again later.")
            if response.status != 200:
                error_text = await response.text()
                raise Exception(f"GitHub API error {response.status}: {error_text}")
            data = await response.json()

        results = []
        for item in data.get("items", []):
            repo_info = item.get("repository", {})
            results.append({
                "name": item["name"],
                "path": item["path"],
                "repository": repo_info.get("name", ""),
                "url": item["html_url"],
            })

        logger.info(f"Found {len(results)} matching files")
        return results


# ============================================================================
# Gradio MCP Tool Functions
# ============================================================================

async def get_org_repos_tool(org: str) -> str:
    """
    Fetch all repositories from a GitHub organization with /doc folder detection.

    This tool uses the GitHub Search API to efficiently find repositories
    that have a /doc folder, falling back to checking each repo individually
    if the search API is unavailable.

    Args:
        org (str): GitHub organization name (e.g., "microsoft", "anthropics")

    Returns:
        str: JSON string containing list of repositories with their metadata
    """
    try:
        result = await get_org_repos(org)
        return json.dumps(result, indent=2)
    except Exception as e:
        # MCP tools must always return a string; surface errors as JSON
        return json.dumps({"error": str(e)}, indent=2)


async def get_repo_docs_tool(org: str, repo: str) -> str:
    """
    Get all documentation files from a repository's /doc folder.

    Filters for supported file types: Markdown, Mermaid, SVG, OpenAPI, Postman.

    Args:
        org (str): GitHub organization name
        repo (str): Repository name

    Returns:
        str: JSON string containing list of documentation files with metadata
    """
    try:
        result = await get_repo_docs(org, repo)
        return json.dumps(result, indent=2)
    except Exception as e:
        return json.dumps({"error": str(e)}, indent=2)


async def get_file_content_tool(org: str, repo: str, path: str) -> str:
    """
    Fetch and decode content of a specific file from GitHub.

    Automatically decodes base64-encoded content returned by GitHub API.

    Args:
        org (str): GitHub organization name
        repo (str): Repository name
        path (str): File path within repository (e.g., "doc/README.md")

    Returns:
        str: JSON string containing file metadata and decoded content
    """
    try:
        result = await get_file_content(org, repo, path)
        return json.dumps(result, indent=2)
    except Exception as e:
        return json.dumps({"error": str(e)}, indent=2)


async def search_documentation_tool(org: str, query: str) -> str:
    """
    Search for documentation files across all repositories in an organization.

    Uses GitHub Code Search API to find matching files in /doc folders.

    Args:
        org (str): GitHub organization name
        query (str): Search query string (e.g., "authentication", "API", "tutorial")

    Returns:
        str: JSON string containing list of matching files with their locations
    """
    try:
        result = await search_documentation(org, query)
        return json.dumps(result, indent=2)
    except Exception as e:
        return json.dumps({"error": str(e)}, indent=2)


# ============================================================================
# Gradio Interface
# ============================================================================

# Create individual interfaces for each tool
get_repos_interface = gr.Interface(
    fn=get_org_repos_tool,
    inputs=[gr.Textbox(label="Organization", placeholder="e.g., anthropics")],
    outputs=[gr.Textbox(label="Repositories (JSON)", lines=20)],
    title="Get Organization Repos",
    description="Fetch all repositories from a GitHub organization with /doc folder detection",
)

get_docs_interface = gr.Interface(
    fn=get_repo_docs_tool,
    inputs=[
        gr.Textbox(label="Organization", placeholder="e.g., anthropics"),
        gr.Textbox(label="Repository", placeholder="e.g., anthropic-sdk-python"),
    ],
    outputs=[gr.Textbox(label="Documentation Files (JSON)", lines=20)],
    title="Get Repository Docs",
    description="Get all documentation files from a repository's /doc folder",
)

get_content_interface = gr.Interface(
    fn=get_file_content_tool,
    inputs=[
        gr.Textbox(label="Organization", placeholder="e.g., anthropics"),
        gr.Textbox(label="Repository", placeholder="e.g., anthropic-sdk-python"),
        gr.Textbox(label="File Path", placeholder="e.g., doc/README.md"),
    ],
    outputs=[gr.Textbox(label="File Content (JSON)", lines=20)],
    title="Get File Content",
    description="Fetch and decode content of a specific file from GitHub",
)

search_docs_interface = gr.Interface(
    fn=search_documentation_tool,
    inputs=[
        gr.Textbox(label="Organization", placeholder="e.g., anthropics"),
        gr.Textbox(label="Search Query", placeholder="e.g., streaming"),
    ],
    outputs=[gr.Textbox(label="Search Results (JSON)", lines=20)],
    title="Search Documentation",
    description="Search for documentation files across all repositories in an organization",
)

# Combine into tabbed interface
demo = gr.TabbedInterface(
    [get_repos_interface, get_docs_interface, get_content_interface, search_docs_interface],
    ["Get Repos", "Get Docs", "Get Content", "Search"],
    title="GitHub MCP Server",
)


# ============================================================================
# Main Entry Point
# ============================================================================

if __name__ == "__main__":
    demo.launch(mcp_server=True, server_name="0.0.0.0", server_port=7860)