#!/usr/bin/env python3
"""
GitHub MCP Server

Provides GitHub API access via Model Context Protocol using Gradio
"""
import base64
import json
import logging
import os
from typing import List, Dict, Any

import aiohttp
import gradio as gr

LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "")
GITHUB_API_BASE = os.getenv("GITHUB_API_BASE_URL", "https://api.github.com")

logging.basicConfig(
    level=getattr(logging, LOG_LEVEL.upper()),
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# API Constants
RESULTS_PER_PAGE = 100
SEARCH_RESULTS_LIMIT = 50

# ============================================================================
# Helper Functions
# ============================================================================
def create_headers() -> Dict[str, str]:
    """
    Create GitHub API request headers with authentication

    Returns:
        Dictionary of HTTP headers for GitHub API requests
    """
    headers = {
        "Accept": "application/vnd.github.v3+json",
        "User-Agent": "GitHub-MCP-Server/1.0",
    }
    # Add authorization if a token is available
    if GITHUB_TOKEN:
        headers["Authorization"] = f"token {GITHUB_TOKEN}"
    return headers
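
# Illustrative sketch of the headers this helper produces when GITHUB_TOKEN is
# set (the token value below is a placeholder, not a real credential):
#
#     >>> create_headers()
#     {'Accept': 'application/vnd.github.v3+json',
#      'User-Agent': 'GitHub-MCP-Server/1.0',
#      'Authorization': 'token <GITHUB_TOKEN>'}
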
async def check_doc_folder(
    session: aiohttp.ClientSession,
    org: str,
    repo: str
) -> bool:
    """
    Check if a repository has a /doc folder

    Args:
        session: aiohttp ClientSession (reuse connection)
        org: Organization name
        repo: Repository name

    Returns:
        True if /doc folder exists, False otherwise
    """
    headers = create_headers()
    url = f"{GITHUB_API_BASE}/repos/{org}/{repo}/contents/doc"
    try:
        async with session.get(url, headers=headers) as response:
            return response.status == 200
    except Exception as e:
        logger.debug(f"Error checking /doc folder for {org}/{repo}: {e}")
        return False
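
# Usage sketch showing the session-reuse pattern the docstring recommends.
# The org/repo names are hypothetical, and this coroutine is defined but
# never called, so it adds no work at import time:
async def _example_check_doc_folder() -> None:
    # One ClientSession pools connections across all the checks
    async with aiohttp.ClientSession() as session:
        for repo in ("repo-a", "repo-b"):  # hypothetical repository names
            has_doc = await check_doc_folder(session, "example-org", repo)
            logger.info(f"{repo}: /doc exists = {has_doc}")
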
def determine_content_type(filename: str) -> str:
    """
    Determine content type from filename

    Args:
        filename: Name of the file

    Returns:
        Content type: 'markdown', 'mermaid', 'svg', 'openapi', 'postman', or 'unknown'
    """
    lower_name = filename.lower()
    if lower_name.endswith(('.mmd', '.mermaid')):
        return 'mermaid'
    elif lower_name.endswith('.md'):
        return 'markdown'
    elif lower_name.endswith('.svg'):
        return 'svg'
    elif lower_name.endswith(('.yml', '.yaml')):
        return 'openapi'
    elif lower_name.endswith('.json'):
        # Heuristic: filenames starting with "postman" are treated as Postman
        # collections; any other JSON is assumed to be an OpenAPI spec
        return 'postman' if lower_name.startswith('postman') else 'openapi'
    else:
        return 'unknown'
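
# A few illustrative mappings (the filenames are made up):
#
#     determine_content_type("setup-guide.md")      -> 'markdown'
#     determine_content_type("flow.mmd")            -> 'mermaid'
#     determine_content_type("postman_users.json")  -> 'postman'
#     determine_content_type("users-api.json")      -> 'openapi'
#     determine_content_type("notes.txt")           -> 'unknown'
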
# ============================================================================
# Business Logic Functions (testable)
# ============================================================================
async def get_org_repos(org: str) -> List[Dict[str, Any]]:
    """
    Fetch all repositories in an organization, flagging those with a /doc folder

    Tries the GitHub Search API first (one request); if that fails, falls back
    to listing every repo and checking each one for a /doc folder.

    Args:
        org: GitHub organization name

    Returns:
        List of repository dictionaries with "id", "name", "description",
        "url", and "hasDocFolder" keys
    """
    async with aiohttp.ClientSession() as session:
        headers = create_headers()

        # Strategy 1: Use GitHub Search API (efficient - one request)
        search_url = f"{GITHUB_API_BASE}/search/code"
        params = {
            "q": f"org:{org} path:/doc",
            "per_page": RESULTS_PER_PAGE
        }
        try:
            async with session.get(search_url, headers=headers, params=params) as response:
                if response.status == 200:
                    data = await response.json()
                    # Extract unique repositories from search results
                    repos_with_docs = {}
                    for item in data.get("items", []):
                        repo_info = item.get("repository", {})
                        repo_name = repo_info.get("name")
                        if repo_name and repo_name not in repos_with_docs:
                            repos_with_docs[repo_name] = {
                                "id": str(repo_info.get("id", "")),
                                "name": repo_name,
                                "description": repo_info.get("description") or "",
                                "url": repo_info.get("html_url", ""),
                                "hasDocFolder": True
                            }
                    logger.info(f"Found {len(repos_with_docs)} repos with /doc via search")
                    return list(repos_with_docs.values())
                else:
                    logger.warning(
                        f"Search API returned {response.status}, falling back to listing all repos"
                    )
        except Exception as e:
            logger.warning(f"Search API failed: {e}, falling back to listing all repos")

        # Strategy 2: Fallback - list all repos and check each one
        repos_url = f"{GITHUB_API_BASE}/orgs/{org}/repos"
        all_repos = []
        page = 1
        logger.info(f"Fetching repos for organization: {org}")
        while True:
            async with session.get(
                repos_url,
                headers=headers,
                params={"per_page": RESULTS_PER_PAGE, "page": page, "sort": "updated"}
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise Exception(f"GitHub API error {response.status}: {error_text}")
                repos = await response.json()
                if not repos:
                    break
                all_repos.extend(repos)
                logger.info(f"Fetched page {page} ({len(repos)} repos)")
                page += 1
                # Stop if we got less than a full page (last page)
                if len(repos) < RESULTS_PER_PAGE:
                    break
        logger.info(f"Total repos fetched: {len(all_repos)}")

        # Check each repo for a /doc folder (one request per repo, so this is slow)
        result = []
        for idx, repo in enumerate(all_repos, 1):
            logger.info(f"Checking {idx}/{len(all_repos)}: {repo['name']}")
            has_doc = await check_doc_folder(session, org, repo["name"])
            result.append({
                "id": str(repo["id"]),
                "name": repo["name"],
                "description": repo.get("description") or "",
                "url": repo["html_url"],
                "hasDocFolder": has_doc
            })
        repos_with_docs_count = sum(1 for r in result if r["hasDocFolder"])
        logger.info(f"Found {repos_with_docs_count} repos with /doc folder")
        return result
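
# Usage sketch (hypothetical organization name; shown as a comment so nothing
# runs at import time):
#
#     import asyncio
#     repos = asyncio.run(get_org_repos("example-org"))
#     for r in repos:
#         if r["hasDocFolder"]:
#             print(r["name"], r["url"])
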
async def get_repo_docs(org: str, repo: str) -> List[Dict[str, Any]]:
    """
    Get all documentation files from a repository's /doc folder

    Filters for supported file types: Markdown, Mermaid, SVG, OpenAPI, Postman

    Args:
        org: GitHub organization name
        repo: Repository name

    Returns:
        List of documentation file dictionaries:
        [
            {
                "id": "abc123...",
                "name": "README.md",
                "path": "doc/README.md",
                "type": "markdown",
                "url": "https://github.com/org/repo/blob/main/doc/README.md",
                "download_url": "https://raw.githubusercontent.com/.../README.md",
            },
            ...
        ]

    Example:
        docs = await get_repo_docs("anthropics", "anthropic-sdk-python")
    """
    async with aiohttp.ClientSession() as session:
        headers = create_headers()
        url = f"{GITHUB_API_BASE}/repos/{org}/{repo}/contents/doc"
        logger.info(f"Fetching docs from: {org}/{repo}/doc")
        async with session.get(url, headers=headers) as response:
            if response.status == 404:
                logger.warning(f"No /doc folder found in {org}/{repo}")
                return []
            if response.status != 200:
                error_text = await response.text()
                raise Exception(f"GitHub API error {response.status}: {error_text}")
            contents = await response.json()

        # Filter for supported file types
        supported_extensions = [
            '.md',       # Markdown
            '.mmd',      # Mermaid
            '.mermaid',  # Mermaid
            '.svg',      # SVG images
            '.yml',      # YAML (OpenAPI)
            '.yaml',     # YAML (OpenAPI)
            '.json'      # JSON (OpenAPI/Postman)
        ]
        docs = []
        skipped = 0
        for item in contents:
            # Only process files (not directories)
            if item["type"] == "file":
                name = item["name"]
                # Check if the file extension is supported
                if any(name.lower().endswith(ext) for ext in supported_extensions):
                    content_type = determine_content_type(name)
                    docs.append({
                        "id": item["sha"],
                        "name": name,
                        "path": item["path"],
                        "type": content_type,
                        "url": item["html_url"],
                        "download_url": item.get("download_url", ""),
                    })
                else:
                    skipped += 1
        logger.info(f"Found {len(docs)} documentation files ({skipped} skipped)")
        return docs
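
# Usage sketch (hypothetical org/repo names; defined but never called):
async def _example_list_docs() -> None:
    docs = await get_repo_docs("example-org", "example-repo")
    for doc in docs:
        # e.g. "markdown  doc/getting-started.md"
        logger.info(f"{doc['type']:>8}  {doc['path']}")
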
async def get_file_content(org: str, repo: str, path: str) -> Dict[str, Any]:
    """
    Fetch content of a specific file from GitHub

    Decodes base64-encoded content returned by the GitHub API

    Args:
        org: GitHub organization name
        repo: Repository name
        path: File path within repository (e.g., "doc/README.md")

    Returns:
        Dictionary with file metadata and content:
        {
            "name": "README.md",
            "path": "doc/README.md",
            "content": "# Documentation\\n\\nThis is...",
            "encoding": "base64"
        }

    Example:
        content = await get_file_content("anthropics", "sdk", "doc/README.md")
    """
    async with aiohttp.ClientSession() as session:
        headers = create_headers()
        url = f"{GITHUB_API_BASE}/repos/{org}/{repo}/contents/{path}"
        logger.info(f"Fetching content: {org}/{repo}/{path}")
        async with session.get(url, headers=headers) as response:
            if response.status == 404:
                raise Exception(f"File not found: {path}")
            if response.status != 200:
                error_text = await response.text()
                raise Exception(f"GitHub API error {response.status}: {error_text}")
            data = await response.json()

        # Decode base64 content if present
        content = ""
        if "content" in data and data["content"]:
            try:
                # GitHub returns base64-encoded content with embedded newlines
                encoded_content = data["content"].replace('\n', '')
                content = base64.b64decode(encoded_content).decode('utf-8')
                logger.info(f"Decoded content ({len(content)} characters)")
            except Exception as e:
                logger.warning(f"Failed to decode content: {e}")
                content = data.get("content", "")
        return {
            "name": data["name"],
            "path": data["path"],
            "content": content,
            "encoding": data.get("encoding", "base64")
        }
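
# Usage sketch showing the decode round-trip (hypothetical names and path):
#
#     import asyncio
#     result = asyncio.run(get_file_content("example-org", "example-repo", "doc/README.md"))
#     print(result["content"][:200])  # decoded UTF-8 text, not raw base64
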
async def search_documentation(org: str, query: str) -> List[Dict[str, Any]]:
    """
    Search for documentation files across an organization's repositories

    Uses the GitHub Code Search API, scoped to /doc paths within the org.

    Args:
        org: GitHub organization name
        query: Search query string

    Returns:
        List of matching file dictionaries with "name", "path", "repository",
        and "url" keys
    """
    async with aiohttp.ClientSession() as session:
        headers = create_headers()
        search_url = f"{GITHUB_API_BASE}/search/code"
        params = {
            "q": f"org:{org} path:/doc {query}",
            "per_page": SEARCH_RESULTS_LIMIT
        }
        logger.info(f"Searching for: '{query}' in {org}")
        async with session.get(search_url, headers=headers, params=params) as response:
            if response.status == 403:
                raise Exception("Search API rate limit exceeded. Try again later.")
            if response.status != 200:
                error_text = await response.text()
                raise Exception(f"GitHub API error {response.status}: {error_text}")
            data = await response.json()
        results = []
        for item in data.get("items", []):
            repo_info = item.get("repository", {})
            results.append({
                "name": item["name"],
                "path": item["path"],
                "repository": repo_info.get("name", ""),
                "url": item["html_url"],
            })
        logger.info(f"Found {len(results)} matching files")
        return results
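
# Usage sketch (hypothetical query; note that GitHub code search requires an
# authenticated GITHUB_TOKEN and is rate-limited, hence the 403 handling above):
#
#     import asyncio
#     hits = asyncio.run(search_documentation("example-org", "authentication"))
#     for hit in hits:
#         print(f"{hit['repository']}: {hit['path']}")
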
# ============================================================================
# Gradio MCP Tool Functions
# ============================================================================
async def get_org_repos_tool(org: str) -> str:
    """
    Fetch all repositories from a GitHub organization with /doc folder detection.

    This tool uses the GitHub Search API to efficiently find repositories
    that have a /doc folder, falling back to checking each repo individually
    if the search API is unavailable.

    Args:
        org (str): GitHub organization name (e.g., "microsoft", "anthropics")

    Returns:
        str: JSON string containing list of repositories with their metadata
    """
    try:
        result = await get_org_repos(org)
        return json.dumps(result, indent=2)
    except Exception as e:
        return json.dumps({"error": str(e)}, indent=2)


async def get_repo_docs_tool(org: str, repo: str) -> str:
    """
    Get all documentation files from a repository's /doc folder.

    Filters for supported file types: Markdown, Mermaid, SVG, OpenAPI, Postman.

    Args:
        org (str): GitHub organization name
        repo (str): Repository name

    Returns:
        str: JSON string containing list of documentation files with metadata
    """
    try:
        result = await get_repo_docs(org, repo)
        return json.dumps(result, indent=2)
    except Exception as e:
        return json.dumps({"error": str(e)}, indent=2)


async def get_file_content_tool(org: str, repo: str, path: str) -> str:
    """
    Fetch and decode content of a specific file from GitHub.

    Automatically decodes base64-encoded content returned by the GitHub API.

    Args:
        org (str): GitHub organization name
        repo (str): Repository name
        path (str): File path within repository (e.g., "doc/README.md")

    Returns:
        str: JSON string containing file metadata and decoded content
    """
    try:
        result = await get_file_content(org, repo, path)
        return json.dumps(result, indent=2)
    except Exception as e:
        return json.dumps({"error": str(e)}, indent=2)


async def search_documentation_tool(org: str, query: str) -> str:
    """
    Search for documentation files across all repositories in an organization.

    Uses the GitHub Code Search API to find matching files in /doc folders.

    Args:
        org (str): GitHub organization name
        query (str): Search query string (e.g., "authentication", "API", "tutorial")

    Returns:
        str: JSON string containing list of matching files with their locations
    """
    try:
        result = await search_documentation(org, query)
        return json.dumps(result, indent=2)
    except Exception as e:
        return json.dumps({"error": str(e)}, indent=2)
# ============================================================================
# Gradio Interface
# ============================================================================
# Create individual interfaces for each tool
get_repos_interface = gr.Interface(
    fn=get_org_repos_tool,
    inputs=[gr.Textbox(label="Organization", placeholder="e.g., anthropics")],
    outputs=[gr.Textbox(label="Repositories (JSON)", lines=20)],
    title="Get Organization Repos",
    description="Fetch all repositories from a GitHub organization with /doc folder detection",
)

get_docs_interface = gr.Interface(
    fn=get_repo_docs_tool,
    inputs=[
        gr.Textbox(label="Organization", placeholder="e.g., anthropics"),
        gr.Textbox(label="Repository", placeholder="e.g., anthropic-sdk-python"),
    ],
    outputs=[gr.Textbox(label="Documentation Files (JSON)", lines=20)],
    title="Get Repository Docs",
    description="Get all documentation files from a repository's /doc folder",
)

get_content_interface = gr.Interface(
    fn=get_file_content_tool,
    inputs=[
        gr.Textbox(label="Organization", placeholder="e.g., anthropics"),
        gr.Textbox(label="Repository", placeholder="e.g., anthropic-sdk-python"),
        gr.Textbox(label="File Path", placeholder="e.g., doc/README.md"),
    ],
    outputs=[gr.Textbox(label="File Content (JSON)", lines=20)],
    title="Get File Content",
    description="Fetch and decode content of a specific file from GitHub",
)

search_docs_interface = gr.Interface(
    fn=search_documentation_tool,
    inputs=[
        gr.Textbox(label="Organization", placeholder="e.g., anthropics"),
        gr.Textbox(label="Search Query", placeholder="e.g., streaming"),
    ],
    outputs=[gr.Textbox(label="Search Results (JSON)", lines=20)],
    title="Search Documentation",
    description="Search for documentation files across all repositories in an organization",
)

# Combine into a tabbed interface
demo = gr.TabbedInterface(
    [get_repos_interface, get_docs_interface, get_content_interface, search_docs_interface],
    ["Get Repos", "Get Docs", "Get Content", "Search"],
    title="GitHub MCP Server",
)
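
# With mcp_server=True, Gradio exposes each function above as an MCP tool in
# addition to the web UI. Per the Gradio docs, the MCP endpoint is served
# under /gradio_api/mcp/sse, so a client would connect to (assuming the
# default host and port below):
#
#     http://localhost:7860/gradio_api/mcp/sse
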
# ============================================================================
# Main Entry Point
# ============================================================================
if __name__ == "__main__":
    demo.launch(mcp_server=True, server_name="0.0.0.0", server_port=7860)