SPerva committed on
Commit
57b8ecd
·
verified ·
1 Parent(s): dee7da9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +522 -0
app.py ADDED
@@ -0,0 +1,522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ GitHub MCP Server
4
+ Provides GitHub API access via Model Context Protocol using Gradio
5
+ """
6
+
7
+ import base64
8
+ import json
9
+ import logging
10
+ import os
11
+ from typing import List, Dict, Any
12
+
13
+ import aiohttp
14
+ import gradio as gr
15
+
16
# Runtime configuration, read once from the environment at import time.
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
# Optional personal access token; without it requests go out unauthenticated.
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN", "")
# Base URL override, e.g. for GitHub Enterprise installs.
GITHUB_API_BASE = os.getenv("GITHUB_API_BASE_URL", "https://api.github.com")

logging.basicConfig(
    level=getattr(logging, LOG_LEVEL.upper()),
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# API Constants
RESULTS_PER_PAGE = 100  # page size used for list endpoints
SEARCH_RESULTS_LIMIT = 50  # cap on code-search results per query
29
+
30
+
31
+ # ============================================================================
32
+ # Helper Functions
33
+ # ============================================================================
34
+
35
def create_headers() -> Dict[str, str]:
    """Build the standard header set for GitHub API calls.

    A token-based Authorization header is included only when a
    GITHUB_TOKEN is configured in the environment.

    Returns:
        Dictionary of HTTP headers for GitHub API requests
    """
    auth = {"Authorization": f"token {GITHUB_TOKEN}"} if GITHUB_TOKEN else {}
    return {
        "Accept": "application/vnd.github.v3+json",
        "User-Agent": "GitHub-MCP-Server/1.0",
        **auth,
    }
52
+
53
+
54
async def check_doc_folder(
    session: aiohttp.ClientSession,
    org: str,
    repo: str
) -> bool:
    """Probe whether a repository exposes a /doc folder.

    Args:
        session: shared aiohttp ClientSession (connection reuse)
        org: organization name
        repo: repository name

    Returns:
        True when GET .../contents/doc answers 200; False on any other
        status or on a request error.
    """
    target = f"{GITHUB_API_BASE}/repos/{org}/{repo}/contents/doc"
    try:
        async with session.get(target, headers=create_headers()) as resp:
            found = resp.status == 200
    except Exception as e:
        logger.debug(f"Error checking /doc folder for {org}/{repo}: {e}")
        found = False
    return found
79
+
80
+
81
def determine_content_type(filename: str) -> str:
    """
    Determine content type from filename

    Args:
        filename: Name of the file

    Returns:
        Content type: 'markdown', 'mermaid', 'svg', 'openapi', 'postman', or 'unknown'
    """
    lower_name = filename.lower()

    if lower_name.endswith(('.mmd', '.mermaid')):
        return 'mermaid'
    elif lower_name.endswith('.md'):
        return 'markdown'
    elif lower_name.endswith('.svg'):
        return 'svg'
    elif lower_name.endswith(('.yml', '.yaml')):
        return 'openapi'
    elif lower_name.endswith('.json'):
        # Postman exports are conventionally named "<name>.postman_collection.json",
        # so look for 'postman' anywhere in the name (a startswith check would
        # miss them); any other JSON file is assumed to be an OpenAPI spec.
        return 'postman' if 'postman' in lower_name else 'openapi'
    else:
        return 'unknown'
106
+
107
+
108
+ # ============================================================================
109
+ # Business Logic Functions (testable)
110
+ # ============================================================================
111
+
112
async def get_org_repos(org: str) -> List[Dict[str, Any]]:
    """
    List an organization's repositories, flagging those with a /doc folder.

    Strategy 1 issues a single GitHub Code Search request scoped to the
    org's /doc paths. If that request raises, or answers with a non-200
    status, execution falls through to Strategy 2, which pages through all
    org repositories and probes each one's /doc folder individually.

    Args:
        org: GitHub organization name

    Returns:
        List of repo dicts with keys: id, name, description, url, hasDocFolder.
        Strategy 1 returns only repos that have docs (hasDocFolder always True);
        Strategy 2 returns every repo with hasDocFolder set per probe.

    Raises:
        Exception: if the fallback repo-listing request fails
    """
    async with aiohttp.ClientSession() as session:
        headers = create_headers()

        # Strategy 1: Use GitHub Search API (efficient - one request)
        search_url = f"{GITHUB_API_BASE}/search/code"
        params = {
            "q": f"org:{org} path:/doc",
            "per_page": RESULTS_PER_PAGE
        }

        try:
            async with session.get(search_url, headers=headers, params=params) as response:
                if response.status == 200:
                    data = await response.json()

                    # Extract unique repositories from search results
                    # (one search hit per file; dedupe by repo name).
                    repos_with_docs = {}
                    for item in data.get("items", []):
                        repo_info = item.get("repository", {})
                        repo_name = repo_info.get("name")

                        if repo_name and repo_name not in repos_with_docs:
                            repos_with_docs[repo_name] = {
                                "id": str(repo_info.get("id", "")),
                                "name": repo_name,
                                "description": repo_info.get("description") or "",
                                "url": repo_info.get("html_url", ""),
                                "hasDocFolder": True
                            }

                    logger.info(f"Found {len(repos_with_docs)} repos with /doc via search")
                    return list(repos_with_docs.values())
                # NOTE: a non-200 search response is not raised here; it
                # deliberately falls through to the fallback below.

        except Exception as e:
            logger.warning(f"Search API failed: {e}, falling back to list all repos")

        # Strategy 2: Fallback - List all repos and check each one
        repos_url = f"{GITHUB_API_BASE}/orgs/{org}/repos"
        all_repos = []
        page = 1

        logger.info(f"Fetching repos for organization: {org}")

        while True:
            async with session.get(
                repos_url,
                headers=headers,
                params={"per_page": RESULTS_PER_PAGE, "page": page, "sort": "updated"}
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    raise Exception(f"GitHub API error {response.status}: {error_text}")

                repos = await response.json()
                if not repos:
                    break

                all_repos.extend(repos)
                logger.info(f"Fetched page {page} ({len(repos)} repos)")
                page += 1

                # Stop if we got less than full page (last page)
                if len(repos) < RESULTS_PER_PAGE:
                    break

        logger.info(f"Total repos fetched: {len(all_repos)}")

        # Check each repo for /doc folder (sequential probe, one request per repo)
        result = []
        for idx, repo in enumerate(all_repos, 1):
            logger.info(f"Checking {idx}/{len(all_repos)}: {repo['name']}")
            has_doc = await check_doc_folder(session, org, repo["name"])

            result.append({
                "id": str(repo["id"]),
                "name": repo["name"],
                "description": repo.get("description") or "",
                "url": repo["html_url"],
                "hasDocFolder": has_doc
            })

        repos_with_docs_count = sum(1 for r in result if r["hasDocFolder"])
        logger.info(f"Found {repos_with_docs_count} repos with /doc folder")

        return result
198
+
199
+
200
async def get_repo_docs(org: str, repo: str) -> List[Dict[str, Any]]:
    """List supported documentation files in a repository's /doc folder.

    Only regular files with a supported extension are returned; supported
    types are Markdown, Mermaid, SVG, OpenAPI (YAML/JSON) and Postman.

    Args:
        org: GitHub organization name
        repo: Repository name

    Returns:
        List of dicts with keys: id (blob sha), name, path, type
        (from determine_content_type), url, download_url. Empty list when
        the repository has no /doc folder (HTTP 404).

    Raises:
        Exception: on any non-200/non-404 GitHub API response
    """
    # endswith accepts a tuple, so one call covers every supported extension.
    supported = ('.md', '.mmd', '.mermaid', '.svg', '.yml', '.yaml', '.json')

    async with aiohttp.ClientSession() as session:
        url = f"{GITHUB_API_BASE}/repos/{org}/{repo}/contents/doc"
        logger.info(f"Fetching docs from: {org}/{repo}/doc")

        async with session.get(url, headers=create_headers()) as response:
            if response.status == 404:
                logger.warning(f"No /doc folder found in {org}/{repo}")
                return []
            if response.status != 200:
                error_text = await response.text()
                raise Exception(f"GitHub API error {response.status}: {error_text}")
            contents = await response.json()

    # Directories are ignored entirely; only files count toward "skipped".
    files = [entry for entry in contents if entry["type"] == "file"]
    docs = [
        {
            "id": entry["sha"],
            "name": entry["name"],
            "path": entry["path"],
            "type": determine_content_type(entry["name"]),
            "url": entry["html_url"],
            "download_url": entry.get("download_url", ""),
        }
        for entry in files
        if entry["name"].lower().endswith(supported)
    ]
    skipped = len(files) - len(docs)

    logger.info(f"Found {len(docs)} documentation files ({skipped} skipped)")
    return docs
280
+
281
+
282
async def get_file_content(org: str, repo: str, path: str) -> Dict[str, Any]:
    """Fetch one file from GitHub and decode its base64 payload.

    GitHub's contents API returns file bodies base64-encoded with embedded
    newlines; those are stripped before decoding. If decoding fails the raw
    encoded payload is returned instead (best effort).

    Args:
        org: GitHub organization name
        repo: Repository name
        path: File path within the repository (e.g. "doc/README.md")

    Returns:
        Dict with keys: name, path, content (decoded UTF-8 text when
        possible), encoding (as reported by the API, default "base64")

    Raises:
        Exception: if the file is missing (404) or the API answers non-200
    """
    async with aiohttp.ClientSession() as session:
        url = f"{GITHUB_API_BASE}/repos/{org}/{repo}/contents/{path}"
        logger.info(f"Fetching content: {org}/{repo}/{path}")

        async with session.get(url, headers=create_headers()) as response:
            if response.status == 404:
                raise Exception(f"File not found: {path}")
            if response.status != 200:
                error_text = await response.text()
                raise Exception(f"GitHub API error {response.status}: {error_text}")
            data = await response.json()

    raw = data.get("content") or ""
    content = ""
    if raw:
        try:
            content = base64.b64decode(raw.replace('\n', '')).decode('utf-8')
            logger.info(f"Decoded content ({len(content)} characters)")
        except Exception as e:
            logger.warning(f"Failed to decode content: {e}")
            content = raw  # fall back to the undecoded payload

    return {
        "name": data["name"],
        "path": data["path"],
        "content": content,
        "encoding": data.get("encoding", "base64")
    }
339
+
340
+
341
async def search_documentation(org: str, query: str) -> List[Dict[str, Any]]:
    """Search /doc folders across an organization via GitHub code search.

    Args:
        org: GitHub organization name
        query: free-text search string

    Returns:
        List of dicts with keys: name, path, repository (repo name), url

    Raises:
        Exception: on rate limiting (HTTP 403) or any other non-200 response
    """
    params = {
        "q": f"org:{org} path:/doc {query}",
        "per_page": SEARCH_RESULTS_LIMIT
    }

    async with aiohttp.ClientSession() as session:
        logger.info(f"Searching for: '{query}' in {org}")

        async with session.get(
            f"{GITHUB_API_BASE}/search/code",
            headers=create_headers(),
            params=params,
        ) as response:
            if response.status == 403:
                raise Exception("Search API rate limit exceeded. Try again later.")
            if response.status != 200:
                error_text = await response.text()
                raise Exception(f"GitHub API error {response.status}: {error_text}")
            data = await response.json()

    results = [
        {
            "name": hit["name"],
            "path": hit["path"],
            "repository": hit.get("repository", {}).get("name", ""),
            "url": hit["html_url"],
        }
        for hit in data.get("items", [])
    ]

    logger.info(f"Found {len(results)} matching files")
    return results
374
+
375
+
376
+ # ============================================================================
377
+ # Gradio MCP Tool Functions
378
+ # ============================================================================
379
+
380
async def get_org_repos_tool(org: str) -> str:
    """
    Fetch all repositories from a GitHub organization with /doc folder detection.

    Delegates to get_org_repos, which prefers the GitHub Search API and
    falls back to per-repo probing when search is unavailable. Errors are
    reported as a JSON object rather than raised.

    Args:
        org (str): GitHub organization name (e.g., "microsoft", "anthropics")

    Returns:
        str: JSON string containing list of repositories with their metadata
    """
    try:
        repos = await get_org_repos(org)
    except Exception as e:
        return json.dumps({"error": str(e)}, indent=2)
    return json.dumps(repos, indent=2)
399
+
400
+
401
async def get_repo_docs_tool(org: str, repo: str) -> str:
    """
    Get all documentation files from a repository's /doc folder.

    Delegates to get_repo_docs, which keeps only supported file types
    (Markdown, Mermaid, SVG, OpenAPI, Postman). Errors are reported as a
    JSON object rather than raised.

    Args:
        org (str): GitHub organization name
        repo (str): Repository name

    Returns:
        str: JSON string containing list of documentation files with metadata
    """
    try:
        docs = await get_repo_docs(org, repo)
    except Exception as e:
        return json.dumps({"error": str(e)}, indent=2)
    return json.dumps(docs, indent=2)
419
+
420
+
421
async def get_file_content_tool(org: str, repo: str, path: str) -> str:
    """
    Fetch and decode content of a specific file from GitHub.

    Delegates to get_file_content, which base64-decodes the payload the
    GitHub API returns. Errors are reported as a JSON object rather than
    raised.

    Args:
        org (str): GitHub organization name
        repo (str): Repository name
        path (str): File path within repository (e.g., "doc/README.md")

    Returns:
        str: JSON string containing file metadata and decoded content
    """
    try:
        payload = await get_file_content(org, repo, path)
    except Exception as e:
        return json.dumps({"error": str(e)}, indent=2)
    return json.dumps(payload, indent=2)
440
+
441
+
442
async def search_documentation_tool(org: str, query: str) -> str:
    """
    Search for documentation files across all repositories in an organization.

    Delegates to search_documentation, which uses the GitHub Code Search
    API over /doc folders. Errors are reported as a JSON object rather
    than raised.

    Args:
        org (str): GitHub organization name
        query (str): Search query string (e.g., "authentication", "API", "tutorial")

    Returns:
        str: JSON string containing list of matching files with their locations
    """
    try:
        hits = await search_documentation(org, query)
    except Exception as e:
        return json.dumps({"error": str(e)}, indent=2)
    return json.dumps(hits, indent=2)
460
+
461
+
462
+ # ============================================================================
463
+ # Gradio Interface
464
+ # ============================================================================
465
+
466
+ # Create individual interfaces for each tool
467
# Create individual interfaces for each tool.
# Each gr.Interface wraps one async tool function; Gradio exposes these as
# MCP tools when demo.launch(mcp_server=True) is called below.
get_repos_interface = gr.Interface(
    fn=get_org_repos_tool,
    inputs=[gr.Textbox(label="Organization", placeholder="e.g., anthropics")],
    outputs=[gr.Textbox(label="Repositories (JSON)", lines=20)],
    title="Get Organization Repos",
    description="Fetch all repositories from a GitHub organization with /doc folder detection",
)

get_docs_interface = gr.Interface(
    fn=get_repo_docs_tool,
    inputs=[
        gr.Textbox(label="Organization", placeholder="e.g., anthropics"),
        gr.Textbox(label="Repository", placeholder="e.g., anthropic-sdk-python"),
    ],
    outputs=[gr.Textbox(label="Documentation Files (JSON)", lines=20)],
    title="Get Repository Docs",
    description="Get all documentation files from a repository's /doc folder",
)

get_content_interface = gr.Interface(
    fn=get_file_content_tool,
    inputs=[
        gr.Textbox(label="Organization", placeholder="e.g., anthropics"),
        gr.Textbox(label="Repository", placeholder="e.g., anthropic-sdk-python"),
        gr.Textbox(label="File Path", placeholder="e.g., doc/README.md"),
    ],
    outputs=[gr.Textbox(label="File Content (JSON)", lines=20)],
    title="Get File Content",
    description="Fetch and decode content of a specific file from GitHub",
)

search_docs_interface = gr.Interface(
    fn=search_documentation_tool,
    inputs=[
        gr.Textbox(label="Organization", placeholder="e.g., anthropics"),
        gr.Textbox(label="Search Query", placeholder="e.g., streaming"),
    ],
    outputs=[gr.Textbox(label="Search Results (JSON)", lines=20)],
    title="Search Documentation",
    description="Search for documentation files across all repositories in an organization",
)

# Combine into tabbed interface — one tab per tool, in the order listed.
demo = gr.TabbedInterface(
    [get_repos_interface, get_docs_interface, get_content_interface, search_docs_interface],
    ["Get Repos", "Get Docs", "Get Content", "Search"],
    title="GitHub MCP Server",
)
515
+
516
+
517
+ # ============================================================================
518
+ # Main Entry Point
519
+ # ============================================================================
520
+
521
+ if __name__ == "__main__":
522
+ demo.launch(mcp_server=True, server_name="0.0.0.0", server_port=7860)