| """ |
| GitHub Code Search Tool - Search code across GitHub with advanced filtering |
| |
| Find code patterns using regex and glob filters for repositories and file paths. |
| """ |
|
|
| import fnmatch |
| import os |
| import re |
| from typing import Any, Dict, Optional |
|
|
| import requests |
|
|
| from agent.tools.types import ToolResult |
|
|
|
|
def _glob_to_regex(pattern: str) -> str:
    """Translate a glob that may contain ``**`` into regex source text.

    Supported syntax mirrors ``fnmatch``: ``*`` (any run of characters,
    including ``/``), ``?`` (any single character) and ``[seq]`` / ``[!seq]``
    character classes.  In addition, a ``**/`` that starts a path segment
    matches zero or more whole directories.
    """
    out = []
    i, n = 0, len(pattern)
    while i < n:
        ch = pattern[i]
        if ch == "*":
            run_end = i
            while run_end < n and pattern[run_end] == "*":
                run_end += 1
            at_segment_start = i == 0 or pattern[i - 1] == "/"
            if run_end - i >= 2 and at_segment_start and pattern.startswith("/", run_end):
                # "**/" may also match nothing, so "src/**/*.js" accepts
                # "src/index.js" as well as "src/a/b/index.js".
                out.append("(?:.*/)?")
                i = run_end + 1
            else:
                out.append(".*")
                i = run_end
        elif ch == "?":
            out.append(".")
            i += 1
        elif ch == "[":
            # Locate the closing bracket; a leading "!" and a "]" directly
            # after the opening bracket belong to the class body.
            close = i + 1
            if close < n and pattern[close] == "!":
                close += 1
            if close < n and pattern[close] == "]":
                close += 1
            while close < n and pattern[close] != "]":
                close += 1
            if close >= n:
                # Unterminated class: treat "[" as a literal character.
                out.append(r"\[")
                i += 1
            else:
                body = pattern[i + 1:close].replace("\\", "\\\\").replace("[", "\\[")
                if body[0] == "!":
                    body = "^" + body[1:]
                elif body[0] == "^":
                    body = "\\" + body
                out.append(f"[{body}]")
                i = close + 1
        else:
            out.append(re.escape(ch))
            i += 1
    return "".join(out)


def _glob_match(text: str, pattern: str) -> bool:
    """Check if text matches glob pattern, supporting ** for multi-level paths.

    Patterns without ``**`` are delegated to ``fnmatch.fnmatch``.  Patterns
    with ``**`` are translated directly to a regular expression rather than
    patched into ``fnmatch.translate`` output, because that output may wrap
    interior ``*`` chunks in no-backtrack groups, which breaks once a
    variable-length ``.*`` is spliced into them.

    Args:
        text: The repository name or file path to test.
        pattern: The glob pattern to test against.

    Returns:
        True if the whole of ``text`` matches ``pattern``, otherwise False.
        A ``**`` pattern that cannot be compiled (for example a reversed
        range such as ``[z-a]``) matches nothing.
    """
    if "**" not in pattern:
        return fnmatch.fnmatch(text, pattern)
    try:
        return re.fullmatch(_glob_to_regex(pattern), text, re.DOTALL) is not None
    except re.error:
        return False
|
|
|
|
# The GitHub Search API serves at most this many results for a single query,
# so paging past it can only produce an error.
_GITHUB_SEARCH_RESULT_CAP = 1000

# Captures the final extension of globs such as "*.py", "*.d.ts" or
# "src/**/*.js" (the capture is "py", "ts" and "js" respectively).
_TRAILING_EXTENSION_RE = re.compile(r"\*(?:\.\w+)*\.(\w+)$")


def _error_result(message: str) -> ToolResult:
    """Build the error-shaped result shared by every failure path."""
    return {
        "formatted": message,
        "totalResults": 0,
        "resultsShared": 0,
        "isError": True,
    }


def _has_wildcard(glob: str) -> bool:
    """Return True if the glob contains any wildcard syntax (*, ? or [)."""
    return any(ch in glob for ch in "*?[")


def _build_github_query(
    query: str,
    repo_glob: Optional[str],
    path_glob: Optional[str],
    regex: bool,
) -> str:
    """Assemble the ``q`` parameter, adding only qualifiers with exact values."""
    parts = []

    # NOTE(review): with regex=True the query is wrapped in slashes; confirm
    # that the REST code-search endpoint actually honours regex syntax.
    if regex:
        parts.append(f"/{query}/")
    else:
        parts.append(f'"{query}"' if " " in query else query)

    if repo_glob:
        owner, slash, name = repo_glob.partition("/")
        if not _has_wildcard(repo_glob):
            parts.append(f"repo:{repo_glob}" if slash else f"user:{repo_glob}")
        elif owner and name == "*" and not _has_wildcard(owner):
            # "owner/*" narrows to a single account on the server side.
            parts.append(f"user:{owner}")
        # Any other wildcard form cannot be expressed as a qualifier value
        # and is enforced by the client-side glob filter only.

    if path_glob:
        if not _has_wildcard(path_glob):
            parts.append(f"path:{path_glob}")
        elif "/" not in path_glob and "*" in path_glob and not path_glob.startswith("*."):
            parts.append(f"filename:{path_glob}")
        else:
            ext_match = _TRAILING_EXTENSION_RE.search(path_glob)
            if ext_match:
                parts.append(f"extension:{ext_match.group(1)}")

    return " ".join(parts)


def _api_error_message(response: Any) -> str:
    """Describe a non-200 API response, tolerating a non-JSON error body."""
    try:
        payload = response.json()
    except ValueError:
        payload = None
    detail = payload.get("message") if isinstance(payload, dict) else None

    if response.status_code == 403:
        return f"GitHub API rate limit or permission error: {detail or 'Unknown error'}"
    message = f"GitHub API error (status {response.status_code})"
    if detail:
        message += f": {detail}"
    return message


def _item_to_matches(item: Dict[str, Any], repo_name: str, file_path: str) -> list:
    """Turn one search item into match records, one per text fragment."""
    sha = item.get("sha", "")
    url = item.get("html_url", "")
    text_matches = item.get("text_matches", [])

    if not text_matches:
        return [
            {
                "repo": repo_name,
                "path": file_path,
                "ref": sha,
                "line_start": 1,
                "line_end": 1,
                "snippet": "(snippet not available)",
                "url": url,
            }
        ]

    records = []
    for text_match in text_matches:
        fragment = text_match.get("fragment", "")
        # The range is relative to the fragment: it always starts at 1 and
        # ends at the number of non-blank fragment lines.
        line_count = len([line for line in fragment.split("\n") if line.strip()])
        records.append(
            {
                "repo": repo_name,
                "path": file_path,
                "ref": sha,
                "line_start": 1,
                "line_end": line_count,
                "snippet": fragment.strip(),
                "url": url,
            }
        )
    return records


def _format_matches(results: list, warning: Optional[str]) -> str:
    """Render match records as the markdown-style text returned to the agent."""
    lines_output = [f"**Found {len(results)} code matches:**\n"]

    for i, match in enumerate(results, 1):
        lines_output.append(f"{i}. **{match['repo']}:{match['path']}**")
        lines_output.append(
            f" Lines: {match['line_start']}-{match['line_end']} | Ref: {match['ref'][:7]}"
        )
        lines_output.append(f" URL: {match['url']}")

        # Show at most five snippet lines, with an ellipsis when truncated.
        snippet_lines = match["snippet"].split("\n")
        lines_output.append(" ```")
        for line in snippet_lines[:5]:
            lines_output.append(f" {line}")
        if len(snippet_lines) > 5:
            lines_output.append(" ...")
        lines_output.append(" ```")
        lines_output.append("")

    if warning:
        lines_output.append(f"Note: results may be incomplete. {warning}")

    return "\n".join(lines_output)


def search_code(
    query: str,
    repo_glob: Optional[str] = None,
    path_glob: Optional[str] = None,
    regex: bool = False,
    max_results: int = 20,
) -> ToolResult:
    """
    Search for code across GitHub with glob filtering.

    The globs are applied twice: where possible they are turned into exact
    search qualifiers to narrow the request, and every returned item is then
    checked against the full glob on the client side.

    Args:
        query: Search term or pattern to find in code
        repo_glob: Glob pattern to filter repositories (e.g., "github/*",
            "*/react"). A value without "/" and without wildcards is treated
            as an owner name, i.e. "github" behaves like "github/*".
        path_glob: Glob pattern to filter file paths (e.g., "*.py", "src/**/*.js")
        regex: If True, treat query as regular expression
        max_results: Maximum number of results to return (default 20)

    Returns:
        ToolResult with code matches and snippets. On failure the result has
        "isError": True and the message in "formatted". If a later page fails
        after some matches were already collected, those matches are returned
        with a trailing note instead of being discarded.
    """
    token = os.environ.get("GITHUB_TOKEN")
    if not token:
        return _error_result("Error: GITHUB_TOKEN environment variable is required")

    github_query = _build_github_query(query, repo_glob, path_glob, regex)

    # Repository names arrive as "owner/name", so a bare owner name has to be
    # widened before it can match anything in the client-side filter.
    client_repo_glob = repo_glob
    if repo_glob and "/" not in repo_glob and not _has_wildcard(repo_glob):
        client_repo_glob = f"{repo_glob}/*"

    headers = {
        "Accept": "application/vnd.github.text-match+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "Authorization": f"Bearer {token}",
    }

    # When items can be dropped by the client-side filter, ask for full pages
    # so that fewer requests are needed to collect max_results matches.
    per_page = 100 if (repo_glob or path_glob) else min(100, max_results)

    all_matches = []
    items_seen = 0
    failure = None
    page = 1

    try:
        while len(all_matches) < max_results:
            response = requests.get(
                "https://api.github.com/search/code",
                headers=headers,
                params={"q": github_query, "page": page, "per_page": per_page},
                timeout=30,
            )

            if response.status_code != 200:
                failure = _api_error_message(response)
                break

            try:
                data = response.json()
            except ValueError:
                failure = "GitHub API returned a response that is not valid JSON"
                break

            items = data.get("items", [])
            if not items:
                break
            items_seen += len(items)

            for item in items:
                repo_name = item.get("repository", {}).get("full_name", "unknown")
                file_path = item.get("path", "")

                if client_repo_glob and not _glob_match(repo_name, client_repo_glob):
                    continue
                if path_glob and not _glob_match(file_path, path_glob):
                    continue

                all_matches.extend(_item_to_matches(item, repo_name, file_path))

            # Stop on the number of items the server has handed out, not on
            # the number that survived filtering; otherwise a selective glob
            # keeps paging until the API rejects the request.
            if items_seen >= min(data.get("total_count", 0), _GITHUB_SEARCH_RESULT_CAP):
                break

            page += 1

    except requests.exceptions.RequestException as e:
        failure = f"Failed to connect to GitHub API: {str(e)}"

    if failure and not all_matches:
        return _error_result(failure)

    results = all_matches[:max_results]

    if not results:
        return {
            "formatted": f"No code matches found for query: {query}",
            "totalResults": 0,
            "resultsShared": 0,
        }

    return {
        "formatted": _format_matches(results, failure),
        "totalResults": len(results),
        "resultsShared": len(results),
    }
|
|
|
|
| |
# Tool specification for ``search_code``: the name, a long-form description
# with usage examples, and a JSON-schema style parameter table in which only
# ``query`` is required.
GITHUB_SEARCH_CODE_TOOL_SPEC = {
    "name": "search_code",
    # One list entry per output line; empty entries are blank lines.
    "description": "\n".join(
        [
            "Search for code patterns across GitHub with advanced glob filtering.",
            "",
            "Features:",
            "- Text or regex search",
            "- Repository glob patterns (e.g., 'github/*', '*/react')",
            "- File path glob patterns (e.g., '*.py', 'src/**/*.js')",
            "- Returns code snippets with line numbers",
            "- Direct URLs to matches",
            "",
            "## Examples:",
            "",
            "**Search for Python function definitions:**",
            "{'query': 'def search', 'path_glob': '*.py', 'max_results': 10}",
            "",
            "**Search for TODO comments in specific org:**",
            "{'query': 'TODO', 'repo_glob': 'github/*', 'max_results': 5}",
            "",
            "**Regex search for test functions:**",
            "{'query': r'func Test\\w+', 'path_glob': '*.go', 'regex': True}",
            "",
            "**Search in specific repo with path filter:**",
            "{'query': 'SearchCode', 'repo_glob': 'github/github-mcp-server', 'path_glob': '*.go'}",
            "",
            "**Find imports in TypeScript files:**",
            "{'query': 'import', 'path_glob': 'src/**/*.ts', 'repo_glob': 'facebook/*'}",
            "",
            "Perfect for finding code patterns, learning from examples, or exploring implementations.",
        ]
    ),
    "parameters": {
        "type": "object",
        # (parameter name, JSON type, description) in declaration order.
        "properties": {
            param_name: {"type": json_type, "description": param_doc}
            for param_name, json_type, param_doc in (
                (
                    "query",
                    "string",
                    "Search term or pattern to find in code. Required.",
                ),
                (
                    "repo_glob",
                    "string",
                    "Glob pattern to filter repositories (e.g., 'github/*', '*/react'). Optional.",
                ),
                (
                    "path_glob",
                    "string",
                    "Glob pattern to filter file paths (e.g., '*.py', 'src/**/*.js'). Optional.",
                ),
                (
                    "regex",
                    "boolean",
                    "If true, treat query as regular expression. Default: false.",
                ),
                (
                    "max_results",
                    "integer",
                    "Maximum number of results to return. Default: 20.",
                ),
            )
        },
        "required": ["query"],
    },
}
|
|
|
|
async def github_search_code_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Handler for agent tool router.

    Validates the arguments, runs ``search_code`` and flattens its result
    into the ``(text, success)`` pair returned to the router.

    Args:
        arguments: Tool-call arguments. ``query`` is required; ``repo_glob``,
            ``path_glob``, ``regex`` and ``max_results`` are optional.

    Returns:
        A tuple of the formatted output (or error message) and a flag that is
        True on success and False on any error. Exceptions are never
        propagated; they are reported through the message instead.
    """
    # Function-scope import so this block does not depend on a change to the
    # module's import section.
    import asyncio

    try:
        query = arguments.get("query")
        if not isinstance(query, str) or not query.strip():
            return (
                "Error searching code: 'query' is required and must be a non-empty string",
                False,
            )

        # search_code performs blocking HTTP requests; run it in a worker
        # thread so the event loop is not stalled while it waits.
        result = await asyncio.to_thread(
            search_code,
            query=query,
            repo_glob=arguments.get("repo_glob"),
            path_glob=arguments.get("path_glob"),
            regex=arguments.get("regex", False),
            max_results=arguments.get("max_results", 20),
        )
        return result["formatted"], not result.get("isError", False)
    except Exception as e:
        return f"Error searching code: {str(e)}", False
|
|