| """ |
| GitHub Read File Tool - Read file contents from any GitHub repository with line range support |
| |
| Fetch exact file contents with metadata, supporting line ranges for efficient reading. |
| """ |
|
|
| import base64 |
| import json |
| import os |
| from typing import Any, Dict, Optional |
|
|
| import nbformat |
| import requests |
| from nbconvert import MarkdownExporter |
| from nbconvert.preprocessors import ClearOutputPreprocessor, TagRemovePreprocessor |
|
|
| from agent.tools.types import ToolResult |
|
|
|
|
| def _convert_ipynb_to_markdown(content: str) -> str: |
| """ |
| Convert Jupyter notebook JSON to LLM-friendly Markdown. |
| |
| Args: |
| content: Raw notebook JSON string |
| |
| Returns: |
| Converted Markdown string |
| """ |
| try: |
| |
| nb_dict = json.loads(content) |
|
|
| |
| if "cells" in nb_dict: |
| for cell in nb_dict["cells"]: |
| if "source" in cell and isinstance(cell["source"], list): |
| cell["source"] = "".join(cell["source"]) |
|
|
| |
| nb = nbformat.reads(json.dumps(nb_dict), as_version=4) |
|
|
| |
| clear = ClearOutputPreprocessor() |
| nb, _ = clear.preprocess(nb, {}) |
|
|
| |
| remove = TagRemovePreprocessor( |
| remove_cell_tags={"hide", "hidden", "remove"}, |
| remove_input_tags=set(), |
| remove_all_outputs_tags=set(), |
| ) |
| nb, _ = remove.preprocess(nb, {}) |
|
|
| |
| exporter = MarkdownExporter() |
| markdown, _ = exporter.from_notebook_node(nb) |
|
|
| return markdown |
|
|
| except json.JSONDecodeError: |
| return content |
| except Exception: |
| return content |
|
|
|
|
# Number of lines returned when the caller does not ask for an explicit range.
_DEFAULT_MAX_LINES = 300


def _error_result(message: str) -> ToolResult:
    """Build the failure-shaped result shared by every error path of read_file."""
    return {
        "formatted": message,
        "totalResults": 0,
        "resultsShared": 0,
        "isError": True,
    }


def _split_lines(content: str) -> list[str]:
    """Split text on newlines without counting a terminating newline as a line."""
    lines = content.split("\n")
    # "a\nb\n".split("\n") ends with a phantom "" entry; dropping it keeps the
    # line count from being one too high. An empty file still yields one
    # (empty) line so that range arithmetic below never sees zero lines.
    if len(lines) > 1 and lines[-1] == "":
        lines.pop()
    return lines


def read_file(
    repo: str,
    path: str,
    ref: str = "HEAD",
    line_start: Optional[int] = None,
    line_end: Optional[int] = None,
) -> ToolResult:
    """
    Read file contents from a GitHub repository with line range support.

    The file is fetched through the GitHub contents endpoint using the token
    in the ``GITHUB_TOKEN`` environment variable. Paths ending in ``.ipynb``
    are converted to Markdown before lines are selected. When neither
    ``line_start`` nor ``line_end`` is given, at most the first 300 lines are
    returned and a note about the total line count is appended.

    Args:
        repo: Repository in format "owner/repo" (e.g., "github/github-mcp-server")
        path: Path to file in repository (e.g., "pkg/github/search.go")
        ref: Git reference - branch name, tag, or commit SHA (default: "HEAD")
        line_start: Starting line number (1-indexed, inclusive)
        line_end: Ending line number (1-indexed, inclusive)

    Returns:
        ToolResult dict. On success it has "formatted", "totalResults" and
        "resultsShared"; on failure "formatted" holds the error message and
        "isError" is True. Request failures are reported this way rather
        than raised.
    """
    # Imported locally so this function brings its own dependency into scope.
    from urllib.parse import quote

    token = os.environ.get("GITHUB_TOKEN")
    if not token:
        return _error_result("Error: GITHUB_TOKEN environment variable is required")

    if "/" not in repo:
        return _error_result("Error: repo must be in format 'owner/repo'")

    owner, repo_name = repo.split("/", 1)

    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "Authorization": f"Bearer {token}",
    }

    # Percent-encode the path: an unescaped "#" or "?" in a file name would be
    # parsed as a URL fragment / query string and cut the path short.
    encoded_path = quote(path, safe="/")
    url = f"https://api.github.com/repos/{owner}/{repo_name}/contents/{encoded_path}"
    params = {}
    if ref and ref != "HEAD":
        # "HEAD" is expressed by omitting the ref parameter altogether.
        params["ref"] = ref

    try:
        response = requests.get(url, headers=headers, params=params, timeout=30)

        if response.status_code == 404:
            return _error_result(f"File not found: {path} in {repo} (ref: {ref})")

        if response.status_code != 200:
            error_msg = f"GitHub API error (status {response.status_code})"
            try:
                error_data = response.json()
            except ValueError:
                # Error body is not JSON; the status code alone is reported.
                error_data = None
            if isinstance(error_data, dict) and "message" in error_data:
                error_msg += f": {error_data['message']}"
            return _error_result(error_msg)

        data = response.json()

        # A directory path is answered with a JSON array of entries rather
        # than an object, so guard before calling .get() on the payload.
        entry_type = data.get("type") if isinstance(data, dict) else "dir"
        if entry_type != "file":
            return _error_result(f"Path {path} is not a file (type: {entry_type})")

        content_b64 = data.get("content", "")
        if content_b64:
            content_b64 = content_b64.replace("\n", "").replace(" ", "")
            content = base64.b64decode(content_b64).decode("utf-8", errors="replace")
        else:
            # No inline content in the JSON payload: ask the same endpoint
            # for the raw file body instead.
            raw_headers = {
                "Accept": "application/vnd.github.raw",
                "X-GitHub-Api-Version": "2022-11-28",
                "Authorization": f"Bearer {token}",
            }
            raw_response = requests.get(
                url, headers=raw_headers, params=params, timeout=30
            )
            if raw_response.status_code != 200:
                return _error_result("Failed to fetch file content")
            content = raw_response.text

        if path.lower().endswith(".ipynb"):
            content = _convert_ipynb_to_markdown(content)

        lines = _split_lines(content)
        total_lines = len(lines)

        # Truncation only applies when the caller gave no range at all.
        truncated = False
        if line_start is None and line_end is None:
            line_start = 1
            line_end = min(total_lines, _DEFAULT_MAX_LINES)
            truncated = total_lines > _DEFAULT_MAX_LINES
        else:
            if line_start is None:
                line_start = 1
            if line_end is None:
                line_end = total_lines

        # Clamp to the file's bounds, then reject an empty or inverted range.
        line_start = max(1, line_start)
        line_end = min(total_lines, line_end)
        if line_start > line_end:
            return _error_result(
                f"Invalid range: line_start ({line_start}) > line_end ({line_end})"
            )

        selected_content = "\n".join(lines[line_start - 1 : line_end])

        lines_output = [f"**Reading file from repo: {repo}, path: {path}**"]
        if ref and ref != "HEAD":
            lines_output.append(f"Ref: {ref}")

        # The closing "**" was missing, leaving the bold marker unbalanced.
        lines_output.append("\n**File content:**")
        lines_output.append("```")
        lines_output.append(selected_content)
        lines_output.append("```")
        if truncated:
            lines_output.append(
                f"Currently showing lines {line_start}-{line_end} out of {total_lines} total lines. Use line_start and line_end to view more lines."
            )
        return {
            "formatted": "\n".join(lines_output),
            "totalResults": 1,
            "resultsShared": 1,
        }

    except requests.exceptions.RequestException as e:
        return _error_result(f"Failed to connect to GitHub API: {str(e)}")
|
|
|
|
| |
# Tool specification for github_read_file: its name, the description text,
# and a JSON-Schema-style description of the accepted arguments.
GITHUB_READ_FILE_TOOL_SPEC = dict(
    name="github_read_file",
    # The description is four paragraphs separated by blank lines.
    description="\n\n".join(
        [
            "Read file contents from GitHub repositories. Returns first 300 lines by default. "
            "Auto-converts Jupyter notebooks to markdown.",
            "Use AFTER github_find_examples to study the working implementation. "
            "The purpose is to learn current API patterns — imports, trainer configs, dataset handling — "
            "so your implementation uses correct, up-to-date code.",
            "Use line_start/line_end for large files (>300 lines) to read specific sections.",
            "When NOT to use: when you don't know the file path (use github_find_examples first).",
        ]
    ),
    parameters=dict(
        type="object",
        properties=dict(
            repo=dict(
                type="string",
                description="Repository in format 'owner/repo' (e.g., 'github/github-mcp-server'). Required.",
            ),
            path=dict(
                type="string",
                description="Path to file in repository (e.g., 'src/index.js'). Required.",
            ),
            ref=dict(
                type="string",
                description="Git reference - branch name, tag, or commit SHA. Default: 'HEAD'.",
            ),
            line_start=dict(
                type="integer",
                description="Starting line number (1-indexed, inclusive). Optional.",
            ),
            line_end=dict(
                type="integer",
                description="Ending line number (1-indexed, inclusive). Optional.",
            ),
        ),
        # Only the repository and the file path are mandatory.
        required=["repo", "path"],
    ),
)
|
|
|
|
async def github_read_file_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """
    Handler for the agent tool router: run read_file from an arguments mapping.

    Args:
        arguments: Tool-call arguments; "repo" and "path" are looked up
            directly, while "ref", "line_start" and "line_end" are optional

    Returns:
        A (text, ok) pair: the formatted output and True when the result is
        not flagged as an error. Any exception raised along the way is
        turned into ("Error reading file: ...", False).
    """
    try:
        call_kwargs = {
            "repo": arguments["repo"],
            "path": arguments["path"],
            "ref": arguments.get("ref", "HEAD"),
            "line_start": arguments.get("line_start"),
            "line_end": arguments.get("line_end"),
        }
        outcome = read_file(**call_kwargs)
        text = outcome["formatted"]
        # A result without an "isError" key counts as a success.
        succeeded = not outcome.get("isError", False)
        return text, succeeded
    except Exception as exc:
        return f"Error reading file: {exc!s}", False
|
|