| | """
|
| | GitHub Read File Tool - Read file contents from any GitHub repository with line range support
|
| |
|
| | Fetch exact file contents with metadata, supporting line ranges for efficient reading.
|
| | """
|
| |
|
| | import base64
|
| | import json
|
| | import os
|
| | from typing import Any, Dict, Optional
|
| |
|
| | import nbformat
|
| | import requests
|
| | from nbconvert import MarkdownExporter
|
| | from nbconvert.preprocessors import ClearOutputPreprocessor, TagRemovePreprocessor
|
| |
|
| | from agent.tools.types import ToolResult
|
| |
|
| |
|
| | def _convert_ipynb_to_markdown(content: str) -> str:
|
| | """
|
| | Convert Jupyter notebook JSON to LLM-friendly Markdown.
|
| |
|
| | Args:
|
| | content: Raw notebook JSON string
|
| |
|
| | Returns:
|
| | Converted Markdown string
|
| | """
|
| | try:
|
| |
|
| | nb_dict = json.loads(content)
|
| |
|
| |
|
| | if "cells" in nb_dict:
|
| | for cell in nb_dict["cells"]:
|
| | if "source" in cell and isinstance(cell["source"], list):
|
| | cell["source"] = "".join(cell["source"])
|
| |
|
| |
|
| | nb = nbformat.reads(json.dumps(nb_dict), as_version=4)
|
| |
|
| |
|
| | clear = ClearOutputPreprocessor()
|
| | nb, _ = clear.preprocess(nb, {})
|
| |
|
| |
|
| | remove = TagRemovePreprocessor(
|
| | remove_cell_tags={"hide", "hidden", "remove"},
|
| | remove_input_tags=set(),
|
| | remove_all_outputs_tags=set(),
|
| | )
|
| | nb, _ = remove.preprocess(nb, {})
|
| |
|
| |
|
| | exporter = MarkdownExporter()
|
| | markdown, _ = exporter.from_notebook_node(nb)
|
| |
|
| | return markdown
|
| |
|
| | except json.JSONDecodeError:
|
| | return content
|
| | except Exception:
|
| | return content
|
| |
|
| |
|
| | def read_file(
|
| | repo: str,
|
| | path: str,
|
| | ref: str = "HEAD",
|
| | line_start: Optional[int] = None,
|
| | line_end: Optional[int] = None,
|
| | ) -> ToolResult:
|
| | """
|
| | Read file contents from a GitHub repository with line range support.
|
| |
|
| | Args:
|
| | repo: Repository in format "owner/repo" (e.g., "github/github-mcp-server")
|
| | path: Path to file in repository (e.g., "pkg/github/search.go")
|
| | ref: Git reference - branch name, tag, or commit SHA (default: "HEAD")
|
| | line_start: Starting line number (1-indexed, inclusive)
|
| | line_end: Ending line number (1-indexed, inclusive)
|
| |
|
| | Returns:
|
| | ToolResult with file contents and metadata
|
| | """
|
| | token = os.environ.get("GITHUB_TOKEN")
|
| | if not token:
|
| | return {
|
| | "formatted": "Error: GITHUB_TOKEN environment variable is required",
|
| | "totalResults": 0,
|
| | "resultsShared": 0,
|
| | "isError": True,
|
| | }
|
| |
|
| |
|
| | if "/" not in repo:
|
| | return {
|
| | "formatted": "Error: repo must be in format 'owner/repo'",
|
| | "totalResults": 0,
|
| | "resultsShared": 0,
|
| | "isError": True,
|
| | }
|
| |
|
| | owner, repo_name = repo.split("/", 1)
|
| |
|
| | headers = {
|
| | "Accept": "application/vnd.github+json",
|
| | "X-GitHub-Api-Version": "2022-11-28",
|
| | "Authorization": f"Bearer {token}",
|
| | }
|
| |
|
| |
|
| | url = f"https://api.github.com/repos/{owner}/{repo_name}/contents/{path}"
|
| | params = {}
|
| | if ref and ref != "HEAD":
|
| | params["ref"] = ref
|
| |
|
| | try:
|
| | response = requests.get(url, headers=headers, params=params, timeout=30)
|
| |
|
| | if response.status_code == 404:
|
| | return {
|
| | "formatted": f"File not found: {path} in {repo} (ref: {ref})",
|
| | "totalResults": 0,
|
| | "resultsShared": 0,
|
| | "isError": True,
|
| | }
|
| |
|
| | if response.status_code != 200:
|
| | error_msg = f"GitHub API error (status {response.status_code})"
|
| | try:
|
| | error_data = response.json()
|
| | if "message" in error_data:
|
| | error_msg += f": {error_data['message']}"
|
| | except Exception:
|
| | pass
|
| | return {
|
| | "formatted": error_msg,
|
| | "totalResults": 0,
|
| | "resultsShared": 0,
|
| | "isError": True,
|
| | }
|
| |
|
| | data = response.json()
|
| |
|
| |
|
| | if data.get("type") != "file":
|
| | return {
|
| | "formatted": f"Path {path} is not a file (type: {data.get('type')})",
|
| | "totalResults": 0,
|
| | "resultsShared": 0,
|
| | "isError": True,
|
| | }
|
| |
|
| |
|
| | content_b64 = data.get("content", "")
|
| | if content_b64:
|
| | content_b64 = content_b64.replace("\n", "").replace(" ", "")
|
| | content = base64.b64decode(content_b64).decode("utf-8", errors="replace")
|
| | else:
|
| |
|
| | raw_headers = {
|
| | "Accept": "application/vnd.github.raw",
|
| | "X-GitHub-Api-Version": "2022-11-28",
|
| | "Authorization": f"Bearer {token}",
|
| | }
|
| | raw_response = requests.get(
|
| | url, headers=raw_headers, params=params, timeout=30
|
| | )
|
| | if raw_response.status_code != 200:
|
| | return {
|
| | "formatted": "Failed to fetch file content",
|
| | "totalResults": 0,
|
| | "resultsShared": 0,
|
| | "isError": True,
|
| | }
|
| | content = raw_response.text
|
| |
|
| | if path.lower().endswith(".ipynb"):
|
| | content = _convert_ipynb_to_markdown(content)
|
| |
|
| |
|
| | lines = content.split("\n")
|
| | total_lines = len(lines)
|
| |
|
| | truncated = False
|
| |
|
| | if line_start is None and line_end is None:
|
| |
|
| | if total_lines > 300:
|
| | line_start = 1
|
| | line_end = 300
|
| | truncated = True
|
| | else:
|
| | line_start = 1
|
| | line_end = total_lines
|
| | else:
|
| |
|
| | if line_start is None:
|
| | line_start = 1
|
| | if line_end is None:
|
| | line_end = total_lines
|
| |
|
| |
|
| | line_start = max(1, line_start)
|
| | line_end = min(total_lines, line_end)
|
| | if line_start > line_end:
|
| | return {
|
| | "formatted": f"Invalid range: line_start ({line_start}) > line_end ({line_end})",
|
| | "totalResults": 0,
|
| | "resultsShared": 0,
|
| | "isError": True,
|
| | }
|
| |
|
| |
|
| | selected_lines = lines[line_start - 1 : line_end]
|
| | selected_content = "\n".join(selected_lines)
|
| |
|
| |
|
| | lines_output = [f"**Reading file from repo: {repo}, path: {path}**"]
|
| |
|
| | if ref and ref != "HEAD":
|
| | lines_output.append(f"Ref: {ref}")
|
| |
|
| | lines_output.append("\n**File content:")
|
| | lines_output.append("```")
|
| | lines_output.append(selected_content)
|
| | lines_output.append("```")
|
| | if truncated:
|
| | lines_output.append(
|
| | f"Currently showing lines {line_start}-{line_end} out of {total_lines} total lines. Use line_start and line_end to view more lines."
|
| | )
|
| | return {
|
| | "formatted": "\n".join(lines_output),
|
| | "totalResults": 1,
|
| | "resultsShared": 1,
|
| | }
|
| |
|
| | except requests.exceptions.RequestException as e:
|
| | return {
|
| | "formatted": f"Failed to connect to GitHub API: {str(e)}",
|
| | "totalResults": 0,
|
| | "resultsShared": 0,
|
| | "isError": True,
|
| | }
|
| |
|
| |
|
| |
|
| | GITHUB_READ_FILE_TOOL_SPEC = {
|
| | "name": "github_read_file",
|
| | "description": (
|
| | "Read file contents from GitHub repositories with line range support (default 300 lines). "
|
| | "⚠️ CRITICAL: Use AFTER github_find_examples to study working implementation code. "
|
| | "**Use when:** (1) Found example file via github_find_examples and need full code, "
|
| | "(2) Need to read trainer class implementation, (3) Study configuration patterns, "
|
| | "(4) Read specific code sections with line ranges, (5) Review code from specific branches/commits. "
|
| | "**Pattern:** github_find_examples (discover files) → github_read_file (read code) → implement using researched patterns. "
|
| | "Returns: File contents with line numbers, formatted for LLM reading. Auto-converts Jupyter notebooks to markdown. "
|
| | "**Then:** Implement using patterns and APIs from the example code. "
|
| | "**Critical for reliability:** Reading working examples prevents API errors and shows current best practices. "
|
| | "Use line_start/line_end for large files (>300 lines) to read specific sections.\n\n"
|
| | "## When to use this tool\n\n"
|
| | "- When reading example code, trainer implementations, or configuration files\n"
|
| | "- After github_find_examples returns file paths you want to study\n"
|
| | "- When investigating specific code sections with line ranges\n"
|
| | "- When reading from specific branches, tags, or commits (use ref parameter)\n\n"
|
| | "## When NOT to use this tool\n\n"
|
| | "- When you don't know exact file path (use github_find_examples or github_search_code first)\n"
|
| | "- When searching for code patterns across repos (use github_search_code instead)\n\n"
|
| | "## Examples\n\n"
|
| | "<example>\n"
|
| | "// ML Workflow Step: Read GRPO trainer class after finding via github_find_examples\n"
|
| | "// Use case: Understand GRPOTrainer API, parameters, and methods\n"
|
| | "{\n"
|
| | " repo: 'huggingface/trl',\n"
|
| | " path: 'trl/trainer/grpo_trainer.py',\n"
|
| | " line_start: 1,\n"
|
| | " line_end: 200\n"
|
| | "}\n"
|
| | "// Read class definition and constructor to understand current API\n"
|
| | "// Shows: __init__ parameters, configuration, required arguments\n"
|
| | "</example>\n\n"
|
| | "<example>\n"
|
| | "// ML Workflow Step: Study complete training script from examples\n"
|
| | "// Use case: Learn end-to-end VLM fine-tuning workflow\n"
|
| | "{\n"
|
| | " repo: 'huggingface/trl',\n"
|
| | " path: 'examples/scripts/grpo_vlm.py'\n"
|
| | "}\n"
|
| | "// Returns first 300 lines - shows full training setup\n"
|
| | "// Use line_start/line_end if need to read more\n"
|
| | "</example>\n\n"
|
| | "<example>\n"
|
| | "// ML Workflow Step: Check TrainingArguments configuration patterns\n"
|
| | "// Use case: Learn how to structure training configs correctly\n"
|
| | "{\n"
|
| | " repo: 'huggingface/transformers',\n"
|
| | " path: 'examples/pytorch/language-modeling/run_clm.py',\n"
|
| | " line_start: 50,\n"
|
| | " line_end: 150\n"
|
| | "}\n"
|
| | "// Read argument parsing and config setup section\n"
|
| | "// Shows: current parameter names, default values, best practices\n"
|
| | "</example>"
|
| | ),
|
| | "parameters": {
|
| | "type": "object",
|
| | "properties": {
|
| | "repo": {
|
| | "type": "string",
|
| | "description": "Repository in format 'owner/repo' (e.g., 'github/github-mcp-server'). Required.",
|
| | },
|
| | "path": {
|
| | "type": "string",
|
| | "description": "Path to file in repository (e.g., 'src/index.js'). Required.",
|
| | },
|
| | "ref": {
|
| | "type": "string",
|
| | "description": "Git reference - branch name, tag, or commit SHA. Default: 'HEAD'.",
|
| | },
|
| | "line_start": {
|
| | "type": "integer",
|
| | "description": "Starting line number (1-indexed, inclusive). Optional.",
|
| | },
|
| | "line_end": {
|
| | "type": "integer",
|
| | "description": "Ending line number (1-indexed, inclusive). Optional.",
|
| | },
|
| | },
|
| | "required": ["repo", "path"],
|
| | },
|
| | }
|
| |
|
| |
|
| | async def github_read_file_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
|
| | """Handler for agent tool router"""
|
| | try:
|
| | result = read_file(
|
| | repo=arguments["repo"],
|
| | path=arguments["path"],
|
| | ref=arguments.get("ref", "HEAD"),
|
| | line_start=arguments.get("line_start"),
|
| | line_end=arguments.get("line_end"),
|
| | )
|
| | return result["formatted"], not result.get("isError", False)
|
| | except Exception as e:
|
| | return f"Error reading file: {str(e)}", False
|
| |
|