Spaces:

smolagents
/

ml-agent

Running

App Files Community

akseljoonas HF Staff committed on Jan 4

Commit

63a4db3

1 Parent(s): 53d0a89

github tools updated

Browse files

Files changed (6) hide show

agent/core/tools.py +32 -23
agent/tools/__init__.py +16 -24
agent/tools/github_find_examples.py +196 -383
agent/tools/github_list_repos.py +149 -233
agent/tools/github_read_file.py +178 -304
agent/tools/github_search_code.py +205 -285

agent/core/tools.py CHANGED Viewed

@@ -20,12 +20,21 @@ from agent.tools.docs_tools import (
     hf_docs_fetch_handler,
 )
 from agent.tools.github_find_examples import (
-    FIND_EXAMPLES_TOOL_SPEC,
-    find_examples_handler,
 )
-from agent.tools.github_list_repos import LIST_REPOS_TOOL_SPEC, list_repos_handler
-from agent.tools.github_read_file import READ_FILE_TOOL_SPEC, read_file_handler
-from agent.tools.github_search_code import SEARCH_CODE_TOOL_SPEC, search_code_handler
 from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
 from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
 from agent.tools.private_hf_repo_tools import (
@@ -231,7 +240,7 @@ class ToolRouter:
 def create_builtin_tools() -> list[ToolSpec]:
     """Create built-in tool specifications"""
     print(
-        f"Creating built-in tools: {EXPLORE_HF_DOCS_TOOL_SPEC['name']}, {HF_DOCS_FETCH_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}, {HF_JOBS_TOOL_SPEC['name']}, {PRIVATE_HF_REPO_TOOL_SPEC['name']}, {UTILS_TOOL_SPEC['name']}, {FIND_EXAMPLES_TOOL_SPEC['name']}, {READ_FILE_TOOL_SPEC['name']}, {LIST_REPOS_TOOL_SPEC['name']}, {SEARCH_CODE_TOOL_SPEC['name']}"
     )
     # in order of importance
     return [
@@ -273,29 +282,29 @@ def create_builtin_tools() -> list[ToolSpec]:
             parameters=UTILS_TOOL_SPEC["parameters"],
             handler=utils_handler,
         ),
-        # GitHub tools - 4 separate tools
         ToolSpec(
-            name=FIND_EXAMPLES_TOOL_SPEC["name"],
-            description=FIND_EXAMPLES_TOOL_SPEC["description"],
-            parameters=FIND_EXAMPLES_TOOL_SPEC["parameters"],
-            handler=find_examples_handler,
         ),
         ToolSpec(
-            name=READ_FILE_TOOL_SPEC["name"],
-            description=READ_FILE_TOOL_SPEC["description"],
-            parameters=READ_FILE_TOOL_SPEC["parameters"],
-            handler=read_file_handler,
         ),
         ToolSpec(
-            name=LIST_REPOS_TOOL_SPEC["name"],
-            description=LIST_REPOS_TOOL_SPEC["description"],
-            parameters=LIST_REPOS_TOOL_SPEC["parameters"],
-            handler=list_repos_handler,
         ),
         ToolSpec(
-            name=SEARCH_CODE_TOOL_SPEC["name"],
-            description=SEARCH_CODE_TOOL_SPEC["description"],
-            parameters=SEARCH_CODE_TOOL_SPEC["parameters"],
-            handler=search_code_handler,
         ),
     ]

     hf_docs_fetch_handler,
 )
 from agent.tools.github_find_examples import (
+    GITHUB_FIND_EXAMPLES_TOOL_SPEC,
+    github_find_examples_handler,
+)
+from agent.tools.github_list_repos import (
+    GITHUB_LIST_REPOS_TOOL_SPEC,
+    github_list_repos_handler,
+)
+from agent.tools.github_read_file import (
+    GITHUB_READ_FILE_TOOL_SPEC,
+    github_read_file_handler,
+)
+from agent.tools.github_search_code import (
+    GITHUB_SEARCH_CODE_TOOL_SPEC,
+    github_search_code_handler,
 )
 from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
 from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
 from agent.tools.private_hf_repo_tools import (
 def create_builtin_tools() -> list[ToolSpec]:
     """Create built-in tool specifications"""
     print(
+        f"Creating built-in tools: {EXPLORE_HF_DOCS_TOOL_SPEC['name']}, {HF_DOCS_FETCH_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}, {HF_JOBS_TOOL_SPEC['name']}, {PRIVATE_HF_REPO_TOOL_SPEC['name']}, {UTILS_TOOL_SPEC['name']}, {GITHUB_SEARCH_CODE_TOOL_SPEC['name']}, {GITHUB_FIND_EXAMPLES_TOOL_SPEC['name']}, {GITHUB_LIST_REPOS_TOOL_SPEC['name']}, {GITHUB_READ_FILE_TOOL_SPEC['name']}"
     )
     # in order of importance
     return [
             parameters=UTILS_TOOL_SPEC["parameters"],
             handler=utils_handler,
         ),
+        # GitHub tools
         ToolSpec(
+            name=GITHUB_SEARCH_CODE_TOOL_SPEC["name"],
+            description=GITHUB_SEARCH_CODE_TOOL_SPEC["description"],
+            parameters=GITHUB_SEARCH_CODE_TOOL_SPEC["parameters"],
+            handler=github_search_code_handler,
         ),
         ToolSpec(
+            name=GITHUB_FIND_EXAMPLES_TOOL_SPEC["name"],
+            description=GITHUB_FIND_EXAMPLES_TOOL_SPEC["description"],
+            parameters=GITHUB_FIND_EXAMPLES_TOOL_SPEC["parameters"],
+            handler=github_find_examples_handler,
         ),
         ToolSpec(
+            name=GITHUB_LIST_REPOS_TOOL_SPEC["name"],
+            description=GITHUB_LIST_REPOS_TOOL_SPEC["description"],
+            parameters=GITHUB_LIST_REPOS_TOOL_SPEC["parameters"],
+            handler=github_list_repos_handler,
         ),
         ToolSpec(
+            name=GITHUB_READ_FILE_TOOL_SPEC["name"],
+            description=GITHUB_READ_FILE_TOOL_SPEC["description"],
+            parameters=GITHUB_READ_FILE_TOOL_SPEC["parameters"],
+            handler=github_read_file_handler,
         ),
     ]

agent/tools/__init__.py CHANGED Viewed

@@ -3,24 +3,20 @@ Hugging Face tools for the agent
 """
 from agent.tools.github_find_examples import (
-    FIND_EXAMPLES_TOOL_SPEC,
-    FindExamplesTool,
-    find_examples_handler,
 )
 from agent.tools.github_list_repos import (
-    LIST_REPOS_TOOL_SPEC,
-    ListReposTool,
-    list_repos_handler,
 )
 from agent.tools.github_read_file import (
-    READ_FILE_TOOL_SPEC,
-    ReadFileTool,
-    read_file_handler,
 )
 from agent.tools.github_search_code import (
-    SEARCH_CODE_TOOL_SPEC,
-    SearchCodeTool,
-    search_code_handler,
 )
 from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler
 from agent.tools.types import ToolResult
@@ -30,16 +26,12 @@ __all__ = [
     "HF_JOBS_TOOL_SPEC",
     "hf_jobs_handler",
     "HfJobsTool",
-    "FIND_EXAMPLES_TOOL_SPEC",
-    "find_examples_handler",
-    "FindExamplesTool",
-    "READ_FILE_TOOL_SPEC",
-    "read_file_handler",
-    "ReadFileTool",
-    "LIST_REPOS_TOOL_SPEC",
-    "list_repos_handler",
-    "ListReposTool",
-    "SEARCH_CODE_TOOL_SPEC",
-    "search_code_handler",
-    "SearchCodeTool",
 ]

 """
 from agent.tools.github_find_examples import (
+    GITHUB_FIND_EXAMPLES_TOOL_SPEC,
+    github_find_examples_handler,
 )
 from agent.tools.github_list_repos import (
+    GITHUB_LIST_REPOS_TOOL_SPEC,
+    github_list_repos_handler,
 )
 from agent.tools.github_read_file import (
+    GITHUB_READ_FILE_TOOL_SPEC,
+    github_read_file_handler,
 )
 from agent.tools.github_search_code import (
+    GITHUB_SEARCH_CODE_TOOL_SPEC,
+    github_search_code_handler,
 )
 from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler
 from agent.tools.types import ToolResult
     "HF_JOBS_TOOL_SPEC",
     "hf_jobs_handler",
     "HfJobsTool",
+    "GITHUB_FIND_EXAMPLES_TOOL_SPEC",
+    "github_find_examples_handler",
+    "GITHUB_LIST_REPOS_TOOL_SPEC",
+    "github_list_repos_handler",
+    "GITHUB_READ_FILE_TOOL_SPEC",
+    "github_read_file_handler",
+    "GITHUB_SEARCH_CODE_TOOL_SPEC",
+    "github_search_code_handler",
 ]

agent/tools/github_find_examples.py CHANGED Viewed

@@ -1,115 +1,23 @@
 """
-GitHub Find Examples Tool
-Finds examples, guides, and tutorials for a library using deterministic queries and heuristics.
 """
-import asyncio
 import math
 import os
-from dataclasses import asdict, dataclass
 from datetime import datetime, timedelta
 from typing import Any, Dict, List, Optional
-try:
-    import requests
-except ImportError:
-    raise ImportError(
-        "requests library is required. Install with: pip install requests"
-    )
 from agent.tools.types import ToolResult
-@dataclass
-class Example:
-    """An example file with metadata and relevance score."""
-    repo: str
-    path: str
-    ref: str
-    url: str
-    score: float
-    reason: str
-    repo_stars: int
-    repo_updated: str
-    file_size: int
-    def to_dict(self):
-        return asdict(self)
-class GitHubAPIError(Exception):
-    """Raised when GitHub API returns an error."""
-    pass
-# Path-based scoring weights
-PATH_SCORES = {
-    "README.md": 100,
-    "readme.md": 100,
-    "docs/": 80,
-    "doc/": 80,
-    "examples/": 90,
-    "example/": 90,
-    "notebooks/": 70,
-    "notebook/": 70,
-    "tutorials/": 85,
-    "tutorial/": 85,
-    "guides/": 85,
-    "guide/": 85,
-    "tests/": 40,
-    "test/": 40,
-    "demos/": 75,
-    "demo/": 75,
-    "samples/": 75,
-    "sample/": 75,
-}
-# Content-based scoring keywords
-CONTENT_KEYWORDS = {
-    'if __name__ == "__main__"': 50,
-    "if __name__ == '__main__'": 50,
-    "quickstart": 60,
-    "quick start": 60,
-    "getting started": 60,
-    "tutorial": 50,
-    "example usage": 55,
-    "usage example": 55,
-    "how to use": 45,
-    "basic example": 50,
-    "simple example": 50,
-}
-# File extension preferences
-PREFERRED_EXTENSIONS = {
-    ".py": 10,
-    ".ipynb": 15,
-    ".md": 20,
-    ".rst": 10,
-    ".js": 10,
-    ".ts": 10,
-    ".go": 10,
-    ".java": 10,
-    ".cpp": 10,
-    ".c": 10,
-}
-def _get_github_token() -> str:
-    """Get GitHub token from environment."""
-    token = os.environ.get("GITHUB_TOKEN")
-    if not token:
-        raise GitHubAPIError(
-            "GITHUB_TOKEN environment variable is required. "
-            "Set it with: export GITHUB_TOKEN=your_token_here"
-        )
-    return token
-def _execute_search(query: str, token: str, limit: int = 20) -> List[Dict[str, Any]]:
-    """Execute a GitHub code search query."""
     headers = {
         "Accept": "application/vnd.github.text-match+json",
         "X-GitHub-Api-Version": "2022-11-28",
@@ -123,15 +31,18 @@ def _execute_search(query: str, token: str, limit: int = 20) -> List[Dict[str, A
     try:
         while len(results) < limit:
             params = {"q": query, "per_page": per_page, "page": page}
-            url = "https://api.github.com/search/code"
-            response = requests.get(url, headers=headers, params=params, timeout=30)
             if response.status_code != 200:
                 break
             data = response.json()
             items = data.get("items", [])
             if not items:
                 break
@@ -149,7 +60,6 @@ def _execute_search(query: str, token: str, limit: int = 20) -> List[Dict[str, A
             if len(results) >= limit or len(items) < per_page:
                 break
             page += 1
     except Exception:
@@ -159,7 +69,7 @@ def _execute_search(query: str, token: str, limit: int = 20) -> List[Dict[str, A
 def _fetch_repo_metadata(repos: List[str], token: str) -> Dict[str, Dict[str, Any]]:
-    """Fetch metadata for repositories."""
     headers = {
         "Accept": "application/vnd.github+json",
         "X-GitHub-Api-Version": "2022-11-28",
@@ -167,18 +77,16 @@ def _fetch_repo_metadata(repos: List[str], token: str) -> Dict[str, Dict[str, An
     }
     metadata = {}
     for repo in repos:
         try:
-            url = f"https://api.github.com/repos/{repo}"
-            response = requests.get(url, headers=headers, timeout=10)
             if response.status_code == 200:
                 data = response.json()
                 metadata[repo] = {
                     "stars": data.get("stargazers_count", 0),
                     "updated_at": data.get("updated_at", ""),
-                    "description": data.get("description", ""),
                 }
         except Exception:
             continue
@@ -186,157 +94,89 @@ def _fetch_repo_metadata(repos: List[str], token: str) -> Dict[str, Dict[str, An
     return metadata
-def _score_and_rank(
-    results: List[Dict[str, Any]], library: str, token: str
-) -> List[Example]:
-    """Score results based on heuristics and rank them."""
-    repos = list(set(r["repo"] for r in results))
-    repo_metadata = _fetch_repo_metadata(repos, token)
-    scored_examples = []
-    for result in results:
-        repo = result["repo"]
-        path = result["path"]
-        score = 0.0
-        reasons = []
-        # Path-based scoring
-        path_lower = path.lower()
-        for pattern, points in PATH_SCORES.items():
-            if pattern.lower() in path_lower:
-                score += points
-                reasons.append(f"in {pattern}")
-                break
-        # File extension scoring
-        for ext, points in PREFERRED_EXTENSIONS.items():
-            if path_lower.endswith(ext):
-                score += points
-                break
-        # Content-based scoring
-        text_content = ""
-        for match in result.get("text_matches", []):
-            text_content += match.get("fragment", "").lower() + " "
-        for keyword, points in CONTENT_KEYWORDS.items():
-            if keyword.lower() in text_content:
-                score += points
-                reasons.append(f"contains '{keyword}'")
-        # Repo-based scoring
-        metadata = repo_metadata.get(repo, {})
-        stars = metadata.get("stars", 0)
-        updated = metadata.get("updated_at", "")
-        if stars > 0:
-            star_score = math.log10(stars + 1) * 10
-            score += star_score
-        # Recency bonus
-        if updated:
-            try:
-                updated_date = datetime.fromisoformat(updated.replace("Z", "+00:00"))
-                if datetime.now(updated_date.tzinfo) - updated_date < timedelta(
-                    days=180
-                ):
-                    score += 20
-                    reasons.append("recently updated")
-            except Exception:
-                pass
-        # Filename quality
-        filename = path.split("/")[-1].lower()
-        if any(
-            word in filename
-            for word in ["example", "tutorial", "guide", "quickstart", "demo"]
-        ):
-            score += 30
-            reasons.append("descriptive filename")
-        # Size penalty
-        if result["size"] > 100000:
-            score *= 0.5
-            reasons.append("large file")
-        example = Example(
-            repo=repo,
-            path=path,
-            ref=result["sha"],
-            url=result["url"],
-            score=score,
-            reason=", ".join(reasons) if reasons else "matches library",
-            repo_stars=stars,
-            repo_updated=updated,
-            file_size=result["size"],
-        )
-        scored_examples.append(example)
-    scored_examples.sort(key=lambda x: x.score, reverse=True)
-    return scored_examples
-def _search_by_path(
-    library: str, org: str, repo_scope: Optional[str], token: str
-) -> List[Dict[str, Any]]:
-    """Search for library in example/tutorial/docs directories."""
-    results = []
-    path_patterns = [
-        "examples/",
-        "example/",
-        "docs/",
-        "tutorials/",
-        "notebooks/",
-        "guides/",
-    ]
-    for path in path_patterns:
-        query_parts = [f"org:{org}", f"{library}", f"path:{path}"]
-        if repo_scope:
-            query_parts[0] = f"repo:{org}/{repo_scope}"
-        query = " ".join(query_parts)
-        results.extend(_execute_search(query, token, limit=20))
-    return results
-def _search_by_content(
-    library: str, org: str, repo_scope: Optional[str], token: str
-) -> List[Dict[str, Any]]:
-    """Search for library with specific content patterns."""
-    results = []
-    content_patterns = [
-        f"{library} if __name__",
-        f"{library} quickstart",
-        f"{library} tutorial",
-        f"{library} usage example",
-    ]
-    for pattern in content_patterns:
-        query_parts = [f"org:{org}", pattern]
-        if repo_scope:
-            query_parts[0] = f"repo:{org}/{repo_scope}"
-        query = " ".join(query_parts)
-        results.extend(_execute_search(query, token, limit=15))
-    return results
-def _search_readmes(
-    library: str, org: str, repo_scope: Optional[str], token: str
-) -> List[Dict[str, Any]]:
-    """Search for library mentions in README files."""
-    query_parts = [f"org:{org}", f"{library}", "filename:README"]
-    if repo_scope:
-        query_parts[0] = f"repo:{org}/{repo_scope}"
-    query = " ".join(query_parts)
-    return _execute_search(query, token, limit=20)
 def find_examples(
@@ -344,30 +184,45 @@ def find_examples(
     org: str = "huggingface",
     repo_scope: Optional[str] = None,
     max_results: int = 10,
-) -> List[Example]:
     """
-    Find examples, guides, and tutorials for a library using deterministic queries.
-    Uses a playbook of smart searches and heuristics to find canonical examples:
-    - Prefers README.md, docs/**, examples/**, notebooks/**, tests/**
-    - Prefers files with if __name__ == "__main__", "quickstart", "tutorial"
-    - Prefers repos with higher stars and more recent updates
     Args:
-        library: Library name to search for (e.g., "transformers", "torch")
-        org: GitHub organization to search in (default: "huggingface")
-        repo_scope: Optional specific repository (e.g., "transformers")
-        max_results: Maximum number of results to return (default: 10)
     Returns:
-        List of Example objects, ranked by relevance score
     """
-    token = _get_github_token()
     all_results = []
-    all_results.extend(_search_by_path(library, org, repo_scope, token))
-    all_results.extend(_search_by_content(library, org, repo_scope, token))
-    all_results.extend(_search_readmes(library, org, repo_scope, token))
     # Deduplicate
     seen = set()
@@ -378,135 +233,89 @@ def find_examples(
             seen.add(key)
             unique_results.append(result)
-    scored_examples = _score_and_rank(unique_results, library, token)
-    return scored_examples[:max_results]
-async def _async_call(func, *args, **kwargs):
-    """Wrap synchronous calls for async context."""
-    return await asyncio.to_thread(func, *args, **kwargs)
-def _format_examples_table(examples: List[Example]) -> str:
-    """Format examples as a markdown table."""
-    if not examples:
-        return "No examples found."
-    lines = [
-        "| Rank | File | Score | Stars | Reason |",
-        "|------|------|-------|-------|--------|",
-    ]
-    for i, ex in enumerate(examples, 1):
-        file_path = f"{ex.repo}/{ex.path}"
-        if len(file_path) > 60:
-            file_path = file_path[:57] + "..."
-        reason = ex.reason if len(ex.reason) < 40 else ex.reason[:37] + "..."
-        lines.append(
-            f"| {i} | {file_path} | {ex.score:.1f} | {ex.repo_stars:,} | {reason} |"
-        )
-    return "\n".join(lines)
-class FindExamplesTool:
-    """Tool for finding examples and tutorials for libraries."""
-    async def execute(self, params: Dict[str, Any]) -> ToolResult:
-        """Execute find_examples operation."""
-        library = params.get("library")
-        if not library:
-            return {
-                "formatted": "Error: 'library' parameter is required",
-                "totalResults": 0,
-                "resultsShared": 0,
-                "isError": True,
             }
-        org = params.get("org", "huggingface")
-        repo_scope = params.get("repo_scope")
-        max_results = params.get("max_results", 10)
-        try:
-            examples = await _async_call(
-                find_examples,
-                library=library,
-                org=org,
-                repo_scope=repo_scope,
-                max_results=max_results,
-            )
-            if not examples:
-                return {
-                    "formatted": f"No examples found for '{library}' in {org}",
-                    "totalResults": 0,
-                    "resultsShared": 0,
-                }
-            table = _format_examples_table(examples)
-            response = f"**Found {len(examples)} examples for '{library}' in {org}:**\n\n{table}"
-            # Add URLs and suggest using read_file
-            response += "\n\n**Top examples (use read_file to view):**\n"
-            for i, ex in enumerate(examples[:3], 1):
-                response += f"{i}. [{ex.repo}/{ex.path}]({ex.url})\n"
-                response += f"   Use: read_file(repo='{ex.repo}', path='{ex.path}')\n"
-            return {
-                "formatted": response,
-                "totalResults": len(examples),
-                "resultsShared": len(examples),
-            }
-        except GitHubAPIError as e:
-            return {
-                "formatted": f"GitHub API Error: {str(e)}",
-                "totalResults": 0,
-                "resultsShared": 0,
-                "isError": True,
-            }
-        except Exception as e:
-            return {
-                "formatted": f"Error: {str(e)}",
-                "totalResults": 0,
-                "resultsShared": 0,
-                "isError": True,
-            }
 # Tool specification
-FIND_EXAMPLES_TOOL_SPEC = {
     "name": "find_examples",
     "description": (
-        "Find examples, guides, and tutorials for a library using deterministic queries and heuristics.\n\n"
-        "Uses best practices retrieval without semantic search:\n"
-        "- Prefers README.md, docs/**, examples/**, notebooks/**, tests/**\n"
-        "- Prefers files with if __name__ == '__main__', 'quickstart', 'tutorial', 'usage'\n"
-        "- Prefers repos with higher stars and more recent updates\n\n"
-        "Returns a ranked list of canonical example files.\n\n"
-        "Examples:\n"
-        "- Find transformers examples: {'library': 'transformers', 'org': 'huggingface', 'max_results': 5}\n"
-        "- Find torch examples in specific repo: {'library': 'torch', 'org': 'pytorch', 'repo_scope': 'examples'}\n\n"
-        "Use read_file tool to view the content of returned files.\n\n"
     ),
     "parameters": {
         "type": "object",
         "properties": {
             "library": {
                 "type": "string",
-                "description": "Library name to search for (e.g., 'transformers', 'torch', 'react')",
             },
             "org": {
                 "type": "string",
-                "description": "GitHub organization to search in (default: 'huggingface')",
             },
             "repo_scope": {
                 "type": "string",
-                "description": "Optional specific repository to search within",
             },
             "max_results": {
                 "type": "integer",
-                "description": "Maximum number of results to return (default: 10)",
             },
         },
         "required": ["library"],
@@ -514,11 +323,15 @@ FIND_EXAMPLES_TOOL_SPEC = {
 }
-async def find_examples_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
-    """Handler for agent tool router."""
     try:
-        tool = FindExamplesTool()
-        result = await tool.execute(arguments)
         return result["formatted"], not result.get("isError", False)
     except Exception as e:
-        return f"Error executing find_examples: {str(e)}", False

 """
+GitHub Find Examples Tool - Discover examples, tutorials, and guides for any library
+Uses intelligent heuristics to find the best learning resources on GitHub.
 """
 import math
 import os
 from datetime import datetime, timedelta
 from typing import Any, Dict, List, Optional
+import requests
 from agent.tools.types import ToolResult
+def _search_github_code(
+    query: str, token: str, limit: int = 20
+) -> List[Dict[str, Any]]:
+    """Execute a GitHub code search query"""
     headers = {
         "Accept": "application/vnd.github.text-match+json",
         "X-GitHub-Api-Version": "2022-11-28",
     try:
         while len(results) < limit:
             params = {"q": query, "per_page": per_page, "page": page}
+            response = requests.get(
+                "https://api.github.com/search/code",
+                headers=headers,
+                params=params,
+                timeout=30,
+            )
             if response.status_code != 200:
                 break
             data = response.json()
             items = data.get("items", [])
             if not items:
                 break
             if len(results) >= limit or len(items) < per_page:
                 break
             page += 1
     except Exception:
 def _fetch_repo_metadata(repos: List[str], token: str) -> Dict[str, Dict[str, Any]]:
+    """Fetch star count and update date for repositories"""
     headers = {
         "Accept": "application/vnd.github+json",
         "X-GitHub-Api-Version": "2022-11-28",
     }
     metadata = {}
     for repo in repos:
         try:
+            response = requests.get(
+                f"https://api.github.com/repos/{repo}", headers=headers, timeout=10
+            )
             if response.status_code == 200:
                 data = response.json()
                 metadata[repo] = {
                     "stars": data.get("stargazers_count", 0),
                     "updated_at": data.get("updated_at", ""),
                 }
         except Exception:
             continue
     return metadata
+def _score_example(
+    result: Dict[str, Any], metadata: Dict[str, Dict[str, Any]]
+) -> tuple[float, str]:
+    """Score one search hit with additive path, content, and repo heuristics.
+    Args:
+        result: Search hit; this function reads its "path", "repo", and
+            "size" keys and the optional "text_matches" list.
+        metadata: Maps a repo name to a dict with optional "stars" and
+            "updated_at" entries; a repo missing from it earns no repo bonus.
+    Returns:
+        A (score, reason) tuple where reason is the comma-joined list of
+        heuristics that fired, or "matches library" when none did.
+    """
+    path = result["path"].lower()
+    repo = result["repo"]
+    score = 0.0
+    reasons = []
+    # Path-based scoring: the elif chain means only the first matching
+    # location rule contributes, in descending order of checks below.
+    if "readme.md" in path:
+        score += 100
+        reasons.append("README file")
+    elif "examples/" in path or "example/" in path:
+        score += 90
+        reasons.append("in examples/")
+    elif "tutorials/" in path or "tutorial/" in path:
+        score += 85
+        reasons.append("in tutorials/")
+    elif "docs/" in path or "doc/" in path:
+        score += 80
+        reasons.append("in docs/")
+    elif "notebooks/" in path or "notebook/" in path:
+        score += 70
+        reasons.append("in notebooks/")
+    # Extension scoring: at most one bonus; it is not recorded in `reasons`.
+    if path.endswith(".ipynb"):
+        score += 15
+    elif path.endswith(".md"):
+        score += 20
+    elif path.endswith(".py"):
+        score += 10
+    # Content keywords from text matches: fragments are lowercased and joined
+    # so the substring checks below are case-insensitive.
+    text_content = ""
+    for match in result.get("text_matches", []):
+        text_content += match.get("fragment", "").lower() + " "
+    # These three checks are independent, so their bonuses can stack.
+    if 'if __name__ == "__main__"' in text_content:
+        score += 50
+        reasons.append("runnable example")
+    if "quickstart" in text_content or "getting started" in text_content:
+        score += 60
+        reasons.append("quickstart guide")
+    if "tutorial" in text_content:
+        score += 50
+        reasons.append("tutorial content")
+    # Repository metadata scoring: missing repos fall back to 0 stars and an
+    # empty timestamp, which skips both bonuses below.
+    repo_meta = metadata.get(repo, {})
+    stars = repo_meta.get("stars", 0)
+    updated_at = repo_meta.get("updated_at", "")
+    # Star-based score (logarithmic): 10 points per order of magnitude.
+    if stars > 0:
+        score += math.log10(stars + 1) * 10
+    # Recency bonus (updated within the last 180 days)
+    if updated_at:
+        try:
+            # Rewrite "Z" as an explicit "+00:00" offset before parsing, then
+            # compare against "now" in the parsed timestamp's own timezone.
+            updated_date = datetime.fromisoformat(updated_at.replace("Z", "+00:00"))
+            if datetime.now(updated_date.tzinfo) - updated_date < timedelta(days=180):
+                score += 20
+                reasons.append("recently updated")
+        except Exception:
+            # Best effort: a timestamp that fails to parse earns no bonus.
+            pass
+    # Filename quality: `path` is already lowercase, so .lower() is a no-op.
+    filename = path.split("/")[-1].lower()
+    if any(
+        word in filename
+        for word in ["example", "tutorial", "guide", "quickstart", "demo"]
+    ):
+        score += 30
+        reasons.append("descriptive filename")
+    # Size penalty for very large files: halves the whole accumulated score.
+    if result["size"] > 100000:
+        score *= 0.5
+        reasons.append("large file")
+    return score, ", ".join(reasons) if reasons else "matches library"
 def find_examples(
     org: str = "huggingface",
     repo_scope: Optional[str] = None,
     max_results: int = 10,
+) -> ToolResult:
     """
+    Find examples, tutorials, and guides for a library using intelligent search.
     Args:
+        library: Library name (e.g., "transformers", "torch", "react")
+        org: GitHub organization to search in
+        repo_scope: Optional specific repository name
+        max_results: Maximum number of results (default 10)
     Returns:
+        ToolResult with ranked examples
     """
+    token = os.environ.get("GITHUB_TOKEN")
+    if not token:
+        return {
+            "formatted": "Error: GITHUB_TOKEN environment variable is required",
+            "totalResults": 0,
+            "resultsShared": 0,
+            "isError": True,
+        }
+    # Build search queries
     all_results = []
+    # Query 1: Search in example directories
+    for path_pattern in ["examples/", "docs/", "tutorials/", "notebooks/"]:
+        query_parts = [f"org:{org}", library, f"path:{path_pattern}"]
+        if repo_scope:
+            query_parts[0] = f"repo:{org}/{repo_scope}"
+        query = " ".join(query_parts)
+        all_results.extend(_search_github_code(query, token, limit=20))
+    # Query 2: Search README files
+    query_parts = [f"org:{org}", library, "filename:README"]
+    if repo_scope:
+        query_parts[0] = f"repo:{org}/{repo_scope}"
+    query = " ".join(query_parts)
+    all_results.extend(_search_github_code(query, token, limit=20))
     # Deduplicate
     seen = set()
             seen.add(key)
             unique_results.append(result)
+    if not unique_results:
+        return {
+            "formatted": f"No examples found for '{library}' in {org}",
+            "totalResults": 0,
+            "resultsShared": 0,
+        }
+    # Fetch repo metadata
+    repos = list(set(r["repo"] for r in unique_results))
+    metadata = _fetch_repo_metadata(repos, token)
+    # Score and rank
+    scored = []
+    for result in unique_results:
+        score, reason = _score_example(result, metadata)
+        repo_meta = metadata.get(result["repo"], {})
+        scored.append(
+            {
+                "repo": result["repo"],
+                "path": result["path"],
+                "url": result["url"],
+                "score": score,
+                "reason": reason,
+                "stars": repo_meta.get("stars", 0),
             }
+        )
+    scored.sort(key=lambda x: x["score"], reverse=True)
+    top_results = scored[:max_results]
+    # Format output
+    lines = [f"**Found {len(top_results)} examples for '{library}' in {org}:**\n"]
+    for i, ex in enumerate(top_results, 1):
+        lines.append(f"{i}. **{ex['repo']}/{ex['path']}**")
+        lines.append(f"   Score: {ex['score']:.1f} | ⭐ {ex['stars']:,} stars")
+        lines.append(f"   Reason: {ex['reason']}")
+        lines.append(f"   URL: {ex['url']}\n")
+    return {
+        "formatted": "\n".join(lines),
+        "totalResults": len(top_results),
+        "resultsShared": len(top_results),
+    }
 # Tool specification
+GITHUB_FIND_EXAMPLES_TOOL_SPEC = {
     "name": "find_examples",
     "description": (
+        "Find examples, tutorials, and guides for any library on GitHub using intelligent heuristic-based search.\n\n"
+        "Uses multiple search strategies and ranks results by:\n"
+        "- Path quality (examples/, docs/, tutorials/ directories)\n"
+        "- Content keywords (quickstart, tutorial, runnable code)\n"
+        "- Repository popularity (stars, recent updates)\n"
+        "- File characteristics (size, extension, descriptive names)\n\n"
+        "## Examples:\n\n"
+        "**Find transformers examples in Hugging Face:**\n"
+        "{'library': 'transformers', 'org': 'huggingface', 'max_results': 5}\n\n"
+        "**Find PyTorch examples in specific repo:**\n"
+        "{'library': 'torch', 'org': 'pytorch', 'repo_scope': 'examples', 'max_results': 10}\n\n"
+        "**Find React quickstart guides:**\n"
+        "{'library': 'react quickstart', 'org': 'facebook', 'max_results': 3}\n\n"
+        "Returns ranked list with file paths, scores, star counts, and direct URLs."
     ),
     "parameters": {
         "type": "object",
         "properties": {
             "library": {
                 "type": "string",
+                "description": "Library name to search for (e.g., 'transformers', 'torch', 'react'). Required.",
             },
             "org": {
                 "type": "string",
+                "description": "GitHub organization to search in. Default: 'huggingface'.",
             },
             "repo_scope": {
                 "type": "string",
+                "description": "Optional specific repository name within the org (e.g., 'transformers').",
             },
             "max_results": {
                 "type": "integer",
+                "description": "Maximum number of results to return. Default: 10.",
             },
         },
         "required": ["library"],
 }
+async def github_find_examples_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
+    """Handle a find_examples call from the agent tool router.
+    Args:
+        arguments: Tool arguments; "library" is required, while "org",
+            "repo_scope", and "max_results" are optional.
+    Returns:
+        A (message, success) tuple: the formatted result text and True on
+        success, or an error message and False on failure.
+    """
+    # Local import: this module no longer imports asyncio at the top level.
+    import asyncio
+    # Reject a missing/empty library up front so the caller gets a clear
+    # message instead of a bare KeyError rendered as "'library'".
+    library = arguments.get("library")
+    if not library:
+        return "Error: 'library' parameter is required", False
     try:
+        # find_examples performs blocking HTTP requests; run it in a worker
+        # thread so this coroutine does not stall the event loop.
+        result = await asyncio.to_thread(
+            find_examples,
+            library=library,
+            org=arguments.get("org", "huggingface"),
+            repo_scope=arguments.get("repo_scope"),
+            max_results=arguments.get("max_results", 10),
+        )
         return result["formatted"], not result.get("isError", False)
     except Exception as e:
+        return f"Error finding examples: {str(e)}", False

agent/tools/github_list_repos.py CHANGED Viewed

@@ -1,70 +1,49 @@
 """
-GitHub List Repos Tool
-Lists repositories for a user or organization with sorting options.
 """
-import asyncio
 import os
-from dataclasses import asdict, dataclass
-from typing import Any, Dict, List, Literal, Optional
-try:
-    import requests
-except ImportError:
-    raise ImportError(
-        "requests library is required. Install with: pip install requests"
-    )
 from agent.tools.types import ToolResult
-@dataclass
-class Repository:
-    """Repository information."""
-    id: int
-    name: str
-    full_name: str
-    description: Optional[str]
-    html_url: str
-    language: Optional[str]
-    stars: int
-    forks: int
-    open_issues: int
-    private: bool
-    fork: bool
-    archived: bool
-    default_branch: str
-    created_at: Optional[str] = None
-    updated_at: Optional[str] = None
-    topics: Optional[List[str]] = None
-    def to_dict(self):
-        return asdict(self)
-class GitHubAPIError(Exception):
-    """Raised when GitHub API returns an error."""
-    pass
-def _get_github_token() -> str:
-    """Get GitHub token from environment."""
     token = os.environ.get("GITHUB_TOKEN")
     if not token:
-        raise GitHubAPIError(
-            "GITHUB_TOKEN environment variable is required. "
-            "Set it with: export GITHUB_TOKEN=your_token_here"
-        )
-    return token
-def _fetch_repositories(
-    query: str, sort: str, order: str, limit: Optional[int], token: str
-) -> List[Repository]:
-    """Fetch repositories from GitHub Search API."""
     headers = {
         "Accept": "application/vnd.github+json",
         "X-GitHub-Api-Version": "2022-11-28",
@@ -75,22 +54,46 @@ def _fetch_repositories(
     page = 1
     per_page = min(100, limit) if limit else 100
-    while True:
-        params = {
-            "q": query,
-            "sort": sort,
-            "order": order,
-            "page": page,
-            "per_page": per_page,
-        }
-        url = "https://api.github.com/search/repositories"
-        try:
-            response = requests.get(url, headers=headers, params=params, timeout=30)
             if response.status_code != 200:
-                break
             data = response.json()
             items = data.get("items", [])
@@ -99,214 +102,122 @@ def _fetch_repositories(
                 break
             for item in items:
-                repo = Repository(
-                    id=item.get("id"),
-                    name=item.get("name"),
-                    full_name=item.get("full_name"),
-                    description=item.get("description"),
-                    html_url=item.get("html_url"),
-                    language=item.get("language"),
-                    stars=item.get("stargazers_count", 0),
-                    forks=item.get("forks_count", 0),
-                    open_issues=item.get("open_issues_count", 0),
-                    private=item.get("private", False),
-                    fork=item.get("fork", False),
-                    archived=item.get("archived", False),
-                    default_branch=item.get("default_branch", "main"),
-                    created_at=item.get("created_at"),
-                    updated_at=item.get("updated_at"),
-                    topics=item.get("topics", []),
                 )
-                all_repos.append(repo)
             if limit and len(all_repos) >= limit:
                 all_repos = all_repos[:limit]
                 break
             total_count = data.get("total_count", 0)
-            if len(all_repos) >= total_count:
-                break
-            if page * per_page >= 1000:
                 break
             page += 1
-        except requests.exceptions.RequestException:
-            break
-    return all_repos
-def list_repos(
-    owner: str,
-    owner_type: Literal["user", "org"] = "org",
-    sort: Literal["stars", "forks", "updated", "created"] = "stars",
-    order: Literal["asc", "desc"] = "desc",
-    limit: Optional[int] = None,
-) -> List[Repository]:
-    """
-    List repositories for a user or organization using GitHub Search API.
-    Backed by https://api.github.com/search/repositories?q=org:huggingface&sort=stars&order=desc
-    or can use GraphQL + client-side sort.
-    Args:
-        owner: GitHub username or organization name
-        owner_type: Whether the owner is a "user" or "org" (default: "org")
-        sort: Sort field - "stars", "forks", "updated", or "created" (default: "stars")
-        order: Sort order - "asc" or "desc" (default: "desc")
-        limit: Maximum number of repositories to return (default: no limit)
-    Returns:
-        List of Repository objects
-    """
-    token = _get_github_token()
-    if owner_type == "org":
-        query = f"org:{owner}"
-    else:
-        query = f"user:{owner}"
-    repos = _fetch_repositories(
-        query=query, sort=sort, order=order, limit=limit, token=token
-    )
-    return repos
-async def _async_call(func, *args, **kwargs):
-    """Wrap synchronous calls for async context."""
-    return await asyncio.to_thread(func, *args, **kwargs)
-def _format_repos_table(repos: List[Repository]) -> str:
-    """Format repositories as a markdown table."""
-    if not repos:
-        return "No repositories found."
-    lines = [
-        "| Repo | Stars | Forks | Language | Description |",
-        "|------|-------|-------|----------|-------------|",
-    ]
-    for repo in repos:
-        desc = repo.description or "N/A"
-        if len(desc) > 50:
-            desc = desc[:47] + "..."
-        lang = repo.language or "N/A"
         lines.append(
-            f"| {repo.full_name} | {repo.stars:,} | {repo.forks:,} | {lang} | {desc} |"
         )
-    return "\n".join(lines)
-class ListReposTool:
-    """Tool for listing GitHub repositories."""
-    async def execute(self, params: Dict[str, Any]) -> ToolResult:
-        """Execute list_repos operation."""
-        owner = params.get("owner")
-        if not owner:
-            return {
-                "formatted": "Error: 'owner' parameter is required",
-                "totalResults": 0,
-                "resultsShared": 0,
-                "isError": True,
-            }
-        owner_type = params.get("owner_type", "org")
-        sort = params.get("sort", "stars")
-        order = params.get("order", "desc")
-        limit = params.get("limit")
-        try:
-            repos = await _async_call(
-                list_repos,
-                owner=owner,
-                owner_type=owner_type,
-                sort=sort,
-                order=order,
-                limit=limit,
             )
-            if not repos:
-                return {
-                    "formatted": f"No repositories found for {owner}",
-                    "totalResults": 0,
-                    "resultsShared": 0,
-                }
-            table = _format_repos_table(repos)
-            response = f"**Found {len(repos)} repositories for {owner} (sorted by {sort}, {order}):**\n\n{table}"
-            # Add links to top repos
-            response += "\n\n**Top repositories:**\n"
-            for i, repo in enumerate(repos[:5], 1):
-                response += (
-                    f"{i}. [{repo.full_name}]({repo.html_url}) - ⭐ {repo.stars:,}\n"
-                )
-            return {
-                "formatted": response,
-                "totalResults": len(repos),
-                "resultsShared": len(repos),
-            }
-        except GitHubAPIError as e:
-            return {
-                "formatted": f"GitHub API Error: {str(e)}",
-                "totalResults": 0,
-                "resultsShared": 0,
-                "isError": True,
-            }
-        except Exception as e:
-            return {
-                "formatted": f"Error: {str(e)}",
-                "totalResults": 0,
-                "resultsShared": 0,
-                "isError": True,
-            }
 # Tool specification
-LIST_REPOS_TOOL_SPEC = {
     "name": "list_repos",
     "description": (
-        "List repositories for a user or organization with sorting options.\n\n"
-        "Backed by GitHub Search API: https://api.github.com/search/repositories?q=org:huggingface&sort=stars&order=desc\n\n"
-        "Examples:\n"
-        "- Top 10 starred repos: {'owner': 'huggingface', 'sort': 'stars', 'limit': 10}\n"
-        "- Recently updated: {'owner': 'microsoft', 'sort': 'updated', 'order': 'desc', 'limit': 5}\n"
-        "- User repos: {'owner': 'torvalds', 'owner_type': 'user', 'sort': 'stars'}\n"
-        "- All repos: {'owner': 'pytorch', 'sort': 'forks'}\n\n"
     ),
     "parameters": {
         "type": "object",
         "properties": {
             "owner": {
                 "type": "string",
-                "description": "GitHub username or organization name (e.g., 'huggingface', 'torvalds')",
             },
             "owner_type": {
                 "type": "string",
                 "enum": ["user", "org"],
-                "description": "Whether the owner is a 'user' or 'org' (default: 'org')",
             },
             "sort": {
                 "type": "string",
                 "enum": ["stars", "forks", "updated", "created"],
-                "description": "Sort field: 'stars', 'forks', 'updated', or 'created' (default: 'stars')",
             },
             "order": {
                 "type": "string",
                 "enum": ["asc", "desc"],
-                "description": "Sort order: 'asc' or 'desc' (default: 'desc')",
             },
             "limit": {
                 "type": "integer",
-                "description": "Maximum number of repositories to return (default: no limit, returns all)",
             },
         },
         "required": ["owner"],
@@ -314,11 +225,16 @@ LIST_REPOS_TOOL_SPEC = {
 }
-async def list_repos_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
-    """Handler for agent tool router."""
     try:
-        tool = ListReposTool()
-        result = await tool.execute(arguments)
         return result["formatted"], not result.get("isError", False)
     except Exception as e:
-        return f"Error executing list_repos: {str(e)}", False

 """
+GitHub List Repositories Tool - List and sort repositories for any user or organization
+Efficiently discover repositories with flexible sorting options.
 """
 import os
+from typing import Any, Dict, Literal, Optional
+import requests
 from agent.tools.types import ToolResult
+def list_repos(
+    owner: str,
+    owner_type: Literal["user", "org"] = "org",
+    sort: Literal["stars", "forks", "updated", "created"] = "stars",
+    order: Literal["asc", "desc"] = "desc",
+    limit: Optional[int] = None,
+) -> ToolResult:
+    """
+    List repositories for a user or organization using GitHub Search API.
+    Args:
+        owner: GitHub username or organization name
+        owner_type: Whether the owner is a "user" or "org" (default: "org")
+        sort: Sort field - "stars", "forks", "updated", or "created"
+        order: Sort order - "asc" or "desc" (default: "desc")
+        limit: Maximum number of repositories to return
+    Returns:
+        ToolResult with repository information
+    """
     token = os.environ.get("GITHUB_TOKEN")
     if not token:
+        return {
+            "formatted": "Error: GITHUB_TOKEN environment variable is required",
+            "totalResults": 0,
+            "resultsShared": 0,
+            "isError": True,
+        }
+    # Build search query
+    query = f"org:{owner}" if owner_type == "org" else f"user:{owner}"
     headers = {
         "Accept": "application/vnd.github+json",
         "X-GitHub-Api-Version": "2022-11-28",
     page = 1
     per_page = min(100, limit) if limit else 100
+    try:
+        while True:
+            params = {
+                "q": query,
+                "sort": sort,
+                "order": order,
+                "page": page,
+                "per_page": per_page,
+            }
+            response = requests.get(
+                "https://api.github.com/search/repositories",
+                headers=headers,
+                params=params,
+                timeout=30,
+            )
+            if response.status_code == 403:
+                error_data = response.json()
+                return {
+                    "formatted": f"GitHub API rate limit or permission error: {error_data.get('message', 'Unknown error')}",
+                    "totalResults": 0,
+                    "resultsShared": 0,
+                    "isError": True,
+                }
             if response.status_code != 200:
+                error_msg = f"GitHub API error (status {response.status_code})"
+                try:
+                    error_data = response.json()
+                    if "message" in error_data:
+                        error_msg += f": {error_data['message']}"
+                except Exception:
+                    pass
+                return {
+                    "formatted": error_msg,
+                    "totalResults": 0,
+                    "resultsShared": 0,
+                    "isError": True,
+                }
             data = response.json()
             items = data.get("items", [])
                 break
             for item in items:
+                all_repos.append(
+                    {
+                        "name": item.get("name"),
+                        "full_name": item.get("full_name"),
+                        "description": item.get("description"),
+                        "html_url": item.get("html_url"),
+                        "language": item.get("language"),
+                        "stars": item.get("stargazers_count", 0),
+                        "forks": item.get("forks_count", 0),
+                        "open_issues": item.get("open_issues_count", 0),
+                        "topics": item.get("topics", []),
+                        "updated_at": item.get("updated_at"),
+                    }
                 )
+            # Check limits
             if limit and len(all_repos) >= limit:
                 all_repos = all_repos[:limit]
                 break
             total_count = data.get("total_count", 0)
+            if len(all_repos) >= total_count or page * per_page >= 1000:
                 break
             page += 1
+    except requests.exceptions.RequestException as e:
+        return {
+            "formatted": f"Failed to connect to GitHub API: {str(e)}",
+            "totalResults": 0,
+            "resultsShared": 0,
+            "isError": True,
+        }
+    if not all_repos:
+        return {
+            "formatted": f"No repositories found for {owner_type} '{owner}'",
+            "totalResults": 0,
+            "resultsShared": 0,
+        }
+    # Format output
+    lines = [f"**Found {len(all_repos)} repositories for {owner}:**\n"]
+    for i, repo in enumerate(all_repos, 1):
+        lines.append(f"{i}. **{repo['full_name']}**")
         lines.append(
+            f"   ⭐ {repo['stars']:,} stars | 🍴 {repo['forks']:,} forks | Language: {repo['language'] or 'N/A'}"
         )
+        if repo["description"]:
+            desc = (
+                repo["description"][:100] + "..."
+                if len(repo["description"]) > 100
+                else repo["description"]
             )
+            lines.append(f"   {desc}")
+        lines.append(f"   URL: {repo['html_url']}")
+        if repo["topics"]:
+            lines.append(f"   Topics: {', '.join(repo['topics'][:5])}")
+        lines.append("")
+    return {
+        "formatted": "\n".join(lines),
+        "totalResults": len(all_repos),
+        "resultsShared": len(all_repos),
+    }
 # Tool specification
+GITHUB_LIST_REPOS_TOOL_SPEC = {
+    # The description is shown to the model: list only fields that
+    # list_repos() actually prints in its formatted output.
     "name": "list_repos",
     "description": (
+        "List and sort repositories for any GitHub user or organization.\n\n"
+        "Uses GitHub Search API for efficient sorting by stars, forks, update date, or creation date.\n"
+        "Returns for each repository:\n"
+        "- Stars and forks count\n"
+        "- Primary programming language\n"
+        "- Description (truncated to 100 characters)\n"
+        "- Repository topics/tags (first 5)\n"
+        "- Direct URLs\n\n"
+        "## Examples:\n\n"
+        "**List top 10 starred Hugging Face repos:**\n"
+        "{'owner': 'huggingface', 'owner_type': 'org', 'sort': 'stars', 'limit': 10}\n\n"
+        "**List recently updated Microsoft repos:**\n"
+        "{'owner': 'microsoft', 'sort': 'updated', 'order': 'desc', 'limit': 5}\n\n"
+        "**List all repos for a user:**\n"
+        "{'owner': 'torvalds', 'owner_type': 'user', 'sort': 'stars'}\n\n"
+        "**Find most forked Google repos:**\n"
+        "{'owner': 'google', 'sort': 'forks', 'order': 'desc', 'limit': 20}\n\n"
+        "Perfect for discovering popular projects, finding active repositories, or exploring an organization's work."
     ),
     "parameters": {
         "type": "object",
         "properties": {
             "owner": {
                 "type": "string",
+                "description": "GitHub username or organization name. Required.",
             },
             "owner_type": {
                 "type": "string",
                 "enum": ["user", "org"],
+                "description": "Whether the owner is a 'user' or 'org'. Default: 'org'.",
             },
             "sort": {
                 "type": "string",
                 "enum": ["stars", "forks", "updated", "created"],
+                "description": "Sort field. Options: 'stars', 'forks', 'updated', 'created'. Default: 'stars'.",
             },
             "order": {
                 "type": "string",
                 "enum": ["asc", "desc"],
+                "description": "Sort order. Options: 'asc', 'desc'. Default: 'desc'.",
             },
             "limit": {
                 "type": "integer",
+                "description": "Maximum number of repositories to return. If omitted, returns every match up to the GitHub Search API cap of 1000 results.",
             },
         },
         "required": ["owner"],
 }
+async def github_list_repos_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
+    """Handler for agent tool router.
+
+    Validates the required ``owner`` argument, then runs the synchronous
+    ``list_repos`` in a worker thread. ``list_repos`` issues blocking
+    ``requests.get`` calls (30-second timeout, one per result page), which
+    would otherwise stall the event loop for every other coroutine.
+
+    Args:
+        arguments: Tool-call arguments. ``owner`` is required; ``owner_type``,
+            ``sort``, ``order`` and ``limit`` are optional.
+
+    Returns:
+        ``(formatted_text, success)``. ``success`` is False when the result
+        carries a truthy ``isError`` or when any exception was raised.
+    """
+    # Imported locally: asyncio is not among this module's top-level imports.
+    import asyncio
+
     try:
+        owner = arguments.get("owner")
+        if not owner:
+            # A bare arguments["owner"] would surface as the opaque
+            # message "Error listing repositories: 'owner'".
+            return "Error: 'owner' parameter is required", False
+        result = await asyncio.to_thread(
+            list_repos,
+            owner=owner,
+            owner_type=arguments.get("owner_type", "org"),
+            sort=arguments.get("sort", "stars"),
+            order=arguments.get("order", "desc"),
+            limit=arguments.get("limit"),
+        )
         return result["formatted"], not result.get("isError", False)
     except Exception as e:
+        return f"Error listing repositories: {str(e)}", False

agent/tools/github_read_file.py CHANGED Viewed

@@ -1,135 +1,67 @@
 """
-GitHub Read File Tool
-Reads file contents from a GitHub repository with line range support.
 """
-import asyncio
 import base64
 import os
-from dataclasses import asdict, dataclass
-from typing import Any, Dict, Optional, Tuple
-try:
-    import requests
-except ImportError:
-    raise ImportError(
-        "requests library is required. Install with: pip install requests"
-    )
 from agent.tools.types import ToolResult
-@dataclass
-class FileContents:
-    """File contents with metadata."""
-    content: str
-    sha: str
-    path: str
-    size: int
-    last_modified: Optional[str]
-    last_commit_sha: Optional[str]
-    line_start: int
-    line_end: int
-    total_lines: int
-    truncated: bool
-    message: Optional[str] = None
-    def to_dict(self):
-        return asdict(self)
-class GitHubAPIError(Exception):
-    """Raised when GitHub API returns an error."""
-    pass
-def _get_github_token() -> str:
-    """Get GitHub token from environment."""
     token = os.environ.get("GITHUB_TOKEN")
     if not token:
-        raise GitHubAPIError(
-            "GITHUB_TOKEN environment variable is required. "
-            "Set it with: export GITHUB_TOKEN=your_token_here"
-        )
-    return token
-def _fetch_raw_content(owner: str, repo: str, path: str, ref: str, token: str) -> str:
-    """Fetch raw file content for large files."""
-    headers = {
-        "Accept": "application/vnd.github.raw",
-        "X-GitHub-Api-Version": "2022-11-28",
-        "Authorization": f"Bearer {token}",
-    }
-    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
-    params = {"ref": ref}
-    response = requests.get(url, headers=headers, params=params, timeout=30)
-    if response.status_code != 200:
-        raise GitHubAPIError(
-            f"Failed to fetch raw content: HTTP {response.status_code}"
-        )
-    return response.text
-def _get_last_commit_info(
-    owner: str, repo: str, path: str, ref: Optional[str], token: str
-) -> Tuple[Optional[str], Optional[str]]:
-    """Get last commit information for a specific file."""
-    headers = {
-        "Accept": "application/vnd.github+json",
-        "X-GitHub-Api-Version": "2022-11-28",
-        "Authorization": f"Bearer {token}",
-    }
-    url = f"https://api.github.com/repos/{owner}/{repo}/commits"
-    params = {"path": path, "per_page": 1}
-    if ref and ref != "HEAD":
-        params["sha"] = ref
-    try:
-        response = requests.get(url, headers=headers, params=params, timeout=30)
-        if response.status_code == 200:
-            commits = response.json()
-            if commits:
-                commit = commits[0]
-                commit_sha = commit.get("sha")
-                commit_date = commit.get("commit", {}).get("committer", {}).get("date")
-                return commit_date, commit_sha
-    except Exception:
-        pass
-    return None, None
-def _fetch_file_contents(
-    owner: str,
-    repo: str,
-    path: str,
-    ref: str,
-    line_start: Optional[int],
-    line_end: Optional[int],
-    token: str,
-) -> FileContents:
-    """Fetch file contents from GitHub API."""
     headers = {
         "Accept": "application/vnd.github+json",
         "X-GitHub-Api-Version": "2022-11-28",
         "Authorization": f"Bearer {token}",
     }
-    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
     params = {}
     if ref and ref != "HEAD":
         params["ref"] = ref
@@ -137,9 +69,12 @@ def _fetch_file_contents(
         response = requests.get(url, headers=headers, params=params, timeout=30)
         if response.status_code == 404:
-            raise GitHubAPIError(
-                f"File not found: {path} in {owner}/{repo} (ref: {ref})"
-            )
         if response.status_code != 200:
             error_msg = f"GitHub API error (status {response.status_code})"
@@ -149,17 +84,23 @@ def _fetch_file_contents(
                     error_msg += f": {error_data['message']}"
             except Exception:
                 pass
-            raise GitHubAPIError(error_msg)
         data = response.json()
         if data.get("type") != "file":
-            raise GitHubAPIError(
-                f"Path {path} is not a file (type: {data.get('type')})"
-            )
-        file_sha = data.get("sha")
-        file_size = data.get("size", 0)
         # Decode content
         content_b64 = data.get("content", "")
@@ -167,214 +108,142 @@ def _fetch_file_contents(
             content_b64 = content_b64.replace("\n", "").replace(" ", "")
             content = base64.b64decode(content_b64).decode("utf-8", errors="replace")
         else:
-            content = _fetch_raw_content(owner, repo, path, ref or "HEAD", token)
-    except requests.exceptions.RequestException as e:
-        raise GitHubAPIError(f"Failed to connect to GitHub API: {e}")
-    # Get last commit info
-    last_modified, last_commit_sha = _get_last_commit_info(
-        owner, repo, path, ref, token
-    )
-    # Process line ranges
-    lines = content.split("\n")
-    total_lines = len(lines)
-    truncated = False
-    message = None
-    if line_start is None and line_end is None:
-        if total_lines > 300:
-            line_start = 1
-            line_end = 300
-            truncated = True
-            message = (
-                f"File has {total_lines} lines. Returned only the first 300 lines. "
-                f"To view more, use the line_start and line_end parameters."
-            )
-        else:
-            line_start = 1
-            line_end = total_lines
-    else:
-        if line_start is None:
-            line_start = 1
-        if line_end is None:
-            line_end = total_lines
-        if line_start < 1:
-            line_start = 1
-        if line_end > total_lines:
-            line_end = total_lines
-        if line_start > line_end:
-            raise ValueError(
-                f"line_start ({line_start}) cannot be greater than line_end ({line_end})"
-            )
-    selected_lines = lines[line_start - 1 : line_end]
-    selected_content = "\n".join(selected_lines)
-    return FileContents(
-        content=selected_content,
-        sha=file_sha,
-        path=path,
-        size=file_size,
-        last_modified=last_modified,
-        last_commit_sha=last_commit_sha,
-        line_start=line_start,
-        line_end=line_end,
-        total_lines=total_lines,
-        truncated=truncated,
-        message=message,
-    )
-def read_file(
-    repo: str,
-    path: str,
-    ref: str = "HEAD",
-    line_start: Optional[int] = None,
-    line_end: Optional[int] = None,
-) -> FileContents:
-    """
-    Read file contents from a GitHub repository.
-    Returns raw file text plus metadata (commit SHA, last modified).
-    If file is more than 300 lines and no line range is specified,
-    returns only the first 300 lines with a message.
-    Args:
-        repo: Repository in format "owner/repo" (e.g., "huggingface/transformers")
-        path: Path to file in repository (e.g., "README.md")
-        ref: Git reference - branch name, tag, or commit SHA (default: "HEAD")
-        line_start: Starting line number (1-indexed, inclusive)
-        line_end: Ending line number (1-indexed, inclusive)
-    Returns:
-        FileContents object with content and metadata
-    """
-    if "/" not in repo:
-        raise ValueError("repo must be in format 'owner/repo'")
-    owner, repo_name = repo.split("/", 1)
-    token = _get_github_token()
-    return _fetch_file_contents(
-        owner=owner,
-        repo=repo_name,
-        path=path,
-        ref=ref,
-        line_start=line_start,
-        line_end=line_end,
-        token=token,
-    )
-async def _async_call(func, *args, **kwargs):
-    """Wrap synchronous calls for async context."""
-    return await asyncio.to_thread(func, *args, **kwargs)
-class ReadFileTool:
-    """Tool for reading files from GitHub repositories."""
-    async def execute(self, params: Dict[str, Any]) -> ToolResult:
-        """Execute read_file operation."""
-        repo = params.get("repo")
-        path = params.get("path")
-        if not repo or not path:
-            return {
-                "formatted": "Error: 'repo' and 'path' parameters are required",
-                "totalResults": 0,
-                "resultsShared": 0,
-                "isError": True,
             }
-        ref = params.get("ref", "HEAD")
-        line_start = params.get("line_start")
-        line_end = params.get("line_end")
-        try:
-            file_contents = await _async_call(
-                read_file,
-                repo=repo,
-                path=path,
-                ref=ref,
-                line_start=line_start,
-                line_end=line_end,
             )
-            response = f"**File: {file_contents.path}**\n"
-            response += f"**Repo: {repo}**\n"
-            response += f"**Lines:** {file_contents.line_start}-{file_contents.line_end} of {file_contents.total_lines}\n"
-            response += f"**SHA:** {file_contents.sha}\n"
-            if file_contents.last_modified:
-                response += f"**Last modified:** {file_contents.last_modified}\n"
-            if file_contents.message:
-                response += f"\n⚠️ {file_contents.message}\n"
-            response += f"\n```\n{file_contents.content}\n```"
-            return {
-                "formatted": response,
-                "totalResults": 1,
-                "resultsShared": 1,
-            }
-        except GitHubAPIError as e:
-            return {
-                "formatted": f"GitHub API Error: {str(e)}",
-                "totalResults": 0,
-                "resultsShared": 0,
-                "isError": True,
-            }
-        except Exception as e:
-            return {
-                "formatted": f"Error: {str(e)}",
-                "totalResults": 0,
-                "resultsShared": 0,
-                "isError": True,
-            }
 # Tool specification
-READ_FILE_TOOL_SPEC = {
     "name": "read_file",
     "description": (
-        "Read file contents from a GitHub repository.\n\n"
-        "Returns raw file text plus metadata (commit SHA, last modified).\n"
-        "If file is more than 300 lines, returns only the first 300 lines and includes line_start and line_end indexes.\n"
-        "Use line_start and line_end parameters to view specific line ranges.\n\n"
-        "Examples:\n"
-        "- Read README: {'repo': 'huggingface/transformers', 'path': 'README.md'}\n"
-        "- Read specific lines: {'repo': 'huggingface/transformers', 'path': 'src/transformers/__init__.py', 'line_start': 1, 'line_end': 50}\n"
-        "- Read from branch: {'repo': 'torvalds/linux', 'path': 'MAINTAINERS', 'ref': 'master', 'line_start': 1, 'line_end': 20}\n\n"
     ),
     "parameters": {
         "type": "object",
         "properties": {
             "repo": {
                 "type": "string",
-                "description": "Repository in format 'owner/repo' (e.g., 'huggingface/transformers')",
             },
             "path": {
                 "type": "string",
-                "description": "Path to file in repository (e.g., 'README.md', 'src/main.py')",
             },
             "ref": {
                 "type": "string",
-                "description": "Git reference: branch name, tag, or commit SHA (default: 'HEAD')",
             },
             "line_start": {
                 "type": "integer",
-                "description": "Starting line number (1-indexed, inclusive). Use to read specific range.",
             },
             "line_end": {
                 "type": "integer",
-                "description": "Ending line number (1-indexed, inclusive). Use to read specific range.",
             },
         },
         "required": ["repo", "path"],
@@ -382,11 +251,16 @@ READ_FILE_TOOL_SPEC = {
 }
-async def read_file_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
-    """Handler for agent tool router."""
     try:
-        tool = ReadFileTool()
-        result = await tool.execute(arguments)
         return result["formatted"], not result.get("isError", False)
     except Exception as e:
-        return f"Error executing read_file: {str(e)}", False

 """
+GitHub Read File Tool - Read file contents from any GitHub repository with line range support
+Fetch exact file contents with metadata, supporting line ranges for efficient reading.
 """
 import base64
 import os
+from typing import Any, Dict, Optional
+import requests
 from agent.tools.types import ToolResult
+def read_file(
+    repo: str,
+    path: str,
+    ref: str = "HEAD",
+    line_start: Optional[int] = None,
+    line_end: Optional[int] = None,
+) -> ToolResult:
+    """
+    Read file contents from a GitHub repository with line range support.
+    Args:
+        repo: Repository in format "owner/repo" (e.g., "github/github-mcp-server")
+        path: Path to file in repository (e.g., "pkg/github/search.go")
+        ref: Git reference - branch name, tag, or commit SHA (default: "HEAD")
+        line_start: Starting line number (1-indexed, inclusive)
+        line_end: Ending line number (1-indexed, inclusive)
+    Returns:
+        ToolResult with file contents and metadata
+    """
     token = os.environ.get("GITHUB_TOKEN")
     if not token:
+        return {
+            "formatted": "Error: GITHUB_TOKEN environment variable is required",
+            "totalResults": 0,
+            "resultsShared": 0,
+            "isError": True,
+        }
+    # Parse repo
+    if "/" not in repo:
+        return {
+            "formatted": "Error: repo must be in format 'owner/repo'",
+            "totalResults": 0,
+            "resultsShared": 0,
+            "isError": True,
+        }
+    owner, repo_name = repo.split("/", 1)
     headers = {
         "Accept": "application/vnd.github+json",
         "X-GitHub-Api-Version": "2022-11-28",
         "Authorization": f"Bearer {token}",
     }
+    # Fetch file contents
+    url = f"https://api.github.com/repos/{owner}/{repo_name}/contents/{path}"
     params = {}
     if ref and ref != "HEAD":
         params["ref"] = ref
         response = requests.get(url, headers=headers, params=params, timeout=30)
         if response.status_code == 404:
+            return {
+                "formatted": f"File not found: {path} in {repo} (ref: {ref})",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
         if response.status_code != 200:
             error_msg = f"GitHub API error (status {response.status_code})"
                     error_msg += f": {error_data['message']}"
             except Exception:
                 pass
+            return {
+                "formatted": error_msg,
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
         data = response.json()
+        # Check if it's a file
         if data.get("type") != "file":
+            return {
+                "formatted": f"Path {path} is not a file (type: {data.get('type')})",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
         # Decode content
         content_b64 = data.get("content", "")
             content_b64 = content_b64.replace("\n", "").replace(" ", "")
             content = base64.b64decode(content_b64).decode("utf-8", errors="replace")
         else:
+            # For large files, fetch raw content
+            raw_headers = {
+                "Accept": "application/vnd.github.raw",
+                "X-GitHub-Api-Version": "2022-11-28",
+                "Authorization": f"Bearer {token}",
             }
+            raw_response = requests.get(
+                url, headers=raw_headers, params=params, timeout=30
             )
+            if raw_response.status_code != 200:
+                return {
+                    "formatted": "Failed to fetch file content",
+                    "totalResults": 0,
+                    "resultsShared": 0,
+                    "isError": True,
+                }
+            content = raw_response.text
+        # Get metadata
+        file_sha = data.get("sha")
+        file_size = data.get("size", 0)
+        # Process line ranges
+        lines = content.split("\n")
+        total_lines = len(lines)
+        truncated = False
+        message = None
+        if line_start is None and line_end is None:
+            # No range specified
+            if total_lines > 300:
+                line_start = 1
+                line_end = 300
+                truncated = True
+                message = f"File has {total_lines} lines. Showing first 300 lines. Use line_start and line_end to view more."
+            else:
+                line_start = 1
+                line_end = total_lines
+        else:
+            # Range specified
+            if line_start is None:
+                line_start = 1
+            if line_end is None:
+                line_end = total_lines
+            # Validate range
+            line_start = max(1, line_start)
+            line_end = min(total_lines, line_end)
+            if line_start > line_end:
+                return {
+                    "formatted": f"Invalid range: line_start ({line_start}) > line_end ({line_end})",
+                    "totalResults": 0,
+                    "resultsShared": 0,
+                    "isError": True,
+                }
+        # Extract lines
+        selected_lines = lines[line_start - 1 : line_end]
+        selected_content = "\n".join(selected_lines)
+        # Format output
+        lines_output = [f"**File: {repo}/{path}**"]
+        lines_output.append(f"SHA: {file_sha}")
+        lines_output.append(f"Size: {file_size:,} bytes")
+        lines_output.append(
+            f"Lines: {line_start}-{line_end} of {total_lines} total lines"
+        )
+        if ref and ref != "HEAD":
+            lines_output.append(f"Ref: {ref}")
+        if truncated and message:
+            lines_output.append(f"⚠️  {message}")
+        lines_output.append("\n**Content:**")
+        lines_output.append("```")
+        lines_output.append(selected_content)
+        lines_output.append("```")
+        return {
+            "formatted": "\n".join(lines_output),
+            "totalResults": 1,
+            "resultsShared": 1,
+        }
+    except requests.exceptions.RequestException as e:
+        return {
+            "formatted": f"Failed to connect to GitHub API: {str(e)}",
+            "totalResults": 0,
+            "resultsShared": 0,
+            "isError": True,
+        }
 # Tool specification
+GITHUB_READ_FILE_TOOL_SPEC = {
     "name": "read_file",
     "description": (
+        "Read file contents from any GitHub repository with precise line range control.\n\n"
+        "Features:\n"
+        "- Read entire files or specific line ranges\n"
+        "- Auto-truncates large files to 300 lines (with warning)\n"
+        "- Works with any branch, tag, or commit SHA\n"
+        "- Returns file metadata (SHA, size, line count)\n"
+        "- Handles both small and large files efficiently\n\n"
+        "## Examples:\n\n"
+        "**Read entire README:**\n"
+        "{'repo': 'facebook/react', 'path': 'README.md'}\n\n"
+        "**Read specific line range:**\n"
+        "{'repo': 'torvalds/linux', 'path': 'kernel/sched/core.c', 'line_start': 100, 'line_end': 150}\n\n"
+        "**Read from specific branch:**\n"
+        "{'repo': 'python/cpython', 'path': 'Lib/ast.py', 'ref': 'main', 'line_start': 1, 'line_end': 50}\n\n"
+        "**Read from specific commit:**\n"
+        "{'repo': 'github/github-mcp-server', 'path': 'pkg/github/search.go', 'ref': 'abc123def'}\n\n"
+        "Perfect for examining code, reading documentation, or investigating specific implementations."
     ),
     "parameters": {
         "type": "object",
         "properties": {
             "repo": {
                 "type": "string",
+                "description": "Repository in format 'owner/repo' (e.g., 'github/github-mcp-server'). Required.",
             },
             "path": {
                 "type": "string",
+                "description": "Path to file in repository (e.g., 'src/index.js'). Required.",
             },
             "ref": {
                 "type": "string",
+                "description": "Git reference - branch name, tag, or commit SHA. Default: 'HEAD'.",
             },
             "line_start": {
                 "type": "integer",
+                "description": "Starting line number (1-indexed, inclusive). Optional.",
             },
             "line_end": {
                 "type": "integer",
+                "description": "Ending line number (1-indexed, inclusive). Optional.",
             },
         },
         "required": ["repo", "path"],
 }
async def github_read_file_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Handler for agent tool router.

    Args:
        arguments: Tool-call arguments. ``repo`` and ``path`` are required;
            ``ref``, ``line_start`` and ``line_end`` are optional.

    Returns:
        A ``(text, success)`` tuple: the formatted tool output and ``True``
        unless the result is flagged ``isError`` or an exception was raised.
    """
    # Function-scope import so this block does not depend on the module's
    # import section.
    import asyncio

    try:
        # read_file issues synchronous `requests` calls; running it in a
        # worker thread keeps the event loop responsive while GitHub answers.
        result = await asyncio.to_thread(
            read_file,
            repo=arguments["repo"],
            path=arguments["path"],
            # An explicit null ref from the caller means "use the default".
            ref=arguments.get("ref") or "HEAD",
            line_start=arguments.get("line_start"),
            line_end=arguments.get("line_end"),
        )
        return result["formatted"], not result.get("isError", False)
    except Exception as e:
        # Router boundary: report any failure (including a missing required
        # argument) as an unsuccessful tool call instead of raising.
        return f"Error reading file: {str(e)}", False

agent/tools/github_search_code.py CHANGED Viewed

@@ -1,123 +1,138 @@
 """
-GitHub Search Code Tool
-Searches code across GitHub with glob filtering and line-level results.
 """
-import asyncio
 import fnmatch
 import os
 import re
-from dataclasses import asdict, dataclass
-from typing import Any, Dict, List, Optional, Tuple
-try:
-    import requests
-except ImportError:
-    raise ImportError(
-        "requests library is required. Install with: pip install requests"
-    )
 from agent.tools.types import ToolResult
-@dataclass
-class CodeMatch:
-    """A code match with location information."""
-    repo: str
-    path: str
-    ref: str
-    line_start: int
-    line_end: int
-    snippet: str
-    def to_dict(self):
-        return asdict(self)
-class GitHubAPIError(Exception):
-    """Raised when GitHub API returns an error."""
-    pass
-def _get_github_token() -> str:
-    """Get GitHub token from environment."""
     token = os.environ.get("GITHUB_TOKEN")
     if not token:
-        raise GitHubAPIError(
-            "GITHUB_TOKEN environment variable is required. "
-            "Set it with: export GITHUB_TOKEN=your_token_here"
-        )
-    return token
-def _build_github_query(
-    query: str, repo_glob: Optional[str], path_glob: Optional[str], regex: bool
-) -> str:
-    """Build GitHub search query string from parameters."""
-    parts = []
     if regex:
-        parts.append(f"/{query}/")
     else:
-        if " " in query:
-            parts.append(f'"{query}"')
-        else:
-            parts.append(query)
     if repo_glob:
         if "/" in repo_glob:
-            parts.append(f"repo:{repo_glob}")
         else:
-            parts.append(f"user:{repo_glob}")
     if path_glob:
         if "*" not in path_glob and "?" not in path_glob:
-            parts.append(f"path:{path_glob}")
         elif path_glob.startswith("*."):
             ext = path_glob[2:]
-            parts.append(f"extension:{ext}")
         elif "/" not in path_glob and "*" in path_glob:
-            parts.append(f"filename:{path_glob}")
         else:
-            if "." in path_glob:
-                ext_match = re.search(r"\*\.(\w+)", path_glob)
-                if ext_match:
-                    parts.append(f"extension:{ext_match.group(1)}")
-    return " ".join(parts)
-def _fetch_code_search_results(
-    query: str, token: str, max_results: int
-) -> List[Dict[str, Any]]:
-    """Fetch code search results from GitHub API."""
     headers = {
         "Accept": "application/vnd.github.text-match+json",
         "X-GitHub-Api-Version": "2022-11-28",
         "Authorization": f"Bearer {token}",
     }
-    all_items = []
     page = 1
     per_page = min(100, max_results)
-    while len(all_items) < max_results:
-        params = {
-            "q": query,
-            "page": page,
-            "per_page": per_page,
-        }
-        url = "https://api.github.com/search/code"
-        try:
-            response = requests.get(url, headers=headers, params=params, timeout=30)
             if response.status_code != 200:
-                break
             data = response.json()
             items = data.get("items", [])
@@ -125,245 +140,145 @@ def _fetch_code_search_results(
             if not items:
                 break
-            all_items.extend(items)
-            if len(all_items) >= data.get("total_count", 0):
                 break
             page += 1
-        except requests.exceptions.RequestException:
-            break
-    return all_items[:max_results]
-def _glob_match(text: str, pattern: str) -> bool:
-    """Check if text matches glob pattern, supporting ** for multi-level paths."""
-    if "**" in pattern:
-        regex_pattern = pattern.replace("**", "<<<DOUBLESTAR>>>")
-        regex_pattern = fnmatch.translate(regex_pattern)
-        regex_pattern = regex_pattern.replace("<<<DOUBLESTAR>>>", ".*")
-        return re.match(regex_pattern, text) is not None
-    else:
-        return fnmatch.fnmatch(text, pattern)
-def _estimate_line_numbers(fragment: str) -> Tuple[int, int]:
-    """Estimate line numbers from a code fragment."""
-    lines = fragment.split("\n")
-    line_count = len([line for line in lines if line.strip()])
-    return 1, line_count
-def _parse_results_to_matches(
-    raw_results: List[Dict[str, Any]],
-    repo_glob: Optional[str],
-    path_glob: Optional[str],
-) -> List[CodeMatch]:
-    """Parse raw GitHub API results into CodeMatch objects."""
-    matches = []
-    for item in raw_results:
-        repo_name = item.get("repository", {}).get("full_name", "unknown/unknown")
-        file_path = item.get("path", "")
-        sha = item.get("sha", "unknown")
-        if repo_glob and not _glob_match(repo_name, repo_glob):
-            continue
-        if path_glob and not _glob_match(file_path, path_glob):
-            continue
-        text_matches = item.get("text_matches", [])
-        if text_matches:
-            for text_match in text_matches:
-                fragment = text_match.get("fragment", "")
-                line_start, line_end = _estimate_line_numbers(fragment)
-                match = CodeMatch(
-                    repo=repo_name,
-                    path=file_path,
-                    ref=sha,
-                    line_start=line_start,
-                    line_end=line_end,
-                    snippet=fragment.strip(),
-                )
-                matches.append(match)
-        else:
-            match = CodeMatch(
-                repo=repo_name,
-                path=file_path,
-                ref=sha,
-                line_start=1,
-                line_end=1,
-                snippet="<match found, but snippet not available>",
-            )
-            matches.append(match)
-    return matches
-def search_code(
-    query: str,
-    repo_glob: Optional[str] = None,
-    path_glob: Optional[str] = None,
-    regex: bool = False,
-    max_results: int = 100,
-) -> List[CodeMatch]:
-    """
-    Search for code across GitHub with glob filtering and line-level results.
-    Returns: repo, path, ref, line_start, line_end, snippet
-    Args:
-        query: Search term or pattern to find in code
-        repo_glob: Glob pattern to filter repositories (e.g., "github/*", "facebook/react")
-        path_glob: Glob pattern to filter file paths (e.g., "*.py", "src/**/*.js")
-        regex: If True, treat query as a regular expression
-        max_results: Maximum number of results to return (default: 100)
-    Returns:
-        List of CodeMatch objects with repo, path, ref, line numbers, and snippet
-    """
-    github_query = _build_github_query(query, repo_glob, path_glob, regex)
-    token = _get_github_token()
-    raw_results = _fetch_code_search_results(github_query, token, max_results)
-    matches = _parse_results_to_matches(raw_results, repo_glob, path_glob)
-    return matches
-async def _async_call(func, *args, **kwargs):
-    """Wrap synchronous calls for async context."""
-    return await asyncio.to_thread(func, *args, **kwargs)
-def _format_code_matches(matches: List[CodeMatch]) -> str:
-    """Format code matches."""
-    if not matches:
-        return "No matches found."
-    lines = []
-    for i, match in enumerate(matches, 1):
-        lines.append(f"**{i}. {match.repo}/{match.path}:{match.line_start}**")
-        lines.append("```")
-        # Show first 5 lines of snippet
-        snippet_lines = match.snippet.split("\n")[:5]
-        lines.extend(snippet_lines)
-        if len(match.snippet.split("\n")) > 5:
-            lines.append("...")
-        lines.append("```")
-        lines.append("")
-    return "\n".join(lines)
-class SearchCodeTool:
-    """Tool for searching code across GitHub."""
-    async def execute(self, params: Dict[str, Any]) -> ToolResult:
-        """Execute search_code operation."""
-        query = params.get("query")
-        if not query:
-            return {
-                "formatted": "Error: 'query' parameter is required",
-                "totalResults": 0,
-                "resultsShared": 0,
-                "isError": True,
-            }
-        repo_glob = params.get("repo_glob")
-        path_glob = params.get("path_glob")
-        regex = params.get("regex", False)
-        max_results = params.get("max_results", 100)
-        try:
-            matches = await _async_call(
-                search_code,
-                query=query,
-                repo_glob=repo_glob,
-                path_glob=path_glob,
-                regex=regex,
-                max_results=max_results,
-            )
-            if not matches:
-                return {
-                    "formatted": "No matches found",
-                    "totalResults": 0,
-                    "resultsShared": 0,
-                }
-            formatted = _format_code_matches(matches)
-            response = f"**Found {len(matches)} code matches:**\n\n{formatted}"
-            # Add note about viewing full files
-            if matches:
-                response += "\n**To view full file, use:**\n"
-                top_match = matches[0]
-                response += (
-                    f"read_file(repo='{top_match.repo}', path='{top_match.path}')"
-                )
-            return {
-                "formatted": response,
-                "totalResults": len(matches),
-                "resultsShared": min(len(matches), 10),
-            }
-        except GitHubAPIError as e:
-            return {
-                "formatted": f"GitHub API Error: {str(e)}",
-                "totalResults": 0,
-                "resultsShared": 0,
-                "isError": True,
-            }
-        except Exception as e:
-            return {
-                "formatted": f"Error: {str(e)}",
-                "totalResults": 0,
-                "resultsShared": 0,
-                "isError": True,
-            }
 # Tool specification
-SEARCH_CODE_TOOL_SPEC = {
     "name": "search_code",
     "description": (
-        "Search code across GitHub with glob filtering and line-level results.\n\n"
-        "Returns: repo, path, ref, line_start, line_end, snippet\n\n"
-        "Examples:\n"
-        "- Search Python functions: {'query': 'def train', 'path_glob': '*.py', 'repo_glob': 'huggingface/*'}\n"
-        "- Search TODO comments: {'query': 'TODO', 'repo_glob': 'github/*', 'max_results': 10}\n"
-        "- Regex search: {'query': r'func Test\\w+', 'path_glob': '*.go', 'regex': True}\n"
-        "- Search in specific repo: {'query': 'HfApi', 'repo_glob': 'huggingface/huggingface_hub', 'path_glob': '*.py'}\n\n"
     ),
     "parameters": {
         "type": "object",
         "properties": {
             "query": {
                 "type": "string",
-                "description": "Search term or pattern to find in code",
             },
             "repo_glob": {
                 "type": "string",
-                "description": "Glob pattern to filter repositories (e.g., 'github/*', 'facebook/react')",
             },
             "path_glob": {
                 "type": "string",
-                "description": "Glob pattern to filter file paths (e.g., '*.py', 'src/**/*.js', 'test_*.py')",
             },
             "regex": {
                 "type": "boolean",
-                "description": "Treat query as regular expression (default: false)",
             },
             "max_results": {
                 "type": "integer",
-                "description": "Maximum number of results to return (default: 100)",
             },
         },
         "required": ["query"],
@@ -371,11 +286,16 @@ SEARCH_CODE_TOOL_SPEC = {
 }
-async def search_code_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
-    """Handler for agent tool router."""
     try:
-        tool = SearchCodeTool()
-        result = await tool.execute(arguments)
         return result["formatted"], not result.get("isError", False)
     except Exception as e:
-        return f"Error executing search_code: {str(e)}", False

 """
+GitHub Code Search Tool - Search code across GitHub with advanced filtering
+Find code patterns using regex and glob filters for repositories and file paths.
 """
 import fnmatch
 import os
 import re
+from typing import Any, Dict, Optional
+import requests
 from agent.tools.types import ToolResult
def _glob_match(text: str, pattern: str) -> bool:
    """Check if text matches glob pattern, supporting ** for multi-level paths.

    Patterns without ``**`` are delegated to ``fnmatch.fnmatch`` unchanged
    (so ``*`` may span ``/``, e.g. ``*.py`` matches ``pkg/mod.py``).

    For patterns containing ``**``:
      * a ``**/`` path segment matches zero or more directories, so
        ``src/**/*.js`` matches both ``src/index.js`` and ``src/a/b/index.js``;
      * any other ``**`` matches any run of characters.

    Args:
        text: The string to test (a repository name or file path).
        pattern: The glob pattern.

    Returns:
        True if ``text`` matches ``pattern``, False otherwise.
    """
    if "**" not in pattern:
        return fnmatch.fnmatch(text, pattern)

    # Placeholders survive fnmatch.translate untouched ('<' and '>' are not
    # regex metacharacters), letting us swap in the real regex afterwards.
    dir_token = "<<<GLOBSTARDIR>>>"
    any_token = "<<<GLOBSTAR>>>"

    # Only a `**/` that forms a whole path segment (at the start of the
    # pattern or right after a '/') gets the "zero or more directories" meaning.
    marked = re.sub(r"(?<![^/])\*\*/", dir_token, pattern)
    marked = marked.replace("**", any_token)

    regex_pattern = fnmatch.translate(marked)
    regex_pattern = regex_pattern.replace(dir_token, "(?:.*/)?")
    regex_pattern = regex_pattern.replace(any_token, ".*")
    return re.match(regex_pattern, text) is not None
def _search_error(message: str) -> ToolResult:
    """Build the error-shaped ToolResult shared by every failure path."""
    return {
        "formatted": message,
        "totalResults": 0,
        "resultsShared": 0,
        "isError": True,
    }


def _has_glob_chars(value: str) -> bool:
    """Return True if value contains a glob metacharacter (*, ? or [)."""
    return any(ch in value for ch in "*?[")


def _build_code_search_query(
    query: str, repo_glob: Optional[str], path_glob: Optional[str], regex: bool
) -> str:
    """Translate the search arguments into a GitHub code-search query string."""
    if regex:
        query_parts = [f"/{query}/"]
    else:
        query_parts = [f'"{query}"' if " " in query else query]

    # Repo filter: only literal values are sent as repo:/user: qualifiers.
    # A wildcard glob is narrowed server-side to its literal owner when there
    # is one ("github/*" -> user:github); the rest is filtered client-side.
    if repo_glob:
        owner, slash, _ = repo_glob.partition("/")
        if not _has_glob_chars(repo_glob):
            query_parts.append(f"repo:{repo_glob}" if slash else f"user:{repo_glob}")
        elif slash and owner and not _has_glob_chars(owner):
            query_parts.append(f"user:{owner}")

    # Path filter
    if path_glob:
        if "*" not in path_glob and "?" not in path_glob:
            query_parts.append(f"path:{path_glob}")
        elif path_glob.startswith("*."):
            query_parts.append(f"extension:{path_glob[2:]}")
        elif "/" not in path_glob and "*" in path_glob:
            query_parts.append(f"filename:{path_glob}")
        else:
            # Complex pattern, extract extension if possible
            ext_match = re.search(r"\*\.(\w+)", path_glob)
            if ext_match:
                query_parts.append(f"extension:{ext_match.group(1)}")

    return " ".join(query_parts)


def _describe_api_error(response: Any) -> str:
    """Turn a non-200 GitHub response into a readable error message."""
    try:
        api_message = response.json().get("message")
    except (ValueError, AttributeError):
        # Body was not JSON (or not a JSON object); fall back to the status.
        api_message = None

    if response.status_code == 403:
        return (
            "GitHub API rate limit or permission error: "
            f"{api_message or 'Unknown error'}"
        )
    error_msg = f"GitHub API error (status {response.status_code})"
    if api_message:
        error_msg += f": {api_message}"
    return error_msg


def search_code(
    query: str,
    repo_glob: Optional[str] = None,
    path_glob: Optional[str] = None,
    regex: bool = False,
    max_results: int = 20,
) -> ToolResult:
    """
    Search for code across GitHub with glob filtering.

    Args:
        query: Search term or pattern to find in code
        repo_glob: Glob pattern to filter repositories (e.g., "github/*", "*/react").
            A value without "/" is treated as an owner and matches all of
            that owner's repositories.
        path_glob: Glob pattern to filter file paths (e.g., "*.py", "src/**/*.js")
        regex: If True, treat query as regular expression
        max_results: Maximum number of results to return (default 20)

    Returns:
        ToolResult with code matches and snippets. Failures are reported as a
        ToolResult with ``isError`` set rather than raised. If a request fails
        after some matches were already collected, those matches are returned
        with a warning line instead of being discarded.
    """
    if not query or not query.strip():
        return _search_error("Error: 'query' parameter is required")

    token = os.environ.get("GITHUB_TOKEN")
    if not token:
        return _search_error("Error: GITHUB_TOKEN environment variable is required")

    github_query = _build_code_search_query(query, repo_glob, path_glob, regex)

    # Results carry "owner/name"; a bare owner glob must be widened to match.
    repo_filter = None
    if repo_glob:
        repo_filter = repo_glob if "/" in repo_glob else f"{repo_glob}/*"

    headers = {
        "Accept": "application/vnd.github.text-match+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "Authorization": f"Bearer {token}",
    }

    all_matches = []
    items_seen = 0  # raw API items consumed, independent of client filtering
    failure = None
    page = 1
    per_page = max(1, min(100, max_results))

    while len(all_matches) < max_results:
        params = {
            "q": github_query,
            "page": page,
            "per_page": per_page,
        }
        try:
            response = requests.get(
                "https://api.github.com/search/code",
                headers=headers,
                params=params,
                timeout=30,
            )
        except requests.exceptions.RequestException as e:
            failure = f"Failed to connect to GitHub API: {str(e)}"
            break

        if response.status_code != 200:
            failure = _describe_api_error(response)
            break

        try:
            data = response.json()
        except ValueError:
            failure = "GitHub API returned a response that is not valid JSON"
            break

        items = data.get("items", [])
        if not items:
            break
        items_seen += len(items)

        for item in items:
            repo_name = item.get("repository", {}).get("full_name", "unknown")
            file_path = item.get("path", "")
            sha = item.get("sha", "")

            # Apply client-side glob filtering
            if repo_filter and not _glob_match(repo_name, repo_filter):
                continue
            if path_glob and not _glob_match(file_path, path_glob):
                continue

            url = item.get("html_url", "")
            text_matches = item.get("text_matches", [])
            if not text_matches:
                all_matches.append(
                    {
                        "repo": repo_name,
                        "path": file_path,
                        "ref": sha,
                        "line_start": 1,
                        "line_end": 1,
                        "snippet": "(snippet not available)",
                        "url": url,
                    }
                )
                continue

            for text_match in text_matches:
                fragment = text_match.get("fragment", "")
                # NOTE: this range is derived only from the fragment's
                # non-blank line count, not from its position in the file.
                line_count = len(
                    [line for line in fragment.split("\n") if line.strip()]
                )
                all_matches.append(
                    {
                        "repo": repo_name,
                        "path": file_path,
                        "ref": sha,
                        "line_start": 1,
                        "line_end": line_count,
                        "snippet": fragment.strip(),
                        "url": url,
                    }
                )

        # Stop on a short (last) page or once every reachable item has been
        # consumed; the search API serves at most 1,000 results per query.
        reachable = min(data.get("total_count", 0), 1000)
        if len(items) < per_page or items_seen >= reachable:
            break
        page += 1

    results = all_matches[:max_results]

    if not results:
        if failure:
            return _search_error(failure)
        return {
            "formatted": f"No code matches found for query: {query}",
            "totalResults": 0,
            "resultsShared": 0,
        }

    # Format output
    lines_output = [f"**Found {len(results)} code matches:**\n"]
    if failure:
        lines_output.append(f"⚠️  Results may be incomplete: {failure}\n")

    for i, match in enumerate(results, 1):
        lines_output.append(f"{i}. **{match['repo']}:{match['path']}**")
        lines_output.append(
            f"   Lines: {match['line_start']}-{match['line_end']} | Ref: {match['ref'][:7]}"
        )
        lines_output.append(f"   URL: {match['url']}")

        # Show snippet (first 5 lines)
        snippet_lines = match["snippet"].split("\n")
        lines_output.append("   ```")
        lines_output.extend(f"   {line}" for line in snippet_lines[:5])
        if len(snippet_lines) > 5:
            lines_output.append("   ...")
        lines_output.append("   ```")
        lines_output.append("")

    return {
        "formatted": "\n".join(lines_output),
        "totalResults": len(results),
        "resultsShared": len(results),
    }
 # Tool specification
+GITHUB_SEARCH_CODE_TOOL_SPEC = {
     "name": "search_code",
     "description": (
+        "Search for code patterns across GitHub with advanced glob filtering.\n\n"
+        "Features:\n"
+        "- Text or regex search\n"
+        "- Repository glob patterns (e.g., 'github/*', '*/react')\n"
+        "- File path glob patterns (e.g., '*.py', 'src/**/*.js')\n"
+        "- Returns code snippets with line numbers\n"
+        "- Direct URLs to matches\n\n"
+        "## Examples:\n\n"
+        "**Search for Python function definitions:**\n"
+        "{'query': 'def search', 'path_glob': '*.py', 'max_results': 10}\n\n"
+        "**Search for TODO comments in specific org:**\n"
+        "{'query': 'TODO', 'repo_glob': 'github/*', 'max_results': 5}\n\n"
+        "**Regex search for test functions:**\n"
+        "{'query': r'func Test\\w+', 'path_glob': '*.go', 'regex': True}\n\n"
+        "**Search in specific repo with path filter:**\n"
+        "{'query': 'SearchCode', 'repo_glob': 'github/github-mcp-server', 'path_glob': '*.go'}\n\n"
+        "**Find imports in TypeScript files:**\n"
+        "{'query': 'import', 'path_glob': 'src/**/*.ts', 'repo_glob': 'facebook/*'}\n\n"
+        "Perfect for finding code patterns, learning from examples, or exploring implementations."
     ),
     "parameters": {
         "type": "object",
         "properties": {
             "query": {
                 "type": "string",
+                "description": "Search term or pattern to find in code. Required.",
             },
             "repo_glob": {
                 "type": "string",
+                "description": "Glob pattern to filter repositories (e.g., 'github/*', '*/react'). Optional.",
             },
             "path_glob": {
                 "type": "string",
+                "description": "Glob pattern to filter file paths (e.g., '*.py', 'src/**/*.js'). Optional.",
             },
             "regex": {
                 "type": "boolean",
+                "description": "If true, treat query as regular expression. Default: false.",
             },
             "max_results": {
                 "type": "integer",
+                "description": "Maximum number of results to return. Default: 20.",
             },
         },
         "required": ["query"],
 }
async def github_search_code_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Handler for agent tool router.

    Args:
        arguments: Tool-call arguments. ``query`` is required; ``repo_glob``,
            ``path_glob``, ``regex`` and ``max_results`` are optional.

    Returns:
        A ``(text, success)`` tuple: the formatted tool output and ``True``
        unless the result is flagged ``isError`` or an exception was raised.
    """
    # Function-scope import so this block does not depend on the module's
    # import section.
    import asyncio

    try:
        # An explicit null from the caller means "use the default".
        max_results = arguments.get("max_results")
        # search_code issues synchronous `requests` calls (possibly several
        # pages); running it in a worker thread keeps the event loop free.
        result = await asyncio.to_thread(
            search_code,
            query=arguments["query"],
            repo_glob=arguments.get("repo_glob"),
            path_glob=arguments.get("path_glob"),
            regex=arguments.get("regex") or False,
            max_results=20 if max_results is None else max_results,
        )
        return result["formatted"], not result.get("isError", False)
    except Exception as e:
        # Router boundary: report any failure (including a missing required
        # argument) as an unsuccessful tool call instead of raising.
        return f"Error searching code: {str(e)}", False