akseljoonas HF Staff committed on
Commit
63a4db3
·
1 Parent(s): 53d0a89

github tools updated

Browse files
agent/core/tools.py CHANGED
@@ -20,12 +20,21 @@ from agent.tools.docs_tools import (
20
  hf_docs_fetch_handler,
21
  )
22
  from agent.tools.github_find_examples import (
23
- FIND_EXAMPLES_TOOL_SPEC,
24
- find_examples_handler,
 
 
 
 
 
 
 
 
 
 
 
 
25
  )
26
- from agent.tools.github_list_repos import LIST_REPOS_TOOL_SPEC, list_repos_handler
27
- from agent.tools.github_read_file import READ_FILE_TOOL_SPEC, read_file_handler
28
- from agent.tools.github_search_code import SEARCH_CODE_TOOL_SPEC, search_code_handler
29
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
30
  from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
31
  from agent.tools.private_hf_repo_tools import (
@@ -231,7 +240,7 @@ class ToolRouter:
231
  def create_builtin_tools() -> list[ToolSpec]:
232
  """Create built-in tool specifications"""
233
  print(
234
- f"Creating built-in tools: {EXPLORE_HF_DOCS_TOOL_SPEC['name']}, {HF_DOCS_FETCH_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}, {HF_JOBS_TOOL_SPEC['name']}, {PRIVATE_HF_REPO_TOOL_SPEC['name']}, {UTILS_TOOL_SPEC['name']}, {FIND_EXAMPLES_TOOL_SPEC['name']}, {READ_FILE_TOOL_SPEC['name']}, {LIST_REPOS_TOOL_SPEC['name']}, {SEARCH_CODE_TOOL_SPEC['name']}"
235
  )
236
  # in order of importance
237
  return [
@@ -273,29 +282,29 @@ def create_builtin_tools() -> list[ToolSpec]:
273
  parameters=UTILS_TOOL_SPEC["parameters"],
274
  handler=utils_handler,
275
  ),
276
- # GitHub tools - 4 separate tools
277
  ToolSpec(
278
- name=FIND_EXAMPLES_TOOL_SPEC["name"],
279
- description=FIND_EXAMPLES_TOOL_SPEC["description"],
280
- parameters=FIND_EXAMPLES_TOOL_SPEC["parameters"],
281
- handler=find_examples_handler,
282
  ),
283
  ToolSpec(
284
- name=READ_FILE_TOOL_SPEC["name"],
285
- description=READ_FILE_TOOL_SPEC["description"],
286
- parameters=READ_FILE_TOOL_SPEC["parameters"],
287
- handler=read_file_handler,
288
  ),
289
  ToolSpec(
290
- name=LIST_REPOS_TOOL_SPEC["name"],
291
- description=LIST_REPOS_TOOL_SPEC["description"],
292
- parameters=LIST_REPOS_TOOL_SPEC["parameters"],
293
- handler=list_repos_handler,
294
  ),
295
  ToolSpec(
296
- name=SEARCH_CODE_TOOL_SPEC["name"],
297
- description=SEARCH_CODE_TOOL_SPEC["description"],
298
- parameters=SEARCH_CODE_TOOL_SPEC["parameters"],
299
- handler=search_code_handler,
300
  ),
301
  ]
 
20
  hf_docs_fetch_handler,
21
  )
22
  from agent.tools.github_find_examples import (
23
+ GITHUB_FIND_EXAMPLES_TOOL_SPEC,
24
+ github_find_examples_handler,
25
+ )
26
+ from agent.tools.github_list_repos import (
27
+ GITHUB_LIST_REPOS_TOOL_SPEC,
28
+ github_list_repos_handler,
29
+ )
30
+ from agent.tools.github_read_file import (
31
+ GITHUB_READ_FILE_TOOL_SPEC,
32
+ github_read_file_handler,
33
+ )
34
+ from agent.tools.github_search_code import (
35
+ GITHUB_SEARCH_CODE_TOOL_SPEC,
36
+ github_search_code_handler,
37
  )
 
 
 
38
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
39
  from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
40
  from agent.tools.private_hf_repo_tools import (
 
240
  def create_builtin_tools() -> list[ToolSpec]:
241
  """Create built-in tool specifications"""
242
  print(
243
+ f"Creating built-in tools: {EXPLORE_HF_DOCS_TOOL_SPEC['name']}, {HF_DOCS_FETCH_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}, {HF_JOBS_TOOL_SPEC['name']}, {PRIVATE_HF_REPO_TOOL_SPEC['name']}, {UTILS_TOOL_SPEC['name']}, {GITHUB_SEARCH_CODE_TOOL_SPEC['name']}, {GITHUB_FIND_EXAMPLES_TOOL_SPEC['name']}, {GITHUB_LIST_REPOS_TOOL_SPEC['name']}, {GITHUB_READ_FILE_TOOL_SPEC['name']}"
244
  )
245
  # in order of importance
246
  return [
 
282
  parameters=UTILS_TOOL_SPEC["parameters"],
283
  handler=utils_handler,
284
  ),
285
+ # GitHub tools
286
  ToolSpec(
287
+ name=GITHUB_SEARCH_CODE_TOOL_SPEC["name"],
288
+ description=GITHUB_SEARCH_CODE_TOOL_SPEC["description"],
289
+ parameters=GITHUB_SEARCH_CODE_TOOL_SPEC["parameters"],
290
+ handler=github_search_code_handler,
291
  ),
292
  ToolSpec(
293
+ name=GITHUB_FIND_EXAMPLES_TOOL_SPEC["name"],
294
+ description=GITHUB_FIND_EXAMPLES_TOOL_SPEC["description"],
295
+ parameters=GITHUB_FIND_EXAMPLES_TOOL_SPEC["parameters"],
296
+ handler=github_find_examples_handler,
297
  ),
298
  ToolSpec(
299
+ name=GITHUB_LIST_REPOS_TOOL_SPEC["name"],
300
+ description=GITHUB_LIST_REPOS_TOOL_SPEC["description"],
301
+ parameters=GITHUB_LIST_REPOS_TOOL_SPEC["parameters"],
302
+ handler=github_list_repos_handler,
303
  ),
304
  ToolSpec(
305
+ name=GITHUB_READ_FILE_TOOL_SPEC["name"],
306
+ description=GITHUB_READ_FILE_TOOL_SPEC["description"],
307
+ parameters=GITHUB_READ_FILE_TOOL_SPEC["parameters"],
308
+ handler=github_read_file_handler,
309
  ),
310
  ]
agent/tools/__init__.py CHANGED
@@ -3,24 +3,20 @@ Hugging Face tools for the agent
3
  """
4
 
5
  from agent.tools.github_find_examples import (
6
- FIND_EXAMPLES_TOOL_SPEC,
7
- FindExamplesTool,
8
- find_examples_handler,
9
  )
10
  from agent.tools.github_list_repos import (
11
- LIST_REPOS_TOOL_SPEC,
12
- ListReposTool,
13
- list_repos_handler,
14
  )
15
  from agent.tools.github_read_file import (
16
- READ_FILE_TOOL_SPEC,
17
- ReadFileTool,
18
- read_file_handler,
19
  )
20
  from agent.tools.github_search_code import (
21
- SEARCH_CODE_TOOL_SPEC,
22
- SearchCodeTool,
23
- search_code_handler,
24
  )
25
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler
26
  from agent.tools.types import ToolResult
@@ -30,16 +26,12 @@ __all__ = [
30
  "HF_JOBS_TOOL_SPEC",
31
  "hf_jobs_handler",
32
  "HfJobsTool",
33
- "FIND_EXAMPLES_TOOL_SPEC",
34
- "find_examples_handler",
35
- "FindExamplesTool",
36
- "READ_FILE_TOOL_SPEC",
37
- "read_file_handler",
38
- "ReadFileTool",
39
- "LIST_REPOS_TOOL_SPEC",
40
- "list_repos_handler",
41
- "ListReposTool",
42
- "SEARCH_CODE_TOOL_SPEC",
43
- "search_code_handler",
44
- "SearchCodeTool",
45
  ]
 
3
  """
4
 
5
  from agent.tools.github_find_examples import (
6
+ GITHUB_FIND_EXAMPLES_TOOL_SPEC,
7
+ github_find_examples_handler,
 
8
  )
9
  from agent.tools.github_list_repos import (
10
+ GITHUB_LIST_REPOS_TOOL_SPEC,
11
+ github_list_repos_handler,
 
12
  )
13
  from agent.tools.github_read_file import (
14
+ GITHUB_READ_FILE_TOOL_SPEC,
15
+ github_read_file_handler,
 
16
  )
17
  from agent.tools.github_search_code import (
18
+ GITHUB_SEARCH_CODE_TOOL_SPEC,
19
+ github_search_code_handler,
 
20
  )
21
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler
22
  from agent.tools.types import ToolResult
 
26
  "HF_JOBS_TOOL_SPEC",
27
  "hf_jobs_handler",
28
  "HfJobsTool",
29
+ "GITHUB_FIND_EXAMPLES_TOOL_SPEC",
30
+ "github_find_examples_handler",
31
+ "GITHUB_LIST_REPOS_TOOL_SPEC",
32
+ "github_list_repos_handler",
33
+ "GITHUB_READ_FILE_TOOL_SPEC",
34
+ "github_read_file_handler",
35
+ "GITHUB_SEARCH_CODE_TOOL_SPEC",
36
+ "github_search_code_handler",
 
 
 
 
37
  ]
agent/tools/github_find_examples.py CHANGED
@@ -1,115 +1,23 @@
1
  """
2
- GitHub Find Examples Tool
3
 
4
- Finds examples, guides, and tutorials for a library using deterministic queries and heuristics.
5
  """
6
 
7
- import asyncio
8
  import math
9
  import os
10
- from dataclasses import asdict, dataclass
11
  from datetime import datetime, timedelta
12
  from typing import Any, Dict, List, Optional
13
 
14
- try:
15
- import requests
16
- except ImportError:
17
- raise ImportError(
18
- "requests library is required. Install with: pip install requests"
19
- )
20
 
21
  from agent.tools.types import ToolResult
22
 
23
 
24
- @dataclass
25
- class Example:
26
- """An example file with metadata and relevance score."""
27
-
28
- repo: str
29
- path: str
30
- ref: str
31
- url: str
32
- score: float
33
- reason: str
34
- repo_stars: int
35
- repo_updated: str
36
- file_size: int
37
-
38
- def to_dict(self):
39
- return asdict(self)
40
-
41
-
42
- class GitHubAPIError(Exception):
43
- """Raised when GitHub API returns an error."""
44
-
45
- pass
46
-
47
-
48
- # Path-based scoring weights
49
- PATH_SCORES = {
50
- "README.md": 100,
51
- "readme.md": 100,
52
- "docs/": 80,
53
- "doc/": 80,
54
- "examples/": 90,
55
- "example/": 90,
56
- "notebooks/": 70,
57
- "notebook/": 70,
58
- "tutorials/": 85,
59
- "tutorial/": 85,
60
- "guides/": 85,
61
- "guide/": 85,
62
- "tests/": 40,
63
- "test/": 40,
64
- "demos/": 75,
65
- "demo/": 75,
66
- "samples/": 75,
67
- "sample/": 75,
68
- }
69
-
70
- # Content-based scoring keywords
71
- CONTENT_KEYWORDS = {
72
- 'if __name__ == "__main__"': 50,
73
- "if __name__ == '__main__'": 50,
74
- "quickstart": 60,
75
- "quick start": 60,
76
- "getting started": 60,
77
- "tutorial": 50,
78
- "example usage": 55,
79
- "usage example": 55,
80
- "how to use": 45,
81
- "basic example": 50,
82
- "simple example": 50,
83
- }
84
-
85
- # File extension preferences
86
- PREFERRED_EXTENSIONS = {
87
- ".py": 10,
88
- ".ipynb": 15,
89
- ".md": 20,
90
- ".rst": 10,
91
- ".js": 10,
92
- ".ts": 10,
93
- ".go": 10,
94
- ".java": 10,
95
- ".cpp": 10,
96
- ".c": 10,
97
- }
98
-
99
-
100
- def _get_github_token() -> str:
101
- """Get GitHub token from environment."""
102
- token = os.environ.get("GITHUB_TOKEN")
103
- if not token:
104
- raise GitHubAPIError(
105
- "GITHUB_TOKEN environment variable is required. "
106
- "Set it with: export GITHUB_TOKEN=your_token_here"
107
- )
108
- return token
109
-
110
-
111
- def _execute_search(query: str, token: str, limit: int = 20) -> List[Dict[str, Any]]:
112
- """Execute a GitHub code search query."""
113
  headers = {
114
  "Accept": "application/vnd.github.text-match+json",
115
  "X-GitHub-Api-Version": "2022-11-28",
@@ -123,15 +31,18 @@ def _execute_search(query: str, token: str, limit: int = 20) -> List[Dict[str, A
123
  try:
124
  while len(results) < limit:
125
  params = {"q": query, "per_page": per_page, "page": page}
126
- url = "https://api.github.com/search/code"
127
- response = requests.get(url, headers=headers, params=params, timeout=30)
 
 
 
 
128
 
129
  if response.status_code != 200:
130
  break
131
 
132
  data = response.json()
133
  items = data.get("items", [])
134
-
135
  if not items:
136
  break
137
 
@@ -149,7 +60,6 @@ def _execute_search(query: str, token: str, limit: int = 20) -> List[Dict[str, A
149
 
150
  if len(results) >= limit or len(items) < per_page:
151
  break
152
-
153
  page += 1
154
 
155
  except Exception:
@@ -159,7 +69,7 @@ def _execute_search(query: str, token: str, limit: int = 20) -> List[Dict[str, A
159
 
160
 
161
  def _fetch_repo_metadata(repos: List[str], token: str) -> Dict[str, Dict[str, Any]]:
162
- """Fetch metadata for repositories."""
163
  headers = {
164
  "Accept": "application/vnd.github+json",
165
  "X-GitHub-Api-Version": "2022-11-28",
@@ -167,18 +77,16 @@ def _fetch_repo_metadata(repos: List[str], token: str) -> Dict[str, Dict[str, An
167
  }
168
 
169
  metadata = {}
170
-
171
  for repo in repos:
172
  try:
173
- url = f"https://api.github.com/repos/{repo}"
174
- response = requests.get(url, headers=headers, timeout=10)
175
-
176
  if response.status_code == 200:
177
  data = response.json()
178
  metadata[repo] = {
179
  "stars": data.get("stargazers_count", 0),
180
  "updated_at": data.get("updated_at", ""),
181
- "description": data.get("description", ""),
182
  }
183
  except Exception:
184
  continue
@@ -186,157 +94,89 @@ def _fetch_repo_metadata(repos: List[str], token: str) -> Dict[str, Dict[str, An
186
  return metadata
187
 
188
 
189
- def _score_and_rank(
190
- results: List[Dict[str, Any]], library: str, token: str
191
- ) -> List[Example]:
192
- """Score results based on heuristics and rank them."""
193
- repos = list(set(r["repo"] for r in results))
194
- repo_metadata = _fetch_repo_metadata(repos, token)
195
-
196
- scored_examples = []
197
-
198
- for result in results:
199
- repo = result["repo"]
200
- path = result["path"]
201
-
202
- score = 0.0
203
- reasons = []
204
-
205
- # Path-based scoring
206
- path_lower = path.lower()
207
- for pattern, points in PATH_SCORES.items():
208
- if pattern.lower() in path_lower:
209
- score += points
210
- reasons.append(f"in {pattern}")
211
- break
212
-
213
- # File extension scoring
214
- for ext, points in PREFERRED_EXTENSIONS.items():
215
- if path_lower.endswith(ext):
216
- score += points
217
- break
218
-
219
- # Content-based scoring
220
- text_content = ""
221
- for match in result.get("text_matches", []):
222
- text_content += match.get("fragment", "").lower() + " "
223
-
224
- for keyword, points in CONTENT_KEYWORDS.items():
225
- if keyword.lower() in text_content:
226
- score += points
227
- reasons.append(f"contains '{keyword}'")
228
-
229
- # Repo-based scoring
230
- metadata = repo_metadata.get(repo, {})
231
- stars = metadata.get("stars", 0)
232
- updated = metadata.get("updated_at", "")
233
-
234
- if stars > 0:
235
- star_score = math.log10(stars + 1) * 10
236
- score += star_score
237
-
238
- # Recency bonus
239
- if updated:
240
- try:
241
- updated_date = datetime.fromisoformat(updated.replace("Z", "+00:00"))
242
- if datetime.now(updated_date.tzinfo) - updated_date < timedelta(
243
- days=180
244
- ):
245
- score += 20
246
- reasons.append("recently updated")
247
- except Exception:
248
- pass
249
-
250
- # Filename quality
251
- filename = path.split("/")[-1].lower()
252
- if any(
253
- word in filename
254
- for word in ["example", "tutorial", "guide", "quickstart", "demo"]
255
- ):
256
- score += 30
257
- reasons.append("descriptive filename")
258
-
259
- # Size penalty
260
- if result["size"] > 100000:
261
- score *= 0.5
262
- reasons.append("large file")
263
-
264
- example = Example(
265
- repo=repo,
266
- path=path,
267
- ref=result["sha"],
268
- url=result["url"],
269
- score=score,
270
- reason=", ".join(reasons) if reasons else "matches library",
271
- repo_stars=stars,
272
- repo_updated=updated,
273
- file_size=result["size"],
274
- )
275
-
276
- scored_examples.append(example)
277
-
278
- scored_examples.sort(key=lambda x: x.score, reverse=True)
279
- return scored_examples
280
-
281
-
282
- def _search_by_path(
283
- library: str, org: str, repo_scope: Optional[str], token: str
284
- ) -> List[Dict[str, Any]]:
285
- """Search for library in example/tutorial/docs directories."""
286
- results = []
287
- path_patterns = [
288
- "examples/",
289
- "example/",
290
- "docs/",
291
- "tutorials/",
292
- "notebooks/",
293
- "guides/",
294
- ]
295
-
296
- for path in path_patterns:
297
- query_parts = [f"org:{org}", f"{library}", f"path:{path}"]
298
- if repo_scope:
299
- query_parts[0] = f"repo:{org}/{repo_scope}"
300
-
301
- query = " ".join(query_parts)
302
- results.extend(_execute_search(query, token, limit=20))
303
-
304
- return results
305
-
306
-
307
- def _search_by_content(
308
- library: str, org: str, repo_scope: Optional[str], token: str
309
- ) -> List[Dict[str, Any]]:
310
- """Search for library with specific content patterns."""
311
- results = []
312
- content_patterns = [
313
- f"{library} if __name__",
314
- f"{library} quickstart",
315
- f"{library} tutorial",
316
- f"{library} usage example",
317
- ]
318
-
319
- for pattern in content_patterns:
320
- query_parts = [f"org:{org}", pattern]
321
- if repo_scope:
322
- query_parts[0] = f"repo:{org}/{repo_scope}"
323
 
324
- query = " ".join(query_parts)
325
- results.extend(_execute_search(query, token, limit=15))
 
 
 
 
 
 
326
 
327
- return results
 
 
 
328
 
329
-
330
- def _search_readmes(
331
- library: str, org: str, repo_scope: Optional[str], token: str
332
- ) -> List[Dict[str, Any]]:
333
- """Search for library mentions in README files."""
334
- query_parts = [f"org:{org}", f"{library}", "filename:README"]
335
- if repo_scope:
336
- query_parts[0] = f"repo:{org}/{repo_scope}"
337
-
338
- query = " ".join(query_parts)
339
- return _execute_search(query, token, limit=20)
340
 
341
 
342
  def find_examples(
@@ -344,30 +184,45 @@ def find_examples(
344
  org: str = "huggingface",
345
  repo_scope: Optional[str] = None,
346
  max_results: int = 10,
347
- ) -> List[Example]:
348
  """
349
- Find examples, guides, and tutorials for a library using deterministic queries.
350
-
351
- Uses a playbook of smart searches and heuristics to find canonical examples:
352
- - Prefers README.md, docs/**, examples/**, notebooks/**, tests/**
353
- - Prefers files with if __name__ == "__main__", "quickstart", "tutorial"
354
- - Prefers repos with higher stars and more recent updates
355
 
356
  Args:
357
- library: Library name to search for (e.g., "transformers", "torch")
358
- org: GitHub organization to search in (default: "huggingface")
359
- repo_scope: Optional specific repository (e.g., "transformers")
360
- max_results: Maximum number of results to return (default: 10)
361
 
362
  Returns:
363
- List of Example objects, ranked by relevance score
364
  """
365
- token = _get_github_token()
366
-
 
 
 
 
 
 
 
 
367
  all_results = []
368
- all_results.extend(_search_by_path(library, org, repo_scope, token))
369
- all_results.extend(_search_by_content(library, org, repo_scope, token))
370
- all_results.extend(_search_readmes(library, org, repo_scope, token))
 
 
 
 
 
 
 
 
 
 
 
 
371
 
372
  # Deduplicate
373
  seen = set()
@@ -378,135 +233,89 @@ def find_examples(
378
  seen.add(key)
379
  unique_results.append(result)
380
 
381
- scored_examples = _score_and_rank(unique_results, library, token)
382
- return scored_examples[:max_results]
383
-
384
-
385
- async def _async_call(func, *args, **kwargs):
386
- """Wrap synchronous calls for async context."""
387
- return await asyncio.to_thread(func, *args, **kwargs)
388
-
389
-
390
- def _format_examples_table(examples: List[Example]) -> str:
391
- """Format examples as a markdown table."""
392
- if not examples:
393
- return "No examples found."
394
-
395
- lines = [
396
- "| Rank | File | Score | Stars | Reason |",
397
- "|------|------|-------|-------|--------|",
398
- ]
399
-
400
- for i, ex in enumerate(examples, 1):
401
- file_path = f"{ex.repo}/{ex.path}"
402
- if len(file_path) > 60:
403
- file_path = file_path[:57] + "..."
404
- reason = ex.reason if len(ex.reason) < 40 else ex.reason[:37] + "..."
405
- lines.append(
406
- f"| {i} | {file_path} | {ex.score:.1f} | {ex.repo_stars:,} | {reason} |"
407
- )
408
-
409
- return "\n".join(lines)
410
-
411
-
412
- class FindExamplesTool:
413
- """Tool for finding examples and tutorials for libraries."""
414
-
415
- async def execute(self, params: Dict[str, Any]) -> ToolResult:
416
- """Execute find_examples operation."""
417
- library = params.get("library")
418
- if not library:
419
- return {
420
- "formatted": "Error: 'library' parameter is required",
421
- "totalResults": 0,
422
- "resultsShared": 0,
423
- "isError": True,
424
  }
 
425
 
426
- org = params.get("org", "huggingface")
427
- repo_scope = params.get("repo_scope")
428
- max_results = params.get("max_results", 10)
429
-
430
- try:
431
- examples = await _async_call(
432
- find_examples,
433
- library=library,
434
- org=org,
435
- repo_scope=repo_scope,
436
- max_results=max_results,
437
- )
438
-
439
- if not examples:
440
- return {
441
- "formatted": f"No examples found for '{library}' in {org}",
442
- "totalResults": 0,
443
- "resultsShared": 0,
444
- }
445
 
446
- table = _format_examples_table(examples)
447
- response = f"**Found {len(examples)} examples for '{library}' in {org}:**\n\n{table}"
448
 
449
- # Add URLs and suggest using read_file
450
- response += "\n\n**Top examples (use read_file to view):**\n"
451
- for i, ex in enumerate(examples[:3], 1):
452
- response += f"{i}. [{ex.repo}/{ex.path}]({ex.url})\n"
453
- response += f" Use: read_file(repo='{ex.repo}', path='{ex.path}')\n"
454
 
455
- return {
456
- "formatted": response,
457
- "totalResults": len(examples),
458
- "resultsShared": len(examples),
459
- }
460
-
461
- except GitHubAPIError as e:
462
- return {
463
- "formatted": f"GitHub API Error: {str(e)}",
464
- "totalResults": 0,
465
- "resultsShared": 0,
466
- "isError": True,
467
- }
468
- except Exception as e:
469
- return {
470
- "formatted": f"Error: {str(e)}",
471
- "totalResults": 0,
472
- "resultsShared": 0,
473
- "isError": True,
474
- }
475
 
476
 
477
  # Tool specification
478
- FIND_EXAMPLES_TOOL_SPEC = {
479
  "name": "find_examples",
480
  "description": (
481
- "Find examples, guides, and tutorials for a library using deterministic queries and heuristics.\n\n"
482
- "Uses best practices retrieval without semantic search:\n"
483
- "- Prefers README.md, docs/**, examples/**, notebooks/**, tests/**\n"
484
- "- Prefers files with if __name__ == '__main__', 'quickstart', 'tutorial', 'usage'\n"
485
- "- Prefers repos with higher stars and more recent updates\n\n"
486
- "Returns a ranked list of canonical example files.\n\n"
487
- "Examples:\n"
488
- "- Find transformers examples: {'library': 'transformers', 'org': 'huggingface', 'max_results': 5}\n"
489
- "- Find torch examples in specific repo: {'library': 'torch', 'org': 'pytorch', 'repo_scope': 'examples'}\n\n"
490
- "Use read_file tool to view the content of returned files.\n\n"
 
 
 
 
491
  ),
492
  "parameters": {
493
  "type": "object",
494
  "properties": {
495
  "library": {
496
  "type": "string",
497
- "description": "Library name to search for (e.g., 'transformers', 'torch', 'react')",
498
  },
499
  "org": {
500
  "type": "string",
501
- "description": "GitHub organization to search in (default: 'huggingface')",
502
  },
503
  "repo_scope": {
504
  "type": "string",
505
- "description": "Optional specific repository to search within",
506
  },
507
  "max_results": {
508
  "type": "integer",
509
- "description": "Maximum number of results to return (default: 10)",
510
  },
511
  },
512
  "required": ["library"],
@@ -514,11 +323,15 @@ FIND_EXAMPLES_TOOL_SPEC = {
514
  }
515
 
516
 
517
- async def find_examples_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
518
- """Handler for agent tool router."""
519
  try:
520
- tool = FindExamplesTool()
521
- result = await tool.execute(arguments)
 
 
 
 
522
  return result["formatted"], not result.get("isError", False)
523
  except Exception as e:
524
- return f"Error executing find_examples: {str(e)}", False
 
1
  """
2
+ GitHub Find Examples Tool - Discover examples, tutorials, and guides for any library
3
 
4
+ Uses intelligent heuristics to find the best learning resources on GitHub.
5
  """
6
 
 
7
  import math
8
  import os
 
9
  from datetime import datetime, timedelta
10
  from typing import Any, Dict, List, Optional
11
 
12
+ import requests
 
 
 
 
 
13
 
14
  from agent.tools.types import ToolResult
15
 
16
 
17
+ def _search_github_code(
18
+ query: str, token: str, limit: int = 20
19
+ ) -> List[Dict[str, Any]]:
20
+ """Execute a GitHub code search query"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  headers = {
22
  "Accept": "application/vnd.github.text-match+json",
23
  "X-GitHub-Api-Version": "2022-11-28",
 
31
  try:
32
  while len(results) < limit:
33
  params = {"q": query, "per_page": per_page, "page": page}
34
+ response = requests.get(
35
+ "https://api.github.com/search/code",
36
+ headers=headers,
37
+ params=params,
38
+ timeout=30,
39
+ )
40
 
41
  if response.status_code != 200:
42
  break
43
 
44
  data = response.json()
45
  items = data.get("items", [])
 
46
  if not items:
47
  break
48
 
 
60
 
61
  if len(results) >= limit or len(items) < per_page:
62
  break
 
63
  page += 1
64
 
65
  except Exception:
 
69
 
70
 
71
  def _fetch_repo_metadata(repos: List[str], token: str) -> Dict[str, Dict[str, Any]]:
72
+ """Fetch star count and update date for repositories"""
73
  headers = {
74
  "Accept": "application/vnd.github+json",
75
  "X-GitHub-Api-Version": "2022-11-28",
 
77
  }
78
 
79
  metadata = {}
 
80
  for repo in repos:
81
  try:
82
+ response = requests.get(
83
+ f"https://api.github.com/repos/{repo}", headers=headers, timeout=10
84
+ )
85
  if response.status_code == 200:
86
  data = response.json()
87
  metadata[repo] = {
88
  "stars": data.get("stargazers_count", 0),
89
  "updated_at": data.get("updated_at", ""),
 
90
  }
91
  except Exception:
92
  continue
 
94
  return metadata
95
 
96
 
97
+ def _score_example(
98
+ result: Dict[str, Any], metadata: Dict[str, Dict[str, Any]]
99
+ ) -> tuple[float, str]:
100
+ """Score an example based on multiple heuristics"""
101
+ path = result["path"].lower()
102
+ repo = result["repo"]
103
+ score = 0.0
104
+ reasons = []
105
+
106
+ # Path-based scoring
107
+ if "readme.md" in path:
108
+ score += 100
109
+ reasons.append("README file")
110
+ elif "examples/" in path or "example/" in path:
111
+ score += 90
112
+ reasons.append("in examples/")
113
+ elif "tutorials/" in path or "tutorial/" in path:
114
+ score += 85
115
+ reasons.append("in tutorials/")
116
+ elif "docs/" in path or "doc/" in path:
117
+ score += 80
118
+ reasons.append("in docs/")
119
+ elif "notebooks/" in path or "notebook/" in path:
120
+ score += 70
121
+ reasons.append("in notebooks/")
122
+
123
+ # Extension scoring
124
+ if path.endswith(".ipynb"):
125
+ score += 15
126
+ elif path.endswith(".md"):
127
+ score += 20
128
+ elif path.endswith(".py"):
129
+ score += 10
130
+
131
+ # Content keywords from text matches
132
+ text_content = ""
133
+ for match in result.get("text_matches", []):
134
+ text_content += match.get("fragment", "").lower() + " "
135
+
136
+ if 'if __name__ == "__main__"' in text_content:
137
+ score += 50
138
+ reasons.append("runnable example")
139
+ if "quickstart" in text_content or "getting started" in text_content:
140
+ score += 60
141
+ reasons.append("quickstart guide")
142
+ if "tutorial" in text_content:
143
+ score += 50
144
+ reasons.append("tutorial content")
145
+
146
+ # Repository metadata scoring
147
+ repo_meta = metadata.get(repo, {})
148
+ stars = repo_meta.get("stars", 0)
149
+ updated_at = repo_meta.get("updated_at", "")
150
+
151
+ # Star-based score (logarithmic)
152
+ if stars > 0:
153
+ score += math.log10(stars + 1) * 10
154
+
155
+ # Recency bonus (updated in last 6 months)
156
+ if updated_at:
157
+ try:
158
+ updated_date = datetime.fromisoformat(updated_at.replace("Z", "+00:00"))
159
+ if datetime.now(updated_date.tzinfo) - updated_date < timedelta(days=180):
160
+ score += 20
161
+ reasons.append("recently updated")
162
+ except Exception:
163
+ pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
+ # Filename quality
166
+ filename = path.split("/")[-1].lower()
167
+ if any(
168
+ word in filename
169
+ for word in ["example", "tutorial", "guide", "quickstart", "demo"]
170
+ ):
171
+ score += 30
172
+ reasons.append("descriptive filename")
173
 
174
+ # Size penalty for very large files
175
+ if result["size"] > 100000:
176
+ score *= 0.5
177
+ reasons.append("large file")
178
 
179
+ return score, ", ".join(reasons) if reasons else "matches library"
 
 
 
 
 
 
 
 
 
 
180
 
181
 
182
  def find_examples(
 
184
  org: str = "huggingface",
185
  repo_scope: Optional[str] = None,
186
  max_results: int = 10,
187
+ ) -> ToolResult:
188
  """
189
+ Find examples, tutorials, and guides for a library using intelligent search.
 
 
 
 
 
190
 
191
  Args:
192
+ library: Library name (e.g., "transformers", "torch", "react")
193
+ org: GitHub organization to search in
194
+ repo_scope: Optional specific repository name
195
+ max_results: Maximum number of results (default 10)
196
 
197
  Returns:
198
+ ToolResult with ranked examples
199
  """
200
+ token = os.environ.get("GITHUB_TOKEN")
201
+ if not token:
202
+ return {
203
+ "formatted": "Error: GITHUB_TOKEN environment variable is required",
204
+ "totalResults": 0,
205
+ "resultsShared": 0,
206
+ "isError": True,
207
+ }
208
+
209
+ # Build search queries
210
  all_results = []
211
+
212
+ # Query 1: Search in example directories
213
+ for path_pattern in ["examples/", "docs/", "tutorials/", "notebooks/"]:
214
+ query_parts = [f"org:{org}", library, f"path:{path_pattern}"]
215
+ if repo_scope:
216
+ query_parts[0] = f"repo:{org}/{repo_scope}"
217
+ query = " ".join(query_parts)
218
+ all_results.extend(_search_github_code(query, token, limit=20))
219
+
220
+ # Query 2: Search README files
221
+ query_parts = [f"org:{org}", library, "filename:README"]
222
+ if repo_scope:
223
+ query_parts[0] = f"repo:{org}/{repo_scope}"
224
+ query = " ".join(query_parts)
225
+ all_results.extend(_search_github_code(query, token, limit=20))
226
 
227
  # Deduplicate
228
  seen = set()
 
233
  seen.add(key)
234
  unique_results.append(result)
235
 
236
+ if not unique_results:
237
+ return {
238
+ "formatted": f"No examples found for '{library}' in {org}",
239
+ "totalResults": 0,
240
+ "resultsShared": 0,
241
+ }
242
+
243
+ # Fetch repo metadata
244
+ repos = list(set(r["repo"] for r in unique_results))
245
+ metadata = _fetch_repo_metadata(repos, token)
246
+
247
+ # Score and rank
248
+ scored = []
249
+ for result in unique_results:
250
+ score, reason = _score_example(result, metadata)
251
+ repo_meta = metadata.get(result["repo"], {})
252
+ scored.append(
253
+ {
254
+ "repo": result["repo"],
255
+ "path": result["path"],
256
+ "url": result["url"],
257
+ "score": score,
258
+ "reason": reason,
259
+ "stars": repo_meta.get("stars", 0),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  }
261
+ )
262
 
263
+ scored.sort(key=lambda x: x["score"], reverse=True)
264
+ top_results = scored[:max_results]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
 
266
+ # Format output
267
+ lines = [f"**Found {len(top_results)} examples for '{library}' in {org}:**\n"]
268
 
269
+ for i, ex in enumerate(top_results, 1):
270
+ lines.append(f"{i}. **{ex['repo']}/{ex['path']}**")
271
+ lines.append(f" Score: {ex['score']:.1f} | ⭐ {ex['stars']:,} stars")
272
+ lines.append(f" Reason: {ex['reason']}")
273
+ lines.append(f" URL: {ex['url']}\n")
274
 
275
+ return {
276
+ "formatted": "\n".join(lines),
277
+ "totalResults": len(top_results),
278
+ "resultsShared": len(top_results),
279
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
 
282
  # Tool specification
283
+ GITHUB_FIND_EXAMPLES_TOOL_SPEC = {
284
  "name": "find_examples",
285
  "description": (
286
+ "Find examples, tutorials, and guides for any library on GitHub using intelligent heuristic-based search.\n\n"
287
+ "Uses multiple search strategies and ranks results by:\n"
288
+ "- Path quality (examples/, docs/, tutorials/ directories)\n"
289
+ "- Content keywords (quickstart, tutorial, runnable code)\n"
290
+ "- Repository popularity (stars, recent updates)\n"
291
+ "- File characteristics (size, extension, descriptive names)\n\n"
292
+ "## Examples:\n\n"
293
+ "**Find transformers examples in Hugging Face:**\n"
294
+ "{'library': 'transformers', 'org': 'huggingface', 'max_results': 5}\n\n"
295
+ "**Find PyTorch examples in specific repo:**\n"
296
+ "{'library': 'torch', 'org': 'pytorch', 'repo_scope': 'examples', 'max_results': 10}\n\n"
297
+ "**Find React quickstart guides:**\n"
298
+ "{'library': 'react quickstart', 'org': 'facebook', 'max_results': 3}\n\n"
299
+ "Returns ranked list with file paths, scores, star counts, and direct URLs."
300
  ),
301
  "parameters": {
302
  "type": "object",
303
  "properties": {
304
  "library": {
305
  "type": "string",
306
+ "description": "Library name to search for (e.g., 'transformers', 'torch', 'react'). Required.",
307
  },
308
  "org": {
309
  "type": "string",
310
+ "description": "GitHub organization to search in. Default: 'huggingface'.",
311
  },
312
  "repo_scope": {
313
  "type": "string",
314
+ "description": "Optional specific repository name within the org (e.g., 'transformers').",
315
  },
316
  "max_results": {
317
  "type": "integer",
318
+ "description": "Maximum number of results to return. Default: 10.",
319
  },
320
  },
321
  "required": ["library"],
 
323
  }
324
 
325
 
326
async def github_find_examples_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Handler for agent tool router

    Args:
        arguments: Tool-call arguments. "library" is required; "org"
            (default "huggingface"), "repo_scope" and "max_results"
            (default 10) are optional.

    Returns:
        A (message, success) tuple: the formatted text from find_examples and
        True unless the result is flagged with "isError", or an error message
        and False when validation fails or an exception is raised.
    """
    # Local import so this block stays self-contained.
    import asyncio

    library = arguments.get("library")
    if not library:
        # Fail with an explicit message instead of surfacing a bare KeyError.
        return "Error: 'library' parameter is required", False

    try:
        # find_examples is synchronous; run it in a worker thread so the
        # event loop is not blocked while it executes.
        result = await asyncio.to_thread(
            find_examples,
            library=library,
            org=arguments.get("org", "huggingface"),
            repo_scope=arguments.get("repo_scope"),
            max_results=arguments.get("max_results", 10),
        )
        return result["formatted"], not result.get("isError", False)
    except Exception as e:
        # Router boundary: report any failure as a tool error, never raise.
        return f"Error finding examples: {str(e)}", False
agent/tools/github_list_repos.py CHANGED
@@ -1,70 +1,49 @@
1
  """
2
- GitHub List Repos Tool
3
 
4
- Lists repositories for a user or organization with sorting options.
5
  """
6
 
7
- import asyncio
8
  import os
9
- from dataclasses import asdict, dataclass
10
- from typing import Any, Dict, List, Literal, Optional
11
 
12
- try:
13
- import requests
14
- except ImportError:
15
- raise ImportError(
16
- "requests library is required. Install with: pip install requests"
17
- )
18
 
19
  from agent.tools.types import ToolResult
20
 
21
 
22
- @dataclass
23
- class Repository:
24
- """Repository information."""
25
-
26
- id: int
27
- name: str
28
- full_name: str
29
- description: Optional[str]
30
- html_url: str
31
- language: Optional[str]
32
- stars: int
33
- forks: int
34
- open_issues: int
35
- private: bool
36
- fork: bool
37
- archived: bool
38
- default_branch: str
39
- created_at: Optional[str] = None
40
- updated_at: Optional[str] = None
41
- topics: Optional[List[str]] = None
42
-
43
- def to_dict(self):
44
- return asdict(self)
45
-
46
-
47
- class GitHubAPIError(Exception):
48
- """Raised when GitHub API returns an error."""
49
-
50
- pass
51
 
 
 
 
 
 
 
52
 
53
- def _get_github_token() -> str:
54
- """Get GitHub token from environment."""
 
55
  token = os.environ.get("GITHUB_TOKEN")
56
  if not token:
57
- raise GitHubAPIError(
58
- "GITHUB_TOKEN environment variable is required. "
59
- "Set it with: export GITHUB_TOKEN=your_token_here"
60
- )
61
- return token
 
62
 
 
 
63
 
64
- def _fetch_repositories(
65
- query: str, sort: str, order: str, limit: Optional[int], token: str
66
- ) -> List[Repository]:
67
- """Fetch repositories from GitHub Search API."""
68
  headers = {
69
  "Accept": "application/vnd.github+json",
70
  "X-GitHub-Api-Version": "2022-11-28",
@@ -75,22 +54,46 @@ def _fetch_repositories(
75
  page = 1
76
  per_page = min(100, limit) if limit else 100
77
 
78
- while True:
79
- params = {
80
- "q": query,
81
- "sort": sort,
82
- "order": order,
83
- "page": page,
84
- "per_page": per_page,
85
- }
 
86
 
87
- url = "https://api.github.com/search/repositories"
 
 
 
 
 
88
 
89
- try:
90
- response = requests.get(url, headers=headers, params=params, timeout=30)
 
 
 
 
 
 
91
 
92
  if response.status_code != 200:
93
- break
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
  data = response.json()
96
  items = data.get("items", [])
@@ -99,214 +102,122 @@ def _fetch_repositories(
99
  break
100
 
101
  for item in items:
102
- repo = Repository(
103
- id=item.get("id"),
104
- name=item.get("name"),
105
- full_name=item.get("full_name"),
106
- description=item.get("description"),
107
- html_url=item.get("html_url"),
108
- language=item.get("language"),
109
- stars=item.get("stargazers_count", 0),
110
- forks=item.get("forks_count", 0),
111
- open_issues=item.get("open_issues_count", 0),
112
- private=item.get("private", False),
113
- fork=item.get("fork", False),
114
- archived=item.get("archived", False),
115
- default_branch=item.get("default_branch", "main"),
116
- created_at=item.get("created_at"),
117
- updated_at=item.get("updated_at"),
118
- topics=item.get("topics", []),
119
  )
120
- all_repos.append(repo)
121
 
 
122
  if limit and len(all_repos) >= limit:
123
  all_repos = all_repos[:limit]
124
  break
125
 
126
  total_count = data.get("total_count", 0)
127
- if len(all_repos) >= total_count:
128
- break
129
-
130
- if page * per_page >= 1000:
131
  break
132
 
133
  page += 1
134
 
135
- except requests.exceptions.RequestException:
136
- break
137
-
138
- return all_repos
139
-
140
-
141
- def list_repos(
142
- owner: str,
143
- owner_type: Literal["user", "org"] = "org",
144
- sort: Literal["stars", "forks", "updated", "created"] = "stars",
145
- order: Literal["asc", "desc"] = "desc",
146
- limit: Optional[int] = None,
147
- ) -> List[Repository]:
148
- """
149
- List repositories for a user or organization using GitHub Search API.
150
-
151
- Backed by https://api.github.com/search/repositories?q=org:huggingface&sort=stars&order=desc
152
- or can use GraphQL + client-side sort.
153
-
154
- Args:
155
- owner: GitHub username or organization name
156
- owner_type: Whether the owner is a "user" or "org" (default: "org")
157
- sort: Sort field - "stars", "forks", "updated", or "created" (default: "stars")
158
- order: Sort order - "asc" or "desc" (default: "desc")
159
- limit: Maximum number of repositories to return (default: no limit)
160
-
161
- Returns:
162
- List of Repository objects
163
- """
164
- token = _get_github_token()
165
-
166
- if owner_type == "org":
167
- query = f"org:{owner}"
168
- else:
169
- query = f"user:{owner}"
170
-
171
- repos = _fetch_repositories(
172
- query=query, sort=sort, order=order, limit=limit, token=token
173
- )
174
-
175
- return repos
176
-
177
-
178
- async def _async_call(func, *args, **kwargs):
179
- """Wrap synchronous calls for async context."""
180
- return await asyncio.to_thread(func, *args, **kwargs)
181
-
182
 
183
- def _format_repos_table(repos: List[Repository]) -> str:
184
- """Format repositories as a markdown table."""
185
- if not repos:
186
- return "No repositories found."
 
 
187
 
188
- lines = [
189
- "| Repo | Stars | Forks | Language | Description |",
190
- "|------|-------|-------|----------|-------------|",
191
- ]
192
 
193
- for repo in repos:
194
- desc = repo.description or "N/A"
195
- if len(desc) > 50:
196
- desc = desc[:47] + "..."
197
- lang = repo.language or "N/A"
198
  lines.append(
199
- f"| {repo.full_name} | {repo.stars:,} | {repo.forks:,} | {lang} | {desc} |"
200
  )
201
-
202
- return "\n".join(lines)
203
-
204
-
205
- class ListReposTool:
206
- """Tool for listing GitHub repositories."""
207
-
208
- async def execute(self, params: Dict[str, Any]) -> ToolResult:
209
- """Execute list_repos operation."""
210
- owner = params.get("owner")
211
- if not owner:
212
- return {
213
- "formatted": "Error: 'owner' parameter is required",
214
- "totalResults": 0,
215
- "resultsShared": 0,
216
- "isError": True,
217
- }
218
-
219
- owner_type = params.get("owner_type", "org")
220
- sort = params.get("sort", "stars")
221
- order = params.get("order", "desc")
222
- limit = params.get("limit")
223
-
224
- try:
225
- repos = await _async_call(
226
- list_repos,
227
- owner=owner,
228
- owner_type=owner_type,
229
- sort=sort,
230
- order=order,
231
- limit=limit,
232
  )
233
-
234
- if not repos:
235
- return {
236
- "formatted": f"No repositories found for {owner}",
237
- "totalResults": 0,
238
- "resultsShared": 0,
239
- }
240
-
241
- table = _format_repos_table(repos)
242
- response = f"**Found {len(repos)} repositories for {owner} (sorted by {sort}, {order}):**\n\n{table}"
243
-
244
- # Add links to top repos
245
- response += "\n\n**Top repositories:**\n"
246
- for i, repo in enumerate(repos[:5], 1):
247
- response += (
248
- f"{i}. [{repo.full_name}]({repo.html_url}) - ⭐ {repo.stars:,}\n"
249
- )
250
-
251
- return {
252
- "formatted": response,
253
- "totalResults": len(repos),
254
- "resultsShared": len(repos),
255
- }
256
-
257
- except GitHubAPIError as e:
258
- return {
259
- "formatted": f"GitHub API Error: {str(e)}",
260
- "totalResults": 0,
261
- "resultsShared": 0,
262
- "isError": True,
263
- }
264
- except Exception as e:
265
- return {
266
- "formatted": f"Error: {str(e)}",
267
- "totalResults": 0,
268
- "resultsShared": 0,
269
- "isError": True,
270
- }
271
 
272
 
273
  # Tool specification
274
- LIST_REPOS_TOOL_SPEC = {
275
  "name": "list_repos",
276
  "description": (
277
- "List repositories for a user or organization with sorting options.\n\n"
278
- "Backed by GitHub Search API: https://api.github.com/search/repositories?q=org:huggingface&sort=stars&order=desc\n\n"
279
- "Examples:\n"
280
- "- Top 10 starred repos: {'owner': 'huggingface', 'sort': 'stars', 'limit': 10}\n"
281
- "- Recently updated: {'owner': 'microsoft', 'sort': 'updated', 'order': 'desc', 'limit': 5}\n"
282
- "- User repos: {'owner': 'torvalds', 'owner_type': 'user', 'sort': 'stars'}\n"
283
- "- All repos: {'owner': 'pytorch', 'sort': 'forks'}\n\n"
 
 
 
 
 
 
 
 
 
 
 
284
  ),
285
  "parameters": {
286
  "type": "object",
287
  "properties": {
288
  "owner": {
289
  "type": "string",
290
- "description": "GitHub username or organization name (e.g., 'huggingface', 'torvalds')",
291
  },
292
  "owner_type": {
293
  "type": "string",
294
  "enum": ["user", "org"],
295
- "description": "Whether the owner is a 'user' or 'org' (default: 'org')",
296
  },
297
  "sort": {
298
  "type": "string",
299
  "enum": ["stars", "forks", "updated", "created"],
300
- "description": "Sort field: 'stars', 'forks', 'updated', or 'created' (default: 'stars')",
301
  },
302
  "order": {
303
  "type": "string",
304
  "enum": ["asc", "desc"],
305
- "description": "Sort order: 'asc' or 'desc' (default: 'desc')",
306
  },
307
  "limit": {
308
  "type": "integer",
309
- "description": "Maximum number of repositories to return (default: no limit, returns all)",
310
  },
311
  },
312
  "required": ["owner"],
@@ -314,11 +225,16 @@ LIST_REPOS_TOOL_SPEC = {
314
  }
315
 
316
 
317
- async def list_repos_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
318
- """Handler for agent tool router."""
319
  try:
320
- tool = ListReposTool()
321
- result = await tool.execute(arguments)
 
 
 
 
 
322
  return result["formatted"], not result.get("isError", False)
323
  except Exception as e:
324
- return f"Error executing list_repos: {str(e)}", False
 
1
  """
2
+ GitHub List Repositories Tool - List and sort repositories for any user or organization
3
 
4
+ Efficiently discover repositories with flexible sorting options.
5
  """
6
 
 
7
  import os
8
+ from typing import Any, Dict, Literal, Optional
 
9
 
10
+ import requests
 
 
 
 
 
11
 
12
  from agent.tools.types import ToolResult
13
 
14
 
15
+ def list_repos(
16
+ owner: str,
17
+ owner_type: Literal["user", "org"] = "org",
18
+ sort: Literal["stars", "forks", "updated", "created"] = "stars",
19
+ order: Literal["asc", "desc"] = "desc",
20
+ limit: Optional[int] = None,
21
+ ) -> ToolResult:
22
+ """
23
+ List repositories for a user or organization using GitHub Search API.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
+ Args:
26
+ owner: GitHub username or organization name
27
+ owner_type: Whether the owner is a "user" or "org" (default: "org")
28
+ sort: Sort field - "stars", "forks", "updated", or "created"
29
+ order: Sort order - "asc" or "desc" (default: "desc")
30
+ limit: Maximum number of repositories to return
31
 
32
+ Returns:
33
+ ToolResult with repository information
34
+ """
35
  token = os.environ.get("GITHUB_TOKEN")
36
  if not token:
37
+ return {
38
+ "formatted": "Error: GITHUB_TOKEN environment variable is required",
39
+ "totalResults": 0,
40
+ "resultsShared": 0,
41
+ "isError": True,
42
+ }
43
 
44
+ # Build search query
45
+ query = f"org:{owner}" if owner_type == "org" else f"user:{owner}"
46
 
 
 
 
 
47
  headers = {
48
  "Accept": "application/vnd.github+json",
49
  "X-GitHub-Api-Version": "2022-11-28",
 
54
  page = 1
55
  per_page = min(100, limit) if limit else 100
56
 
57
+ try:
58
+ while True:
59
+ params = {
60
+ "q": query,
61
+ "sort": sort,
62
+ "order": order,
63
+ "page": page,
64
+ "per_page": per_page,
65
+ }
66
 
67
+ response = requests.get(
68
+ "https://api.github.com/search/repositories",
69
+ headers=headers,
70
+ params=params,
71
+ timeout=30,
72
+ )
73
 
74
+ if response.status_code == 403:
75
+ error_data = response.json()
76
+ return {
77
+ "formatted": f"GitHub API rate limit or permission error: {error_data.get('message', 'Unknown error')}",
78
+ "totalResults": 0,
79
+ "resultsShared": 0,
80
+ "isError": True,
81
+ }
82
 
83
  if response.status_code != 200:
84
+ error_msg = f"GitHub API error (status {response.status_code})"
85
+ try:
86
+ error_data = response.json()
87
+ if "message" in error_data:
88
+ error_msg += f": {error_data['message']}"
89
+ except Exception:
90
+ pass
91
+ return {
92
+ "formatted": error_msg,
93
+ "totalResults": 0,
94
+ "resultsShared": 0,
95
+ "isError": True,
96
+ }
97
 
98
  data = response.json()
99
  items = data.get("items", [])
 
102
  break
103
 
104
  for item in items:
105
+ all_repos.append(
106
+ {
107
+ "name": item.get("name"),
108
+ "full_name": item.get("full_name"),
109
+ "description": item.get("description"),
110
+ "html_url": item.get("html_url"),
111
+ "language": item.get("language"),
112
+ "stars": item.get("stargazers_count", 0),
113
+ "forks": item.get("forks_count", 0),
114
+ "open_issues": item.get("open_issues_count", 0),
115
+ "topics": item.get("topics", []),
116
+ "updated_at": item.get("updated_at"),
117
+ }
 
 
 
 
118
  )
 
119
 
120
+ # Check limits
121
  if limit and len(all_repos) >= limit:
122
  all_repos = all_repos[:limit]
123
  break
124
 
125
  total_count = data.get("total_count", 0)
126
+ if len(all_repos) >= total_count or page * per_page >= 1000:
 
 
 
127
  break
128
 
129
  page += 1
130
 
131
+ except requests.exceptions.RequestException as e:
132
+ return {
133
+ "formatted": f"Failed to connect to GitHub API: {str(e)}",
134
+ "totalResults": 0,
135
+ "resultsShared": 0,
136
+ "isError": True,
137
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
+ if not all_repos:
140
+ return {
141
+ "formatted": f"No repositories found for {owner_type} '{owner}'",
142
+ "totalResults": 0,
143
+ "resultsShared": 0,
144
+ }
145
 
146
+ # Format output
147
+ lines = [f"**Found {len(all_repos)} repositories for {owner}:**\n"]
 
 
148
 
149
+ for i, repo in enumerate(all_repos, 1):
150
+ lines.append(f"{i}. **{repo['full_name']}**")
 
 
 
151
  lines.append(
152
+ f" {repo['stars']:,} stars | 🍴 {repo['forks']:,} forks | Language: {repo['language'] or 'N/A'}"
153
  )
154
+ if repo["description"]:
155
+ desc = (
156
+ repo["description"][:100] + "..."
157
+ if len(repo["description"]) > 100
158
+ else repo["description"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  )
160
+ lines.append(f" {desc}")
161
+ lines.append(f" URL: {repo['html_url']}")
162
+ if repo["topics"]:
163
+ lines.append(f" Topics: {', '.join(repo['topics'][:5])}")
164
+ lines.append("")
165
+
166
+ return {
167
+ "formatted": "\n".join(lines),
168
+ "totalResults": len(all_repos),
169
+ "resultsShared": len(all_repos),
170
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
 
173
  # Tool specification
174
+ GITHUB_LIST_REPOS_TOOL_SPEC = {
175
  "name": "list_repos",
176
  "description": (
177
+ "List and sort repositories for any GitHub user or organization.\n\n"
178
+ "Uses GitHub Search API for efficient sorting by stars, forks, update date, or creation date.\n"
179
+ "Returns comprehensive repository information including:\n"
180
+ "- Stars, forks, and open issues count\n"
181
+ "- Primary programming language\n"
182
+ "- Repository topics/tags\n"
183
+ "- Last update timestamp\n"
184
+ "- Direct URLs\n\n"
185
+ "## Examples:\n\n"
186
+ "**List top 10 starred Hugging Face repos:**\n"
187
+ "{'owner': 'huggingface', 'owner_type': 'org', 'sort': 'stars', 'limit': 10}\n\n"
188
+ "**List recently updated Microsoft repos:**\n"
189
+ "{'owner': 'microsoft', 'sort': 'updated', 'order': 'desc', 'limit': 5}\n\n"
190
+ "**List all repos for a user:**\n"
191
+ "{'owner': 'torvalds', 'owner_type': 'user', 'sort': 'stars'}\n\n"
192
+ "**Find most forked Google repos:**\n"
193
+ "{'owner': 'google', 'sort': 'forks', 'order': 'desc', 'limit': 20}\n\n"
194
+ "Perfect for discovering popular projects, finding active repositories, or exploring an organization's work."
195
  ),
196
  "parameters": {
197
  "type": "object",
198
  "properties": {
199
  "owner": {
200
  "type": "string",
201
+ "description": "GitHub username or organization name. Required.",
202
  },
203
  "owner_type": {
204
  "type": "string",
205
  "enum": ["user", "org"],
206
+ "description": "Whether the owner is a 'user' or 'org'. Default: 'org'.",
207
  },
208
  "sort": {
209
  "type": "string",
210
  "enum": ["stars", "forks", "updated", "created"],
211
+ "description": "Sort field. Options: 'stars', 'forks', 'updated', 'created'. Default: 'stars'.",
212
  },
213
  "order": {
214
  "type": "string",
215
  "enum": ["asc", "desc"],
216
+ "description": "Sort order. Options: 'asc', 'desc'. Default: 'desc'.",
217
  },
218
  "limit": {
219
  "type": "integer",
220
+ "description": "Maximum number of repositories to return. No limit if not specified.",
221
  },
222
  },
223
  "required": ["owner"],
 
225
  }
226
 
227
 
228
+ async def github_list_repos_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
229
+ """Handler for agent tool router"""
230
  try:
231
+ result = list_repos(
232
+ owner=arguments["owner"],
233
+ owner_type=arguments.get("owner_type", "org"),
234
+ sort=arguments.get("sort", "stars"),
235
+ order=arguments.get("order", "desc"),
236
+ limit=arguments.get("limit"),
237
+ )
238
  return result["formatted"], not result.get("isError", False)
239
  except Exception as e:
240
+ return f"Error listing repositories: {str(e)}", False
agent/tools/github_read_file.py CHANGED
@@ -1,135 +1,67 @@
1
  """
2
- GitHub Read File Tool
3
 
4
- Reads file contents from a GitHub repository with line range support.
5
  """
6
 
7
- import asyncio
8
  import base64
9
  import os
10
- from dataclasses import asdict, dataclass
11
- from typing import Any, Dict, Optional, Tuple
12
 
13
- try:
14
- import requests
15
- except ImportError:
16
- raise ImportError(
17
- "requests library is required. Install with: pip install requests"
18
- )
19
 
20
  from agent.tools.types import ToolResult
21
 
22
 
23
- @dataclass
24
- class FileContents:
25
- """File contents with metadata."""
26
-
27
- content: str
28
- sha: str
29
- path: str
30
- size: int
31
- last_modified: Optional[str]
32
- last_commit_sha: Optional[str]
33
- line_start: int
34
- line_end: int
35
- total_lines: int
36
- truncated: bool
37
- message: Optional[str] = None
38
-
39
- def to_dict(self):
40
- return asdict(self)
41
-
42
-
43
- class GitHubAPIError(Exception):
44
- """Raised when GitHub API returns an error."""
45
-
46
- pass
47
 
 
 
 
 
 
 
48
 
49
- def _get_github_token() -> str:
50
- """Get GitHub token from environment."""
 
51
  token = os.environ.get("GITHUB_TOKEN")
52
  if not token:
53
- raise GitHubAPIError(
54
- "GITHUB_TOKEN environment variable is required. "
55
- "Set it with: export GITHUB_TOKEN=your_token_here"
56
- )
57
- return token
58
-
59
-
60
- def _fetch_raw_content(owner: str, repo: str, path: str, ref: str, token: str) -> str:
61
- """Fetch raw file content for large files."""
62
- headers = {
63
- "Accept": "application/vnd.github.raw",
64
- "X-GitHub-Api-Version": "2022-11-28",
65
- "Authorization": f"Bearer {token}",
66
- }
67
-
68
- url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
69
- params = {"ref": ref}
70
-
71
- response = requests.get(url, headers=headers, params=params, timeout=30)
72
-
73
- if response.status_code != 200:
74
- raise GitHubAPIError(
75
- f"Failed to fetch raw content: HTTP {response.status_code}"
76
- )
77
-
78
- return response.text
79
-
80
-
81
- def _get_last_commit_info(
82
- owner: str, repo: str, path: str, ref: Optional[str], token: str
83
- ) -> Tuple[Optional[str], Optional[str]]:
84
- """Get last commit information for a specific file."""
85
- headers = {
86
- "Accept": "application/vnd.github+json",
87
- "X-GitHub-Api-Version": "2022-11-28",
88
- "Authorization": f"Bearer {token}",
89
- }
90
-
91
- url = f"https://api.github.com/repos/{owner}/{repo}/commits"
92
- params = {"path": path, "per_page": 1}
93
-
94
- if ref and ref != "HEAD":
95
- params["sha"] = ref
96
-
97
- try:
98
- response = requests.get(url, headers=headers, params=params, timeout=30)
99
-
100
- if response.status_code == 200:
101
- commits = response.json()
102
- if commits:
103
- commit = commits[0]
104
- commit_sha = commit.get("sha")
105
- commit_date = commit.get("commit", {}).get("committer", {}).get("date")
106
- return commit_date, commit_sha
107
-
108
- except Exception:
109
- pass
110
-
111
- return None, None
112
 
 
113
 
114
- def _fetch_file_contents(
115
- owner: str,
116
- repo: str,
117
- path: str,
118
- ref: str,
119
- line_start: Optional[int],
120
- line_end: Optional[int],
121
- token: str,
122
- ) -> FileContents:
123
- """Fetch file contents from GitHub API."""
124
  headers = {
125
  "Accept": "application/vnd.github+json",
126
  "X-GitHub-Api-Version": "2022-11-28",
127
  "Authorization": f"Bearer {token}",
128
  }
129
 
130
- url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
 
131
  params = {}
132
-
133
  if ref and ref != "HEAD":
134
  params["ref"] = ref
135
 
@@ -137,9 +69,12 @@ def _fetch_file_contents(
137
  response = requests.get(url, headers=headers, params=params, timeout=30)
138
 
139
  if response.status_code == 404:
140
- raise GitHubAPIError(
141
- f"File not found: {path} in {owner}/{repo} (ref: {ref})"
142
- )
 
 
 
143
 
144
  if response.status_code != 200:
145
  error_msg = f"GitHub API error (status {response.status_code})"
@@ -149,17 +84,23 @@ def _fetch_file_contents(
149
  error_msg += f": {error_data['message']}"
150
  except Exception:
151
  pass
152
- raise GitHubAPIError(error_msg)
 
 
 
 
 
153
 
154
  data = response.json()
155
 
 
156
  if data.get("type") != "file":
157
- raise GitHubAPIError(
158
- f"Path {path} is not a file (type: {data.get('type')})"
159
- )
160
-
161
- file_sha = data.get("sha")
162
- file_size = data.get("size", 0)
163
 
164
  # Decode content
165
  content_b64 = data.get("content", "")
@@ -167,214 +108,142 @@ def _fetch_file_contents(
167
  content_b64 = content_b64.replace("\n", "").replace(" ", "")
168
  content = base64.b64decode(content_b64).decode("utf-8", errors="replace")
169
  else:
170
- content = _fetch_raw_content(owner, repo, path, ref or "HEAD", token)
171
-
172
- except requests.exceptions.RequestException as e:
173
- raise GitHubAPIError(f"Failed to connect to GitHub API: {e}")
174
-
175
- # Get last commit info
176
- last_modified, last_commit_sha = _get_last_commit_info(
177
- owner, repo, path, ref, token
178
- )
179
-
180
- # Process line ranges
181
- lines = content.split("\n")
182
- total_lines = len(lines)
183
-
184
- truncated = False
185
- message = None
186
-
187
- if line_start is None and line_end is None:
188
- if total_lines > 300:
189
- line_start = 1
190
- line_end = 300
191
- truncated = True
192
- message = (
193
- f"File has {total_lines} lines. Returned only the first 300 lines. "
194
- f"To view more, use the line_start and line_end parameters."
195
- )
196
- else:
197
- line_start = 1
198
- line_end = total_lines
199
- else:
200
- if line_start is None:
201
- line_start = 1
202
- if line_end is None:
203
- line_end = total_lines
204
-
205
- if line_start < 1:
206
- line_start = 1
207
- if line_end > total_lines:
208
- line_end = total_lines
209
- if line_start > line_end:
210
- raise ValueError(
211
- f"line_start ({line_start}) cannot be greater than line_end ({line_end})"
212
- )
213
-
214
- selected_lines = lines[line_start - 1 : line_end]
215
- selected_content = "\n".join(selected_lines)
216
-
217
- return FileContents(
218
- content=selected_content,
219
- sha=file_sha,
220
- path=path,
221
- size=file_size,
222
- last_modified=last_modified,
223
- last_commit_sha=last_commit_sha,
224
- line_start=line_start,
225
- line_end=line_end,
226
- total_lines=total_lines,
227
- truncated=truncated,
228
- message=message,
229
- )
230
-
231
-
232
- def read_file(
233
- repo: str,
234
- path: str,
235
- ref: str = "HEAD",
236
- line_start: Optional[int] = None,
237
- line_end: Optional[int] = None,
238
- ) -> FileContents:
239
- """
240
- Read file contents from a GitHub repository.
241
-
242
- Returns raw file text plus metadata (commit SHA, last modified).
243
- If file is more than 300 lines and no line range is specified,
244
- returns only the first 300 lines with a message.
245
-
246
- Args:
247
- repo: Repository in format "owner/repo" (e.g., "huggingface/transformers")
248
- path: Path to file in repository (e.g., "README.md")
249
- ref: Git reference - branch name, tag, or commit SHA (default: "HEAD")
250
- line_start: Starting line number (1-indexed, inclusive)
251
- line_end: Ending line number (1-indexed, inclusive)
252
-
253
- Returns:
254
- FileContents object with content and metadata
255
- """
256
- if "/" not in repo:
257
- raise ValueError("repo must be in format 'owner/repo'")
258
-
259
- owner, repo_name = repo.split("/", 1)
260
- token = _get_github_token()
261
-
262
- return _fetch_file_contents(
263
- owner=owner,
264
- repo=repo_name,
265
- path=path,
266
- ref=ref,
267
- line_start=line_start,
268
- line_end=line_end,
269
- token=token,
270
- )
271
-
272
-
273
- async def _async_call(func, *args, **kwargs):
274
- """Wrap synchronous calls for async context."""
275
- return await asyncio.to_thread(func, *args, **kwargs)
276
-
277
-
278
- class ReadFileTool:
279
- """Tool for reading files from GitHub repositories."""
280
-
281
- async def execute(self, params: Dict[str, Any]) -> ToolResult:
282
- """Execute read_file operation."""
283
- repo = params.get("repo")
284
- path = params.get("path")
285
-
286
- if not repo or not path:
287
- return {
288
- "formatted": "Error: 'repo' and 'path' parameters are required",
289
- "totalResults": 0,
290
- "resultsShared": 0,
291
- "isError": True,
292
  }
293
-
294
- ref = params.get("ref", "HEAD")
295
- line_start = params.get("line_start")
296
- line_end = params.get("line_end")
297
-
298
- try:
299
- file_contents = await _async_call(
300
- read_file,
301
- repo=repo,
302
- path=path,
303
- ref=ref,
304
- line_start=line_start,
305
- line_end=line_end,
306
  )
 
 
 
 
 
 
 
 
 
 
 
 
307
 
308
- response = f"**File: {file_contents.path}**\n"
309
- response += f"**Repo: {repo}**\n"
310
- response += f"**Lines:** {file_contents.line_start}-{file_contents.line_end} of {file_contents.total_lines}\n"
311
- response += f"**SHA:** {file_contents.sha}\n"
312
-
313
- if file_contents.last_modified:
314
- response += f"**Last modified:** {file_contents.last_modified}\n"
315
-
316
- if file_contents.message:
317
- response += f"\n⚠️ {file_contents.message}\n"
318
-
319
- response += f"\n```\n{file_contents.content}\n```"
320
-
321
- return {
322
- "formatted": response,
323
- "totalResults": 1,
324
- "resultsShared": 1,
325
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
 
327
- except GitHubAPIError as e:
328
- return {
329
- "formatted": f"GitHub API Error: {str(e)}",
330
- "totalResults": 0,
331
- "resultsShared": 0,
332
- "isError": True,
333
- }
334
- except Exception as e:
335
- return {
336
- "formatted": f"Error: {str(e)}",
337
- "totalResults": 0,
338
- "resultsShared": 0,
339
- "isError": True,
340
- }
341
 
342
 
343
  # Tool specification
344
- READ_FILE_TOOL_SPEC = {
345
  "name": "read_file",
346
  "description": (
347
- "Read file contents from a GitHub repository.\n\n"
348
- "Returns raw file text plus metadata (commit SHA, last modified).\n"
349
- "If file is more than 300 lines, returns only the first 300 lines and includes line_start and line_end indexes.\n"
350
- "Use line_start and line_end parameters to view specific line ranges.\n\n"
351
- "Examples:\n"
352
- "- Read README: {'repo': 'huggingface/transformers', 'path': 'README.md'}\n"
353
- "- Read specific lines: {'repo': 'huggingface/transformers', 'path': 'src/transformers/__init__.py', 'line_start': 1, 'line_end': 50}\n"
354
- "- Read from branch: {'repo': 'torvalds/linux', 'path': 'MAINTAINERS', 'ref': 'master', 'line_start': 1, 'line_end': 20}\n\n"
 
 
 
 
 
 
 
 
 
355
  ),
356
  "parameters": {
357
  "type": "object",
358
  "properties": {
359
  "repo": {
360
  "type": "string",
361
- "description": "Repository in format 'owner/repo' (e.g., 'huggingface/transformers')",
362
  },
363
  "path": {
364
  "type": "string",
365
- "description": "Path to file in repository (e.g., 'README.md', 'src/main.py')",
366
  },
367
  "ref": {
368
  "type": "string",
369
- "description": "Git reference: branch name, tag, or commit SHA (default: 'HEAD')",
370
  },
371
  "line_start": {
372
  "type": "integer",
373
- "description": "Starting line number (1-indexed, inclusive). Use to read specific range.",
374
  },
375
  "line_end": {
376
  "type": "integer",
377
- "description": "Ending line number (1-indexed, inclusive). Use to read specific range.",
378
  },
379
  },
380
  "required": ["repo", "path"],
@@ -382,11 +251,16 @@ READ_FILE_TOOL_SPEC = {
382
  }
383
 
384
 
385
- async def read_file_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
386
- """Handler for agent tool router."""
387
  try:
388
- tool = ReadFileTool()
389
- result = await tool.execute(arguments)
 
 
 
 
 
390
  return result["formatted"], not result.get("isError", False)
391
  except Exception as e:
392
- return f"Error executing read_file: {str(e)}", False
 
1
  """
2
+ GitHub Read File Tool - Read file contents from any GitHub repository with line range support
3
 
4
+ Fetch exact file contents with metadata, supporting line ranges for efficient reading.
5
  """
6
 
 
7
  import base64
8
  import os
9
+ from typing import Any, Dict, Optional
 
10
 
11
+ import requests
 
 
 
 
 
12
 
13
  from agent.tools.types import ToolResult
14
 
15
 
16
+ def read_file(
17
+ repo: str,
18
+ path: str,
19
+ ref: str = "HEAD",
20
+ line_start: Optional[int] = None,
21
+ line_end: Optional[int] = None,
22
+ ) -> ToolResult:
23
+ """
24
+ Read file contents from a GitHub repository with line range support.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
+ Args:
27
+ repo: Repository in format "owner/repo" (e.g., "github/github-mcp-server")
28
+ path: Path to file in repository (e.g., "pkg/github/search.go")
29
+ ref: Git reference - branch name, tag, or commit SHA (default: "HEAD")
30
+ line_start: Starting line number (1-indexed, inclusive)
31
+ line_end: Ending line number (1-indexed, inclusive)
32
 
33
+ Returns:
34
+ ToolResult with file contents and metadata
35
+ """
36
  token = os.environ.get("GITHUB_TOKEN")
37
  if not token:
38
+ return {
39
+ "formatted": "Error: GITHUB_TOKEN environment variable is required",
40
+ "totalResults": 0,
41
+ "resultsShared": 0,
42
+ "isError": True,
43
+ }
44
+
45
+ # Parse repo
46
+ if "/" not in repo:
47
+ return {
48
+ "formatted": "Error: repo must be in format 'owner/repo'",
49
+ "totalResults": 0,
50
+ "resultsShared": 0,
51
+ "isError": True,
52
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
+ owner, repo_name = repo.split("/", 1)
55
 
 
 
 
 
 
 
 
 
 
 
56
  headers = {
57
  "Accept": "application/vnd.github+json",
58
  "X-GitHub-Api-Version": "2022-11-28",
59
  "Authorization": f"Bearer {token}",
60
  }
61
 
62
+ # Fetch file contents
63
+ url = f"https://api.github.com/repos/{owner}/{repo_name}/contents/{path}"
64
  params = {}
 
65
  if ref and ref != "HEAD":
66
  params["ref"] = ref
67
 
 
69
  response = requests.get(url, headers=headers, params=params, timeout=30)
70
 
71
  if response.status_code == 404:
72
+ return {
73
+ "formatted": f"File not found: {path} in {repo} (ref: {ref})",
74
+ "totalResults": 0,
75
+ "resultsShared": 0,
76
+ "isError": True,
77
+ }
78
 
79
  if response.status_code != 200:
80
  error_msg = f"GitHub API error (status {response.status_code})"
 
84
  error_msg += f": {error_data['message']}"
85
  except Exception:
86
  pass
87
+ return {
88
+ "formatted": error_msg,
89
+ "totalResults": 0,
90
+ "resultsShared": 0,
91
+ "isError": True,
92
+ }
93
 
94
  data = response.json()
95
 
96
+ # Check if it's a file
97
  if data.get("type") != "file":
98
+ return {
99
+ "formatted": f"Path {path} is not a file (type: {data.get('type')})",
100
+ "totalResults": 0,
101
+ "resultsShared": 0,
102
+ "isError": True,
103
+ }
104
 
105
  # Decode content
106
  content_b64 = data.get("content", "")
 
108
  content_b64 = content_b64.replace("\n", "").replace(" ", "")
109
  content = base64.b64decode(content_b64).decode("utf-8", errors="replace")
110
  else:
111
+ # For large files, fetch raw content
112
+ raw_headers = {
113
+ "Accept": "application/vnd.github.raw",
114
+ "X-GitHub-Api-Version": "2022-11-28",
115
+ "Authorization": f"Bearer {token}",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  }
117
+ raw_response = requests.get(
118
+ url, headers=raw_headers, params=params, timeout=30
 
 
 
 
 
 
 
 
 
 
 
119
  )
120
+ if raw_response.status_code != 200:
121
+ return {
122
+ "formatted": "Failed to fetch file content",
123
+ "totalResults": 0,
124
+ "resultsShared": 0,
125
+ "isError": True,
126
+ }
127
+ content = raw_response.text
128
+
129
+ # Get metadata
130
+ file_sha = data.get("sha")
131
+ file_size = data.get("size", 0)
132
 
133
+ # Process line ranges
134
+ lines = content.split("\n")
135
+ total_lines = len(lines)
136
+
137
+ truncated = False
138
+ message = None
139
+
140
+ if line_start is None and line_end is None:
141
+ # No range specified
142
+ if total_lines > 300:
143
+ line_start = 1
144
+ line_end = 300
145
+ truncated = True
146
+ message = f"File has {total_lines} lines. Showing first 300 lines. Use line_start and line_end to view more."
147
+ else:
148
+ line_start = 1
149
+ line_end = total_lines
150
+ else:
151
+ # Range specified
152
+ if line_start is None:
153
+ line_start = 1
154
+ if line_end is None:
155
+ line_end = total_lines
156
+
157
+ # Validate range
158
+ line_start = max(1, line_start)
159
+ line_end = min(total_lines, line_end)
160
+ if line_start > line_end:
161
+ return {
162
+ "formatted": f"Invalid range: line_start ({line_start}) > line_end ({line_end})",
163
+ "totalResults": 0,
164
+ "resultsShared": 0,
165
+ "isError": True,
166
+ }
167
+
168
+ # Extract lines
169
+ selected_lines = lines[line_start - 1 : line_end]
170
+ selected_content = "\n".join(selected_lines)
171
+
172
+ # Format output
173
+ lines_output = [f"**File: {repo}/{path}**"]
174
+ lines_output.append(f"SHA: {file_sha}")
175
+ lines_output.append(f"Size: {file_size:,} bytes")
176
+ lines_output.append(
177
+ f"Lines: {line_start}-{line_end} of {total_lines} total lines"
178
+ )
179
+ if ref and ref != "HEAD":
180
+ lines_output.append(f"Ref: {ref}")
181
+ if truncated and message:
182
+ lines_output.append(f"⚠️ {message}")
183
+ lines_output.append("\n**Content:**")
184
+ lines_output.append("```")
185
+ lines_output.append(selected_content)
186
+ lines_output.append("```")
187
+
188
+ return {
189
+ "formatted": "\n".join(lines_output),
190
+ "totalResults": 1,
191
+ "resultsShared": 1,
192
+ }
193
 
194
+ except requests.exceptions.RequestException as e:
195
+ return {
196
+ "formatted": f"Failed to connect to GitHub API: {str(e)}",
197
+ "totalResults": 0,
198
+ "resultsShared": 0,
199
+ "isError": True,
200
+ }
 
 
 
 
 
 
 
201
 
202
 
203
  # Tool specification
204
+ GITHUB_READ_FILE_TOOL_SPEC = {
205
  "name": "read_file",
206
  "description": (
207
+ "Read file contents from any GitHub repository with precise line range control.\n\n"
208
+ "Features:\n"
209
+ "- Read entire files or specific line ranges\n"
210
+ "- Auto-truncates large files to 300 lines (with warning)\n"
211
+ "- Works with any branch, tag, or commit SHA\n"
212
+ "- Returns file metadata (SHA, size, line count)\n"
213
+ "- Handles both small and large files efficiently\n\n"
214
+ "## Examples:\n\n"
215
+ "**Read entire README:**\n"
216
+ "{'repo': 'facebook/react', 'path': 'README.md'}\n\n"
217
+ "**Read specific line range:**\n"
218
+ "{'repo': 'torvalds/linux', 'path': 'kernel/sched/core.c', 'line_start': 100, 'line_end': 150}\n\n"
219
+ "**Read from specific branch:**\n"
220
+ "{'repo': 'python/cpython', 'path': 'Lib/ast.py', 'ref': 'main', 'line_start': 1, 'line_end': 50}\n\n"
221
+ "**Read from specific commit:**\n"
222
+ "{'repo': 'github/github-mcp-server', 'path': 'pkg/github/search.go', 'ref': 'abc123def'}\n\n"
223
+ "Perfect for examining code, reading documentation, or investigating specific implementations."
224
  ),
225
  "parameters": {
226
  "type": "object",
227
  "properties": {
228
  "repo": {
229
  "type": "string",
230
+ "description": "Repository in format 'owner/repo' (e.g., 'github/github-mcp-server'). Required.",
231
  },
232
  "path": {
233
  "type": "string",
234
+ "description": "Path to file in repository (e.g., 'src/index.js'). Required.",
235
  },
236
  "ref": {
237
  "type": "string",
238
+ "description": "Git reference - branch name, tag, or commit SHA. Default: 'HEAD'.",
239
  },
240
  "line_start": {
241
  "type": "integer",
242
+ "description": "Starting line number (1-indexed, inclusive). Optional.",
243
  },
244
  "line_end": {
245
  "type": "integer",
246
+ "description": "Ending line number (1-indexed, inclusive). Optional.",
247
  },
248
  },
249
  "required": ["repo", "path"],
 
251
  }
252
 
253
 
254
async def github_read_file_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Handler for agent tool router.

    Validates the required arguments and then runs ``read_file`` in a worker
    thread. ``read_file`` issues synchronous ``requests`` calls, so awaiting
    it through a thread keeps the event loop responsive while GitHub answers.

    Args:
        arguments: Tool arguments. ``repo`` and ``path`` are required;
            ``ref``, ``line_start`` and ``line_end`` are optional.

    Returns:
        ``(formatted_output, success)``. ``success`` is False when the result
        is flagged ``isError``, when a required argument is missing, or when
        an exception was raised.
    """
    import asyncio  # local import: only this handler needs it

    try:
        # Report missing required arguments explicitly instead of letting a
        # KeyError surface as the opaque message "Error reading file: 'repo'".
        missing = [key for key in ("repo", "path") if not arguments.get(key)]
        if missing:
            return (
                f"Error reading file: missing required parameter(s): {', '.join(missing)}",
                False,
            )

        result = await asyncio.to_thread(
            read_file,
            repo=arguments["repo"],
            path=arguments["path"],
            # An explicit null ref falls back to the default as well.
            ref=arguments.get("ref") or "HEAD",
            line_start=arguments.get("line_start"),
            line_end=arguments.get("line_end"),
        )
        return result["formatted"], not result.get("isError", False)
    except Exception as e:
        return f"Error reading file: {str(e)}", False
agent/tools/github_search_code.py CHANGED
@@ -1,123 +1,138 @@
1
  """
2
- GitHub Search Code Tool
3
 
4
- Searches code across GitHub with glob filtering and line-level results.
5
  """
6
 
7
- import asyncio
8
  import fnmatch
9
  import os
10
  import re
11
- from dataclasses import asdict, dataclass
12
- from typing import Any, Dict, List, Optional, Tuple
13
 
14
- try:
15
- import requests
16
- except ImportError:
17
- raise ImportError(
18
- "requests library is required. Install with: pip install requests"
19
- )
20
 
21
  from agent.tools.types import ToolResult
22
 
23
 
24
- @dataclass
25
- class CodeMatch:
26
- """A code match with location information."""
27
-
28
- repo: str
29
- path: str
30
- ref: str
31
- line_start: int
32
- line_end: int
33
- snippet: str
34
-
35
- def to_dict(self):
36
- return asdict(self)
37
-
38
 
39
- class GitHubAPIError(Exception):
40
- """Raised when GitHub API returns an error."""
41
 
42
- pass
 
 
 
 
 
 
 
 
43
 
 
 
 
 
 
 
44
 
45
- def _get_github_token() -> str:
46
- """Get GitHub token from environment."""
 
47
  token = os.environ.get("GITHUB_TOKEN")
48
  if not token:
49
- raise GitHubAPIError(
50
- "GITHUB_TOKEN environment variable is required. "
51
- "Set it with: export GITHUB_TOKEN=your_token_here"
52
- )
53
- return token
54
-
55
 
56
- def _build_github_query(
57
- query: str, repo_glob: Optional[str], path_glob: Optional[str], regex: bool
58
- ) -> str:
59
- """Build GitHub search query string from parameters."""
60
- parts = []
61
 
62
  if regex:
63
- parts.append(f"/{query}/")
64
  else:
65
- if " " in query:
66
- parts.append(f'"{query}"')
67
- else:
68
- parts.append(query)
69
 
 
70
  if repo_glob:
71
  if "/" in repo_glob:
72
- parts.append(f"repo:{repo_glob}")
73
  else:
74
- parts.append(f"user:{repo_glob}")
75
 
 
76
  if path_glob:
77
  if "*" not in path_glob and "?" not in path_glob:
78
- parts.append(f"path:{path_glob}")
79
  elif path_glob.startswith("*."):
80
  ext = path_glob[2:]
81
- parts.append(f"extension:{ext}")
82
  elif "/" not in path_glob and "*" in path_glob:
83
- parts.append(f"filename:{path_glob}")
84
  else:
85
- if "." in path_glob:
86
- ext_match = re.search(r"\*\.(\w+)", path_glob)
87
- if ext_match:
88
- parts.append(f"extension:{ext_match.group(1)}")
89
 
90
- return " ".join(parts)
91
 
92
-
93
- def _fetch_code_search_results(
94
- query: str, token: str, max_results: int
95
- ) -> List[Dict[str, Any]]:
96
- """Fetch code search results from GitHub API."""
97
  headers = {
98
  "Accept": "application/vnd.github.text-match+json",
99
  "X-GitHub-Api-Version": "2022-11-28",
100
  "Authorization": f"Bearer {token}",
101
  }
102
 
103
- all_items = []
104
  page = 1
105
  per_page = min(100, max_results)
106
 
107
- while len(all_items) < max_results:
108
- params = {
109
- "q": query,
110
- "page": page,
111
- "per_page": per_page,
112
- }
 
113
 
114
- url = "https://api.github.com/search/code"
 
 
 
 
 
115
 
116
- try:
117
- response = requests.get(url, headers=headers, params=params, timeout=30)
 
 
 
 
 
 
118
 
119
  if response.status_code != 200:
120
- break
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
  data = response.json()
123
  items = data.get("items", [])
@@ -125,245 +140,145 @@ def _fetch_code_search_results(
125
  if not items:
126
  break
127
 
128
- all_items.extend(items)
129
-
130
- if len(all_items) >= data.get("total_count", 0):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  break
132
 
133
  page += 1
134
 
135
- except requests.exceptions.RequestException:
136
- break
137
-
138
- return all_items[:max_results]
139
-
140
-
141
- def _glob_match(text: str, pattern: str) -> bool:
142
- """Check if text matches glob pattern, supporting ** for multi-level paths."""
143
- if "**" in pattern:
144
- regex_pattern = pattern.replace("**", "<<<DOUBLESTAR>>>")
145
- regex_pattern = fnmatch.translate(regex_pattern)
146
- regex_pattern = regex_pattern.replace("<<<DOUBLESTAR>>>", ".*")
147
- return re.match(regex_pattern, text) is not None
148
- else:
149
- return fnmatch.fnmatch(text, pattern)
150
-
151
-
152
- def _estimate_line_numbers(fragment: str) -> Tuple[int, int]:
153
- """Estimate line numbers from a code fragment."""
154
- lines = fragment.split("\n")
155
- line_count = len([line for line in lines if line.strip()])
156
- return 1, line_count
157
-
158
-
159
- def _parse_results_to_matches(
160
- raw_results: List[Dict[str, Any]],
161
- repo_glob: Optional[str],
162
- path_glob: Optional[str],
163
- ) -> List[CodeMatch]:
164
- """Parse raw GitHub API results into CodeMatch objects."""
165
- matches = []
166
-
167
- for item in raw_results:
168
- repo_name = item.get("repository", {}).get("full_name", "unknown/unknown")
169
- file_path = item.get("path", "")
170
- sha = item.get("sha", "unknown")
171
-
172
- if repo_glob and not _glob_match(repo_name, repo_glob):
173
- continue
174
-
175
- if path_glob and not _glob_match(file_path, path_glob):
176
- continue
177
-
178
- text_matches = item.get("text_matches", [])
179
-
180
- if text_matches:
181
- for text_match in text_matches:
182
- fragment = text_match.get("fragment", "")
183
- line_start, line_end = _estimate_line_numbers(fragment)
184
-
185
- match = CodeMatch(
186
- repo=repo_name,
187
- path=file_path,
188
- ref=sha,
189
- line_start=line_start,
190
- line_end=line_end,
191
- snippet=fragment.strip(),
192
- )
193
- matches.append(match)
194
- else:
195
- match = CodeMatch(
196
- repo=repo_name,
197
- path=file_path,
198
- ref=sha,
199
- line_start=1,
200
- line_end=1,
201
- snippet="<match found, but snippet not available>",
202
- )
203
- matches.append(match)
204
-
205
- return matches
206
-
207
-
208
- def search_code(
209
- query: str,
210
- repo_glob: Optional[str] = None,
211
- path_glob: Optional[str] = None,
212
- regex: bool = False,
213
- max_results: int = 100,
214
- ) -> List[CodeMatch]:
215
- """
216
- Search for code across GitHub with glob filtering and line-level results.
217
-
218
- Returns: repo, path, ref, line_start, line_end, snippet
219
-
220
- Args:
221
- query: Search term or pattern to find in code
222
- repo_glob: Glob pattern to filter repositories (e.g., "github/*", "facebook/react")
223
- path_glob: Glob pattern to filter file paths (e.g., "*.py", "src/**/*.js")
224
- regex: If True, treat query as a regular expression
225
- max_results: Maximum number of results to return (default: 100)
226
-
227
- Returns:
228
- List of CodeMatch objects with repo, path, ref, line numbers, and snippet
229
- """
230
- github_query = _build_github_query(query, repo_glob, path_glob, regex)
231
- token = _get_github_token()
232
-
233
- raw_results = _fetch_code_search_results(github_query, token, max_results)
234
- matches = _parse_results_to_matches(raw_results, repo_glob, path_glob)
235
-
236
- return matches
237
-
238
-
239
- async def _async_call(func, *args, **kwargs):
240
- """Wrap synchronous calls for async context."""
241
- return await asyncio.to_thread(func, *args, **kwargs)
242
-
243
-
244
- def _format_code_matches(matches: List[CodeMatch]) -> str:
245
- """Format code matches."""
246
- if not matches:
247
- return "No matches found."
248
-
249
- lines = []
250
- for i, match in enumerate(matches, 1):
251
- lines.append(f"**{i}. {match.repo}/{match.path}:{match.line_start}**")
252
- lines.append("```")
253
- # Show first 5 lines of snippet
254
- snippet_lines = match.snippet.split("\n")[:5]
255
- lines.extend(snippet_lines)
256
- if len(match.snippet.split("\n")) > 5:
257
- lines.append("...")
258
- lines.append("```")
259
- lines.append("")
260
-
261
- return "\n".join(lines)
262
-
263
-
264
- class SearchCodeTool:
265
- """Tool for searching code across GitHub."""
266
-
267
- async def execute(self, params: Dict[str, Any]) -> ToolResult:
268
- """Execute search_code operation."""
269
- query = params.get("query")
270
- if not query:
271
- return {
272
- "formatted": "Error: 'query' parameter is required",
273
- "totalResults": 0,
274
- "resultsShared": 0,
275
- "isError": True,
276
- }
277
 
278
- repo_glob = params.get("repo_glob")
279
- path_glob = params.get("path_glob")
280
- regex = params.get("regex", False)
281
- max_results = params.get("max_results", 100)
282
-
283
- try:
284
- matches = await _async_call(
285
- search_code,
286
- query=query,
287
- repo_glob=repo_glob,
288
- path_glob=path_glob,
289
- regex=regex,
290
- max_results=max_results,
291
- )
292
 
293
- if not matches:
294
- return {
295
- "formatted": "No matches found",
296
- "totalResults": 0,
297
- "resultsShared": 0,
298
- }
299
 
300
- formatted = _format_code_matches(matches)
301
- response = f"**Found {len(matches)} code matches:**\n\n{formatted}"
302
-
303
- # Add note about viewing full files
304
- if matches:
305
- response += "\n**To view full file, use:**\n"
306
- top_match = matches[0]
307
- response += (
308
- f"read_file(repo='{top_match.repo}', path='{top_match.path}')"
309
- )
310
-
311
- return {
312
- "formatted": response,
313
- "totalResults": len(matches),
314
- "resultsShared": min(len(matches), 10),
315
- }
316
 
317
- except GitHubAPIError as e:
318
- return {
319
- "formatted": f"GitHub API Error: {str(e)}",
320
- "totalResults": 0,
321
- "resultsShared": 0,
322
- "isError": True,
323
- }
324
- except Exception as e:
325
- return {
326
- "formatted": f"Error: {str(e)}",
327
- "totalResults": 0,
328
- "resultsShared": 0,
329
- "isError": True,
330
- }
 
 
 
 
 
 
 
 
 
331
 
332
 
333
  # Tool specification
334
- SEARCH_CODE_TOOL_SPEC = {
335
  "name": "search_code",
336
  "description": (
337
- "Search code across GitHub with glob filtering and line-level results.\n\n"
338
- "Returns: repo, path, ref, line_start, line_end, snippet\n\n"
339
- "Examples:\n"
340
- "- Search Python functions: {'query': 'def train', 'path_glob': '*.py', 'repo_glob': 'huggingface/*'}\n"
341
- "- Search TODO comments: {'query': 'TODO', 'repo_glob': 'github/*', 'max_results': 10}\n"
342
- "- Regex search: {'query': r'func Test\\w+', 'path_glob': '*.go', 'regex': True}\n"
343
- "- Search in specific repo: {'query': 'HfApi', 'repo_glob': 'huggingface/huggingface_hub', 'path_glob': '*.py'}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
344
  ),
345
  "parameters": {
346
  "type": "object",
347
  "properties": {
348
  "query": {
349
  "type": "string",
350
- "description": "Search term or pattern to find in code",
351
  },
352
  "repo_glob": {
353
  "type": "string",
354
- "description": "Glob pattern to filter repositories (e.g., 'github/*', 'facebook/react')",
355
  },
356
  "path_glob": {
357
  "type": "string",
358
- "description": "Glob pattern to filter file paths (e.g., '*.py', 'src/**/*.js', 'test_*.py')",
359
  },
360
  "regex": {
361
  "type": "boolean",
362
- "description": "Treat query as regular expression (default: false)",
363
  },
364
  "max_results": {
365
  "type": "integer",
366
- "description": "Maximum number of results to return (default: 100)",
367
  },
368
  },
369
  "required": ["query"],
@@ -371,11 +286,16 @@ SEARCH_CODE_TOOL_SPEC = {
371
  }
372
 
373
 
374
- async def search_code_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
375
- """Handler for agent tool router."""
376
  try:
377
- tool = SearchCodeTool()
378
- result = await tool.execute(arguments)
 
 
 
 
 
379
  return result["formatted"], not result.get("isError", False)
380
  except Exception as e:
381
- return f"Error executing search_code: {str(e)}", False
 
1
  """
2
+ GitHub Code Search Tool - Search code across GitHub with advanced filtering
3
 
4
+ Find code patterns using regex and glob filters for repositories and file paths.
5
  """
6
 
 
7
  import fnmatch
8
  import os
9
  import re
10
+ from typing import Any, Dict, Optional
 
11
 
12
+ import requests
 
 
 
 
 
13
 
14
  from agent.tools.types import ToolResult
15
 
16
 
17
+ def _glob_match(text: str, pattern: str) -> bool:
18
+ """Check if text matches glob pattern, supporting ** for multi-level paths"""
19
+ if "**" in pattern:
20
+ regex_pattern = pattern.replace("**", "<<<DOUBLESTAR>>>")
21
+ regex_pattern = fnmatch.translate(regex_pattern)
22
+ regex_pattern = regex_pattern.replace("<<<DOUBLESTAR>>>", ".*")
23
+ return re.match(regex_pattern, text) is not None
24
+ return fnmatch.fnmatch(text, pattern)
 
 
 
 
 
 
25
 
 
 
26
 
27
+ def search_code(
28
+ query: str,
29
+ repo_glob: Optional[str] = None,
30
+ path_glob: Optional[str] = None,
31
+ regex: bool = False,
32
+ max_results: int = 20,
33
+ ) -> ToolResult:
34
+ """
35
+ Search for code across GitHub with glob filtering.
36
 
37
+ Args:
38
+ query: Search term or pattern to find in code
39
+ repo_glob: Glob pattern to filter repositories (e.g., "github/*", "*/react")
40
+ path_glob: Glob pattern to filter file paths (e.g., "*.py", "src/**/*.js")
41
+ regex: If True, treat query as regular expression
42
+ max_results: Maximum number of results to return (default 20)
43
 
44
+ Returns:
45
+ ToolResult with code matches and snippets
46
+ """
47
  token = os.environ.get("GITHUB_TOKEN")
48
  if not token:
49
+ return {
50
+ "formatted": "Error: GITHUB_TOKEN environment variable is required",
51
+ "totalResults": 0,
52
+ "resultsShared": 0,
53
+ "isError": True,
54
+ }
55
 
56
+ # Build GitHub query
57
+ query_parts = []
 
 
 
58
 
59
  if regex:
60
+ query_parts.append(f"/{query}/")
61
  else:
62
+ query_parts.append(f'"{query}"' if " " in query else query)
 
 
 
63
 
64
+ # Add repo filter
65
  if repo_glob:
66
  if "/" in repo_glob:
67
+ query_parts.append(f"repo:{repo_glob}")
68
  else:
69
+ query_parts.append(f"user:{repo_glob}")
70
 
71
+ # Add path filter
72
  if path_glob:
73
  if "*" not in path_glob and "?" not in path_glob:
74
+ query_parts.append(f"path:{path_glob}")
75
  elif path_glob.startswith("*."):
76
  ext = path_glob[2:]
77
+ query_parts.append(f"extension:{ext}")
78
  elif "/" not in path_glob and "*" in path_glob:
79
+ query_parts.append(f"filename:{path_glob}")
80
  else:
81
+ # Complex pattern, extract extension if possible
82
+ ext_match = re.search(r"\*\.(\w+)", path_glob)
83
+ if ext_match:
84
+ query_parts.append(f"extension:{ext_match.group(1)}")
85
 
86
+ github_query = " ".join(query_parts)
87
 
 
 
 
 
 
88
  headers = {
89
  "Accept": "application/vnd.github.text-match+json",
90
  "X-GitHub-Api-Version": "2022-11-28",
91
  "Authorization": f"Bearer {token}",
92
  }
93
 
94
+ all_matches = []
95
  page = 1
96
  per_page = min(100, max_results)
97
 
98
+ try:
99
+ while len(all_matches) < max_results:
100
+ params = {
101
+ "q": github_query,
102
+ "page": page,
103
+ "per_page": per_page,
104
+ }
105
 
106
+ response = requests.get(
107
+ "https://api.github.com/search/code",
108
+ headers=headers,
109
+ params=params,
110
+ timeout=30,
111
+ )
112
 
113
+ if response.status_code == 403:
114
+ error_data = response.json()
115
+ return {
116
+ "formatted": f"GitHub API rate limit or permission error: {error_data.get('message', 'Unknown error')}",
117
+ "totalResults": 0,
118
+ "resultsShared": 0,
119
+ "isError": True,
120
+ }
121
 
122
  if response.status_code != 200:
123
+ error_msg = f"GitHub API error (status {response.status_code})"
124
+ try:
125
+ error_data = response.json()
126
+ if "message" in error_data:
127
+ error_msg += f": {error_data['message']}"
128
+ except Exception:
129
+ pass
130
+ return {
131
+ "formatted": error_msg,
132
+ "totalResults": 0,
133
+ "resultsShared": 0,
134
+ "isError": True,
135
+ }
136
 
137
  data = response.json()
138
  items = data.get("items", [])
 
140
  if not items:
141
  break
142
 
143
+ for item in items:
144
+ repo_name = item.get("repository", {}).get("full_name", "unknown")
145
+ file_path = item.get("path", "")
146
+ sha = item.get("sha", "")
147
+
148
+ # Apply client-side glob filtering
149
+ if repo_glob and not _glob_match(repo_name, repo_glob):
150
+ continue
151
+ if path_glob and not _glob_match(file_path, path_glob):
152
+ continue
153
+
154
+ # Extract text matches
155
+ text_matches = item.get("text_matches", [])
156
+ if text_matches:
157
+ for text_match in text_matches:
158
+ fragment = text_match.get("fragment", "")
159
+ lines = fragment.split("\n")
160
+ line_count = len([line for line in lines if line.strip()])
161
+
162
+ all_matches.append(
163
+ {
164
+ "repo": repo_name,
165
+ "path": file_path,
166
+ "ref": sha,
167
+ "line_start": 1,
168
+ "line_end": line_count,
169
+ "snippet": fragment.strip(),
170
+ "url": item.get("html_url", ""),
171
+ }
172
+ )
173
+ else:
174
+ all_matches.append(
175
+ {
176
+ "repo": repo_name,
177
+ "path": file_path,
178
+ "ref": sha,
179
+ "line_start": 1,
180
+ "line_end": 1,
181
+ "snippet": "(snippet not available)",
182
+ "url": item.get("html_url", ""),
183
+ }
184
+ )
185
+
186
+ if len(all_matches) >= data.get("total_count", 0):
187
  break
188
 
189
  page += 1
190
 
191
+ except requests.exceptions.RequestException as e:
192
+ return {
193
+ "formatted": f"Failed to connect to GitHub API: {str(e)}",
194
+ "totalResults": 0,
195
+ "resultsShared": 0,
196
+ "isError": True,
197
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
199
+ results = all_matches[:max_results]
 
 
 
 
 
 
 
 
 
 
 
 
 
200
 
201
+ if not results:
202
+ return {
203
+ "formatted": f"No code matches found for query: {query}",
204
+ "totalResults": 0,
205
+ "resultsShared": 0,
206
+ }
207
 
208
+ # Format output
209
+ lines_output = [f"**Found {len(results)} code matches:**\n"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
211
+ for i, match in enumerate(results, 1):
212
+ lines_output.append(f"{i}. **{match['repo']}:{match['path']}**")
213
+ lines_output.append(
214
+ f" Lines: {match['line_start']}-{match['line_end']} | Ref: {match['ref'][:7]}"
215
+ )
216
+ lines_output.append(f" URL: {match['url']}")
217
+
218
+ # Show snippet (first 5 lines)
219
+ snippet_lines = match["snippet"].split("\n")[:5]
220
+ if snippet_lines:
221
+ lines_output.append(" ```")
222
+ for line in snippet_lines:
223
+ lines_output.append(f" {line}")
224
+ if len(match["snippet"].split("\n")) > 5:
225
+ lines_output.append(" ...")
226
+ lines_output.append(" ```")
227
+ lines_output.append("")
228
+
229
+ return {
230
+ "formatted": "\n".join(lines_output),
231
+ "totalResults": len(results),
232
+ "resultsShared": len(results),
233
+ }
234
 
235
 
236
  # Tool specification
237
+ GITHUB_SEARCH_CODE_TOOL_SPEC = {
238
  "name": "search_code",
239
  "description": (
240
+ "Search for code patterns across GitHub with advanced glob filtering.\n\n"
241
+ "Features:\n"
242
+ "- Text or regex search\n"
243
+ "- Repository glob patterns (e.g., 'github/*', '*/react')\n"
244
+ "- File path glob patterns (e.g., '*.py', 'src/**/*.js')\n"
245
+ "- Returns code snippets with line numbers\n"
246
+ "- Direct URLs to matches\n\n"
247
+ "## Examples:\n\n"
248
+ "**Search for Python function definitions:**\n"
249
+ "{'query': 'def search', 'path_glob': '*.py', 'max_results': 10}\n\n"
250
+ "**Search for TODO comments in specific org:**\n"
251
+ "{'query': 'TODO', 'repo_glob': 'github/*', 'max_results': 5}\n\n"
252
+ "**Regex search for test functions:**\n"
253
+ "{'query': r'func Test\\w+', 'path_glob': '*.go', 'regex': True}\n\n"
254
+ "**Search in specific repo with path filter:**\n"
255
+ "{'query': 'SearchCode', 'repo_glob': 'github/github-mcp-server', 'path_glob': '*.go'}\n\n"
256
+ "**Find imports in TypeScript files:**\n"
257
+ "{'query': 'import', 'path_glob': 'src/**/*.ts', 'repo_glob': 'facebook/*'}\n\n"
258
+ "Perfect for finding code patterns, learning from examples, or exploring implementations."
259
  ),
260
  "parameters": {
261
  "type": "object",
262
  "properties": {
263
  "query": {
264
  "type": "string",
265
+ "description": "Search term or pattern to find in code. Required.",
266
  },
267
  "repo_glob": {
268
  "type": "string",
269
+ "description": "Glob pattern to filter repositories (e.g., 'github/*', '*/react'). Optional.",
270
  },
271
  "path_glob": {
272
  "type": "string",
273
+ "description": "Glob pattern to filter file paths (e.g., '*.py', 'src/**/*.js'). Optional.",
274
  },
275
  "regex": {
276
  "type": "boolean",
277
+ "description": "If true, treat query as regular expression. Default: false.",
278
  },
279
  "max_results": {
280
  "type": "integer",
281
+ "description": "Maximum number of results to return. Default: 20.",
282
  },
283
  },
284
  "required": ["query"],
 
286
  }
287
 
288
 
289
async def github_search_code_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Handler for agent tool router.

    Validates the required ``query`` argument and then runs ``search_code``
    in a worker thread. ``search_code`` pages through the GitHub API with
    synchronous ``requests`` calls, so awaiting it through a thread keeps the
    event loop responsive.

    Args:
        arguments: Tool arguments. ``query`` is required; ``repo_glob``,
            ``path_glob``, ``regex`` and ``max_results`` are optional.

    Returns:
        ``(formatted_output, success)``. ``success`` is False when the result
        is flagged ``isError``, when ``query`` is missing, or when an
        exception was raised.
    """
    import asyncio  # local import: only this handler needs it

    try:
        # Report a missing query explicitly instead of letting a KeyError
        # surface as the opaque message "Error searching code: 'query'".
        query = arguments.get("query")
        if not query:
            return "Error searching code: 'query' parameter is required", False

        result = await asyncio.to_thread(
            search_code,
            query=query,
            repo_glob=arguments.get("repo_glob"),
            path_glob=arguments.get("path_glob"),
            regex=arguments.get("regex", False),
            max_results=arguments.get("max_results", 20),
        )
        return result["formatted"], not result.get("isError", False)
    except Exception as e:
        return f"Error searching code: {str(e)}", False