AdithyaVardan's picture
Fix lazy token loading in tools, add github_list_commits tool, load_dotenv in main
ed33951
"""
GitHub tools — discovery-first, markdown-only reads, paginated, rate-limit aware.
"""
import os
import httpx
# Base URL of the GitHub REST API; every endpoint path in this module is appended to it.
GH = "https://api.github.com"
def _headers() -> dict:
    """Build the HTTP headers sent with every GitHub REST request.

    ``GITHUB_TOKEN`` is read from the environment on each call rather than
    at import time, so a token placed in the environment after this module
    was imported is still picked up.

    Returns:
        A dict holding the ``Accept`` media type and the pinned
        ``X-GitHub-Api-Version``. ``Authorization`` is added only when a
        non-empty token is configured.
    """
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    # Stray whitespace (e.g. a trailing newline copied into a .env file)
    # would otherwise end up inside the header value.
    token = os.environ.get("GITHUB_TOKEN", "").strip()
    if token:
        headers["Authorization"] = f"Bearer {token}"
    # With no token the header is left out entirely: "Bearer " followed by
    # nothing is a malformed credential, whereas a request without the header
    # is simply treated as anonymous.
    return headers
def _target(repo: str) -> str:
    """Resolve which repository a tool call should operate on.

    Args:
        repo: Repository given by the caller; may be empty.

    Returns:
        ``repo`` when it is non-empty, otherwise the ``GITHUB_REPO``
        environment variable, or ``""`` when that is unset too.
    """
    if repo:
        return repo
    return os.environ.get("GITHUB_REPO", "")
def _rate_warn(resp: httpx.Response) -> str | None:
if resp.headers.get("x-ratelimit-remaining", "1") == "0":
reset = resp.headers.get("x-ratelimit-reset", "soon")
return f"⚠️ GitHub rate limit reached. Resets at epoch {reset}."
return None
# ── Tools ──────────────────────────────────────────────────────────────────────
async def github_list_repos(org_or_user: str = "", repo_type: str = "all",
                            page: int = 1) -> str:
    """List repositories for an org or user, or for the authenticated user.

    Args:
        org_or_user: Login to list repositories for. It is queried as an
            organisation first and, if that returns 404, as a user. When
            empty, ``/user/repos`` (the token owner's repositories) is used.
        repo_type: Forwarded unchanged as the API's ``type`` query parameter.
        page: Page number to fetch; each page holds up to 50 repositories.

    Returns:
        One line per repository (full name, 🔒 private / 🌐 public, star
        count, description cut to 80 characters) plus a next-page hint when
        the page is full. Otherwise a rate-limit warning, a
        ``GitHub error ...`` message, or ``"No repositories found."``.
    """
    per_page = 50
    params = {"type": repo_type, "per_page": per_page, "page": page}
    async with httpx.AsyncClient(timeout=15) as h:
        if org_or_user:
            r = await h.get(f"{GH}/orgs/{org_or_user}/repos",
                            headers=_headers(), params=params)
            if r.status_code == 404:
                # Not an organisation: retry the same login as a user account.
                r = await h.get(f"{GH}/users/{org_or_user}/repos",
                                headers=_headers(), params=params)
        else:
            r = await h.get(f"{GH}/user/repos", headers=_headers(), params=params)
    if w := _rate_warn(r):
        return w
    if r.status_code != 200:
        return f"GitHub error {r.status_code}: {r.text[:300]}"
    repos = r.json()
    if not repos:
        return "No repositories found."
    lines = [
        f"• {repo['full_name']} {'🔒' if repo['private'] else '🌐'} "
        f"⭐{repo['stargazers_count']} {(repo.get('description') or '')[:80]}"
        for repo in repos
    ]
    # A full page suggests there may be more results on the next one.
    if len(repos) == per_page:
        lines.append(f"[Page {page} — call with page={page+1} for more]")
    return "\n".join(lines)
async def github_repo_summary(repo: str = "") -> str:
    """Return a short plain-text overview of one repository.

    Args:
        repo: ``owner/repo``; when empty the ``GITHUB_REPO`` default is used.

    Returns:
        Seven lines (name, description, language, topics, default branch,
        stars / open issues, URL), or a message when no repository is
        configured, the rate limit is hit, the repo is missing, or the API
        returns a non-200 status.
    """
    full_name = _target(repo)
    if not full_name:
        return "Provide repo as 'owner/repo' or set GITHUB_REPO."

    async with httpx.AsyncClient(timeout=15) as client:
        resp = await client.get(f"{GH}/repos/{full_name}", headers=_headers())

    warning = _rate_warn(resp)
    if warning:
        return warning
    if resp.status_code == 404:
        return f"Repo not found: {full_name}"
    if resp.status_code != 200:
        return f"GitHub error {resp.status_code}"

    info = resp.json()
    topic_text = ", ".join(info.get("topics", []))
    if not topic_text:
        topic_text = "none"

    summary_lines = [
        f"Repo: {info['full_name']}",
        f"Description: {info.get('description') or 'n/a'}",
        f"Language: {info.get('language') or 'n/a'}",
        f"Topics: {topic_text}",
        f"Default branch: {info.get('default_branch')}",
        f"Stars: {info['stargazers_count']} Open issues: {info['open_issues_count']}",
        f"URL: {info['html_url']}",
    ]
    return "\n".join(summary_lines)
async def github_list_files(repo: str = "", path: str = "", branch: str = "") -> str:
    """List the entries of a directory in a repository.

    Args:
        repo: ``owner/repo``; when empty the ``GITHUB_REPO`` default is used.
        path: Directory path inside the repo; empty lists the repository root.
        branch: Sent as the ``ref`` query parameter when non-empty.

    Returns:
        One line per entry, directories first and then by name. Entries of
        type ``dir`` get a 📁 icon, everything else 📄, and files also show
        their size in bytes. Otherwise a message when the path is a file,
        missing, empty, rate limited, or the API returns a non-200 status.
    """
    t = _target(repo)
    if not t:
        return "Provide repo as 'owner/repo' or set GITHUB_REPO."
    params = {"ref": branch} if branch else {}
    # A leading or trailing "/" would put an empty segment into the URL
    # ("contents//docs"), so it is trimmed before building the request.
    api_path = path.strip("/")
    async with httpx.AsyncClient(timeout=15) as h:
        r = await h.get(f"{GH}/repos/{t}/contents/{api_path}",
                        headers=_headers(), params=params)
    if w := _rate_warn(r):
        return w
    if r.status_code == 404:
        return f"Path '{path}' not found in {t}."
    if r.status_code != 200:
        return f"GitHub error {r.status_code}"
    items = r.json()
    # The contents endpoint returns a single object (not a list) for a file.
    if isinstance(items, dict):
        return f"'{path}' is a file. Use github_read_file to read it."
    lines = []
    # False sorts before True, so directories come first.
    for item in sorted(items, key=lambda x: (x["type"] != "dir", x["name"])):
        icon = "📁" if item["type"] == "dir" else "📄"
        size = f" {item.get('size',0):,}b" if item["type"] == "file" else ""
        lines.append(f"{icon} {item['path']}{size}")
    return "\n".join(lines) or "Empty directory."
async def github_read_file(path: str, repo: str = "", branch: str = "") -> str:
    """Read a documentation file from a repository as raw text.

    Only paths that look like documentation are fetched: an allowed
    extension, an allowed extension-less file name (README, LICENSE, ...),
    or a location under one of the allowed top-level doc directories.
    Anything else is refused with an explanatory message and no request is
    made.

    Args:
        path: File path inside the repo.
        repo: ``owner/repo``; when empty the ``GITHUB_REPO`` default is used.
        branch: Sent as the ``ref`` query parameter when non-empty.

    Returns:
        ``"[repo · path]"`` followed by the file text, cut to 8000
        characters with a truncation note when longer. Otherwise a refusal,
        rate-limit, not-found or ``GitHub error`` message.
    """
    t = _target(repo)
    if not t:
        return "Provide repo as 'owner/repo' or set GITHUB_REPO."

    # Drop a leading "/" so "/docs/x.md" is classified and requested the same
    # way as "docs/x.md" (the directory test below is a prefix match).
    rel_path = path.lstrip("/")
    lower = rel_path.lower()
    allowed_exts = (".md", ".mdx", ".txt", ".rst", ".adoc")
    allowed_names = ("readme", "changelog", "contributing", "license", "notice")
    allowed_dirs = ("docs/", "doc/", "wiki/", "adr/", "architecture/", "rfcs/")
    is_doc = (
        lower.endswith(allowed_exts)
        or os.path.basename(lower) in allowed_names
        or lower.startswith(allowed_dirs)
    )
    if not is_doc:
        # NOTE(review): the message suggests re-calling with the same args,
        # but nothing in this function lets a repeated call through - confirm
        # whether the caller/dispatcher implements that override.
        return (
            f"⚠️ '{path}' looks like a source/config file. "
            "This tool is for documentation only (.md, README, docs/ etc). "
            "Re-call with the same args only if the user explicitly asked for this file."
        )

    max_chars = 8000
    params = {"ref": branch} if branch else {}
    async with httpx.AsyncClient(timeout=15) as h:
        r = await h.get(
            f"{GH}/repos/{t}/contents/{rel_path}",
            # Override Accept to request the raw file body instead of the
            # default JSON media type set by _headers().
            headers={**_headers(), "Accept": "application/vnd.github.raw+json"},
            params=params,
        )
    if w := _rate_warn(r):
        return w
    if r.status_code == 404:
        return f"File not found: {path} in {t}"
    if r.status_code != 200:
        return f"GitHub error {r.status_code}"
    content = r.text
    total = len(content)
    if total > max_chars:
        content = content[:max_chars] + f"\n\n…[truncated — {total:,} chars total]"
    return f"[{t} · {path}]\n\n{content}"
async def github_search_code(query: str, repo: str = "", language: str = "",
                             path_filter: str = "") -> str:
    """Search code on GitHub and list the files that match.

    Args:
        query: Search terms.
        repo: ``owner/repo`` to restrict the search to; when empty the
            ``GITHUB_REPO`` default is used, and if that is unset too no
            ``repo:`` qualifier is added.
        language: Added as a ``language:`` qualifier when non-empty.
        path_filter: Added as a ``path:`` qualifier when non-empty.

    Returns:
        Up to 10 matches, each as ``• owner/repo/path`` followed by the
        file's URL on the next line. Otherwise a rate-limit, invalid-query,
        ``GitHub error`` or "No matching files found." message.
    """
    t = _target(repo)
    terms = [query]
    if t:
        terms.append(f"repo:{t}")
    if language:
        terms.append(f"language:{language}")
    if path_filter:
        terms.append(f"path:{path_filter}")
    q = " ".join(terms)
    async with httpx.AsyncClient(timeout=15) as h:
        r = await h.get(f"{GH}/search/code", headers=_headers(),
                        params={"q": q, "per_page": 10})
    if w := _rate_warn(r):
        return w
    if r.status_code == 422:
        return "Search query too short or invalid."
    if r.status_code == 403:
        return "GitHub search rate limit hit. Wait 60s then retry."
    if r.status_code != 200:
        return f"GitHub error {r.status_code}: {r.text[:200]}"
    items = r.json().get("items", [])
    if not items:
        return "No matching files found."
    return "\n".join(
        f"• {i['repository']['full_name']}/{i['path']}\n {i['html_url']}"
        for i in items
    )
async def github_list_markdown_files(repo: str = "", folder: str = "") -> str:
    """List markdown (``.md``) file paths in a repository via code search.

    Args:
        repo: ``owner/repo``; when empty the ``GITHUB_REPO`` default is used.
        folder: Added as a ``path:`` qualifier when non-empty.

    Returns:
        A header line followed by one ``• path`` line per file. At most 30
        files are returned; when the search reports more matches than that,
        the header says how many of the total are shown. Otherwise a
        rate-limit, ``GitHub error`` or "No markdown files found." message.
    """
    t = _target(repo)
    if not t:
        return "Provide repo as 'owner/repo' or set GITHUB_REPO."
    q = f"extension:md repo:{t}"
    if folder:
        q += f" path:{folder}"
    async with httpx.AsyncClient(timeout=15) as h:
        r = await h.get(f"{GH}/search/code", headers=_headers(),
                        params={"q": q, "per_page": 30})
    if w := _rate_warn(r):
        return w
    if r.status_code != 200:
        return f"GitHub error {r.status_code}"
    data = r.json()
    items = data.get("items", [])
    if not items:
        return "No markdown files found."
    shown = len(items)
    # Only one page is fetched, so make it visible when the search matched
    # more files than are listed instead of presenting a partial list as complete.
    total = data.get("total_count", shown)
    if total > shown:
        header = f"[{t}] showing {shown} of {total} markdown file(s):\n"
    else:
        header = f"[{t}] {shown} markdown file(s):\n"
    return header + "\n".join(f"• {i['path']}" for i in items)
async def github_list_commits(repo: str = "", branch: str = "", page: int = 1) -> str:
    """List commits of a repository, 20 per page.

    Args:
        repo: ``owner/repo``; when empty the ``GITHUB_REPO`` default is used.
        branch: Sent as the ``sha`` query parameter when non-empty.
        page: Page number to fetch.

    Returns:
        One line per commit (7-character SHA, date, author name, first line
        of the message cut to 80 characters) plus a next-page hint when the
        page is full. Otherwise a rate-limit, not-found, ``GitHub error`` or
        "No commits found." message.
    """
    t = _target(repo)
    if not t:
        return "Provide repo as 'owner/repo' or set GITHUB_REPO."
    per_page = 20
    params = {"per_page": per_page, "page": page}
    if branch:
        params["sha"] = branch
    async with httpx.AsyncClient(timeout=15) as h:
        r = await h.get(f"{GH}/repos/{t}/commits", headers=_headers(), params=params)
    if w := _rate_warn(r):
        return w
    if r.status_code == 404:
        # With an explicit ref the 404 may be about the ref rather than the repo.
        if branch:
            return f"Repo not found: {t} (or branch '{branch}' does not exist)"
        return f"Repo not found: {t}"
    if r.status_code == 409:
        # GitHub answers 409 Conflict for a repository that has no commits yet.
        return "No commits found."
    if r.status_code != 200:
        return f"GitHub error {r.status_code}: {r.text[:200]}"
    commits = r.json()
    if not commits:
        return "No commits found."
    lines = []
    for c in commits:
        meta = c["commit"]
        # The git author object may be null in the API response; fall back
        # to placeholders instead of raising.
        who = meta.get("author") or {}
        sha = c["sha"][:7]
        msg = (meta.get("message") or "").split("\n")[0][:80]
        author = who.get("name") or "unknown"
        date = (who.get("date") or "unknown")[:10]
        lines.append(f"• {sha} {date} {author}: {msg}")
    # A full page suggests there may be more results on the next one.
    if len(commits) == per_page:
        lines.append(f"[Page {page} — call with page={page+1} for more]")
    return "\n".join(lines)
# ── Registry ───────────────────────────────────────────────────────────────────
# Lookup table from tool name to the coroutine implementing it. Every key is
# the function's own name, so the mapping is derived from ``__name__``; the
# tuple order is the registration order.
GITHUB_TOOL_FNS = {
    fn.__name__: fn
    for fn in (
        github_list_repos,
        github_repo_summary,
        github_list_files,
        github_read_file,
        github_search_code,
        github_list_markdown_files,
        github_list_commits,
    )
}
# Function-calling schemas, one entry per tool. Each "name" matches a function
# of the same name in this module, and the "properties" keys match that
# function's parameter names, since they are what the model fills in.
GITHUB_TOOLS = [
    {"type": "function", "function": {
        "name": "github_list_repos",
        "description": (
            "List GitHub repositories for a user or org. "
            "Call this FIRST when you don't know which repos exist. "
            "Supports pagination. Do NOT call if you already have the repo name."
        ),
        "parameters": {"type": "object", "properties": {
            "org_or_user": {"type": "string", "description": "GitHub username or org. Empty = authenticated user."},
            "repo_type": {"type": "string", "enum": ["all", "public", "private", "forks", "sources"],
                          "description": "Repository type filter, default 'all'"},
            "page": {"type": "integer", "description": "Page number, default 1"},
        }},
    }},
    {"type": "function", "function": {
        "name": "github_repo_summary",
        "description": (
            "Get lightweight repo metadata: description, language, topics, stars, open issues. "
            "Call this INSTEAD of reading README when you just need a quick overview. "
            "Much cheaper than github_read_file."
        ),
        "parameters": {"type": "object", "properties": {
            "repo": {"type": "string", "description": "owner/repo"},
        }},
    }},
    {"type": "function", "function": {
        "name": "github_list_files",
        "description": (
            "List files and directories in a repo path. "
            "Use this to explore repo structure BEFORE reading any file. "
            "Do NOT use to read file content — use github_read_file for that."
        ),
        "parameters": {"type": "object", "properties": {
            "repo": {"type": "string", "description": "owner/repo"},
            "path": {"type": "string", "description": "Directory path inside the repo. Empty = repository root."},
            "branch": {"type": "string", "description": "Branch name, default is the repo default branch"},
        }},
    }},
    {"type": "function", "function": {
        "name": "github_read_file",
        "description": (
            "Read a file from GitHub. ONLY for documentation files: "
            ".md .mdx .txt .rst .adoc, README, CHANGELOG, CONTRIBUTING, LICENSE, NOTICE, "
            "files in docs/ doc/ wiki/ adr/ architecture/ rfcs/. "
            "Do NOT use for .py .ts .json .yaml .lock source files unless user explicitly asks."
        ),
        "parameters": {"type": "object", "required": ["path"], "properties": {
            "path": {"type": "string", "description": "File path inside the repo"},
            "repo": {"type": "string", "description": "owner/repo"},
            "branch": {"type": "string", "description": "Branch name, default is the repo default branch"},
        }},
    }},
    {"type": "function", "function": {
        "name": "github_search_code",
        "description": (
            "Search for text or code within GitHub repos. "
            "Use to LOCATE relevant files before reading them. "
            "Set language='markdown' and path_filter='docs/' to target documentation."
        ),
        "parameters": {"type": "object", "required": ["query"], "properties": {
            "query": {"type": "string", "description": "Search terms"},
            "repo": {"type": "string", "description": "owner/repo"},
            "language": {"type": "string", "description": "Language qualifier, e.g. 'markdown'"},
            "path_filter": {"type": "string", "description": "Path qualifier, e.g. 'docs/'"},
        }},
    }},
    {"type": "function", "function": {
        "name": "github_list_markdown_files",
        "description": (
            "List markdown (.md) files in a repo (up to 30 per call), optionally scoped to a folder. "
            "Use for documentation discovery before github_read_file. "
            "Returns file paths only, not content."
        ),
        "parameters": {"type": "object", "properties": {
            "repo": {"type": "string", "description": "owner/repo"},
            "folder": {"type": "string", "description": "Folder to restrict the listing to"},
        }},
    }},
    {"type": "function", "function": {
        "name": "github_list_commits",
        "description": (
            "List recent commits in a GitHub repo. "
            "Use this when asked about recent changes, commit history, or who changed what. "
            "Returns SHA, date, author, and commit message for each commit."
        ),
        "parameters": {"type": "object", "properties": {
            "repo": {"type": "string", "description": "owner/repo"},
            "branch": {"type": "string", "description": "Branch name, default is the repo default branch"},
            "page": {"type": "integer", "description": "Page number, default 1"},
        }},
    }},
]