Spaces:
Sleeping
Sleeping
"""
GitHub tools — discovery-first, markdown-only reads, paginated, rate-limit aware.
"""
import os
import posixpath

import httpx
# Root of the GitHub REST API; every request URL in this module is built
# by appending an endpoint path to it.
GH: str = "https://api.github.com"
def _headers() -> dict:
    """Build the request headers shared by every GitHub API call.

    The token is read from the ``GITHUB_TOKEN`` environment variable on each
    call. When it is unset or blank the ``Authorization`` header is left out
    entirely: an empty ``Bearer`` value is not a valid credential, so sending
    it can turn an otherwise-allowed anonymous request into an auth failure.

    Returns:
        Header mapping with the JSON media type, the pinned API version and,
        when a token is configured, the bearer credential.
    """
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    # Strip stray whitespace/newlines that secrets pasted into env vars
    # often carry; they are not legal inside a header value.
    token = os.environ.get("GITHUB_TOKEN", "").strip()
    if token:
        headers["Authorization"] = f"Bearer {token}"
    return headers
def _target(repo: str) -> str:
    """Resolve which repository a tool call should operate on.

    Args:
        repo: Repository given by the caller; may be empty.

    Returns:
        ``repo`` when it is non-empty, otherwise the value of the
        ``GITHUB_REPO`` environment variable, or ``""`` if that is unset.
    """
    if repo:
        return repo
    return os.environ.get("GITHUB_REPO", "")
| def _rate_warn(resp: httpx.Response) -> str | None: | |
| if resp.headers.get("x-ratelimit-remaining", "1") == "0": | |
| reset = resp.headers.get("x-ratelimit-reset", "soon") | |
| return f"β οΈ GitHub rate limit reached. Resets at epoch {reset}." | |
| return None | |
# ── Tools ────────────────────────────────────────────────────────────────────
async def github_list_repos(org_or_user: str = "", repo_type: str = "all",
                            page: int = 1) -> str:
    """List repositories for an org, a user, or the authenticated account.

    With ``org_or_user`` set, the organisation endpoint is tried first and,
    if it answers 404, the same name is retried as a user. With it empty,
    the authenticated user's repositories are listed.

    Args:
        org_or_user: GitHub organisation or user login; empty means the
            account that owns the token.
        repo_type: Forwarded as the ``type`` query parameter.
        page: 1-based page number; values below 1 are treated as 1.

    Returns:
        One line per repository (full name, visibility marker, star count,
        description cut to 80 characters), followed by a paging hint when the
        page is full; otherwise an error or "no results" message.
    """
    per_page = 50
    # A page below 1 is meaningless and would make the paging hint point
    # at a page that is not actually the next one.
    page = max(1, page)
    params = {"type": repo_type, "per_page": per_page, "page": page}

    async with httpx.AsyncClient(timeout=15) as h:
        if org_or_user:
            r = await h.get(f"{GH}/orgs/{org_or_user}/repos",
                            headers=_headers(), params=params)
            if r.status_code == 404:
                # Not an organisation: retry the same login as a user.
                r = await h.get(f"{GH}/users/{org_or_user}/repos",
                                headers=_headers(), params=params)
        else:
            r = await h.get(f"{GH}/user/repos", headers=_headers(), params=params)

    if w := _rate_warn(r):
        return w
    if r.status_code != 200:
        return f"GitHub error {r.status_code}: {r.text[:300]}"

    repos = r.json()
    if not repos:
        return "No repositories found."

    # Distinct markers for private vs. public; the previous text rendered
    # both as the same garbled glyph, so visibility could not be told apart.
    lines = [
        f"• {repo['full_name']} {'🔒' if repo['private'] else '🌐'} "
        f"⭐{repo['stargazers_count']} {(repo.get('description') or '')[:80]}"
        for repo in repos
    ]
    # A full page suggests there may be another one.
    if len(repos) == per_page:
        lines.append(f"[Page {page} — call with page={page+1} for more]")
    return "\n".join(lines)
async def github_repo_summary(repo: str = "") -> str:
    """Fetch repository metadata and format it as a short text summary.

    Args:
        repo: ``owner/repo``; when empty the value is resolved by
            ``_target``.

    Returns:
        A multi-line summary (name, description, language, topics, default
        branch, stars, open issues, URL), or an error message when the
        repository is unspecified, missing, rate-limited or the call fails.
    """
    full_name = _target(repo)
    if not full_name:
        return "Provide repo as 'owner/repo' or set GITHUB_REPO."

    async with httpx.AsyncClient(timeout=15) as client:
        resp = await client.get(f"{GH}/repos/{full_name}", headers=_headers())

    warning = _rate_warn(resp)
    if warning:
        return warning
    if resp.status_code == 404:
        return f"Repo not found: {full_name}"
    if resp.status_code != 200:
        return f"GitHub error {resp.status_code}"

    meta = resp.json()
    # An empty or missing topic list is reported as "none".
    topic_text = ", ".join(meta.get("topics", [])) or "none"
    summary_lines = [
        f"Repo: {meta['full_name']}",
        f"Description: {meta.get('description') or 'n/a'}",
        f"Language: {meta.get('language') or 'n/a'}",
        f"Topics: {topic_text}",
        f"Default branch: {meta.get('default_branch')}",
        f"Stars: {meta['stargazers_count']} Open issues: {meta['open_issues_count']}",
        f"URL: {meta['html_url']}",
    ]
    return "\n".join(summary_lines)
async def github_list_files(repo: str = "", path: str = "", branch: str = "") -> str:
    """List the entries of a directory in a repository.

    Args:
        repo: ``owner/repo``; when empty the value is resolved by
            ``_target``.
        path: Directory path inside the repository; empty means the root.
        branch: Sent as the ``ref`` query parameter when non-empty.

    Returns:
        One line per entry, directories first and then by name, each with an
        icon and, for files, the size in bytes. Returns a hint when ``path``
        is a file, "Empty directory." for an empty listing, or an error
        message.
    """
    t = _target(repo)
    if not t:
        return "Provide repo as 'owner/repo' or set GITHUB_REPO."

    params = {"ref": branch} if branch else {}
    async with httpx.AsyncClient(timeout=15) as h:
        r = await h.get(f"{GH}/repos/{t}/contents/{path}",
                        headers=_headers(), params=params)

    if w := _rate_warn(r):
        return w
    if r.status_code == 404:
        return f"Path '{path}' not found in {t}."
    if r.status_code != 200:
        return f"GitHub error {r.status_code}"

    items = r.json()
    # The endpoint answers with a single object (not a list) when the path
    # names a file rather than a directory.
    if isinstance(items, dict):
        return f"'{path}' is a file. Use github_read_file to read it."

    lines = []
    # Sort key: False (directories) sorts before True (everything else).
    for item in sorted(items, key=lambda x: (x["type"] != "dir", x["name"])):
        # Distinct icons for directories and files; the previous text used
        # one garbled glyph for both, hiding the difference.
        icon = "📁" if item["type"] == "dir" else "📄"
        size = f" {item.get('size', 0):,}b" if item["type"] == "file" else ""
        lines.append(f"{icon} {item['path']}{size}")
    return "\n".join(lines) or "Empty directory."
async def github_read_file(path: str, repo: str = "", branch: str = "") -> str:
    """Read a documentation file from a repository, truncated to 8000 chars.

    Only documentation-like paths are served: an allowed extension, an
    allowed extension-less file name (README, LICENSE, ...) or a location
    under a top-level documentation directory. Anything else gets a refusal
    message instead of file content.

    The path is normalised before it is checked. Without that, a path such
    as ``docs/../src/app.py`` satisfies the ``docs/`` prefix test while the
    request resolves to ``src/app.py``, bypassing the docs-only guard.

    Args:
        path: File path inside the repository.
        repo: ``owner/repo``; when empty the value is resolved by
            ``_target``.
        branch: Sent as the ``ref`` query parameter when non-empty.

    Returns:
        A ``[repo · path]`` header followed by the file text (cut at 8000
        characters with a truncation note), or a refusal/error message.
    """
    t = _target(repo)
    if not t:
        return "Provide repo as 'owner/repo' or set GITHUB_REPO."

    # Collapse "./", "//" and "dir/.." so the allow-list sees the path that
    # will really be requested.
    clean = posixpath.normpath(path.strip().lstrip("/"))
    if clean == ".." or clean.startswith("../"):
        return f"Invalid path: '{path}' points outside the repository."

    lower = clean.lower()
    allowed_exts = (".md", ".mdx", ".txt", ".rst", ".adoc")
    allowed_names = ("readme", "changelog", "contributing", "license", "notice")
    allowed_dirs = ("docs/", "doc/", "wiki/", "adr/", "architecture/", "rfcs/")
    is_doc = (
        lower.endswith(allowed_exts)
        or posixpath.basename(lower) in allowed_names
        or lower.startswith(allowed_dirs)
    )
    if not is_doc:
        # NOTE(review): nothing in this function honours a repeated call, so
        # the "re-call" hint presumably relies on the dispatcher - confirm.
        return (
            f"⚠️ '{path}' looks like a source/config file. "
            "This tool is for documentation only (.md, README, docs/ etc). "
            "Re-call with the same args only if the user explicitly asked for this file."
        )

    params = {"ref": branch} if branch else {}
    async with httpx.AsyncClient(timeout=15) as h:
        r = await h.get(
            f"{GH}/repos/{t}/contents/{clean}",
            # Override Accept with the raw media type; r.text is used below
            # as the file content.
            headers={**_headers(), "Accept": "application/vnd.github.raw+json"},
            params=params,
        )

    if w := _rate_warn(r):
        return w
    if r.status_code == 404:
        return f"File not found: {path} in {t}"
    if r.status_code != 200:
        return f"GitHub error {r.status_code}"

    content = r.text
    total = len(content)
    if total > 8000:
        content = content[:8000] + f"\n\n…[truncated — {total:,} chars total]"
    return f"[{t} · {path}]\n\n{content}"
async def github_search_code(query: str, repo: str = "", language: str = "",
                             path_filter: str = "") -> str:
    """Search code on GitHub and list the matching files.

    The search string is ``query`` plus optional ``repo:``, ``language:``
    and ``path:`` qualifiers. When neither ``repo`` nor the fallback in
    ``_target`` yields a repository, no ``repo:`` qualifier is added.

    Args:
        query: Free-text search terms.
        repo: ``owner/repo`` to scope the search to; may be empty.
        language: Value for the ``language:`` qualifier; may be empty.
        path_filter: Value for the ``path:`` qualifier; may be empty.

    Returns:
        Up to 10 results, each as ``• owner/repo/path`` followed by its URL
        on the next line, or an error or "no results" message.
    """
    t = _target(repo)
    qualifiers = [query]
    if t:
        qualifiers.append(f"repo:{t}")
    if language:
        qualifiers.append(f"language:{language}")
    if path_filter:
        qualifiers.append(f"path:{path_filter}")
    q = " ".join(qualifiers)

    async with httpx.AsyncClient(timeout=15) as h:
        r = await h.get(f"{GH}/search/code", headers=_headers(),
                        params={"q": q, "per_page": 10})

    if w := _rate_warn(r):
        return w
    if r.status_code == 422:
        return "Search query too short or invalid."
    if r.status_code == 403:
        return "GitHub search rate limit hit. Wait 60s then retry."
    if r.status_code != 200:
        return f"GitHub error {r.status_code}: {r.text[:200]}"

    items = r.json().get("items", [])
    if not items:
        return "No matching files found."
    # The bullet was previously a mis-encoded sequence; emit a real one.
    return "\n".join(
        f"• {i['repository']['full_name']}/{i['path']}\n {i['html_url']}"
        for i in items
    )
async def github_list_markdown_files(repo: str = "", folder: str = "") -> str:
    """List paths of ``.md`` files in a repository via code search.

    Requests the largest page the search endpoint is documented to accept
    and says so explicitly when more files matched than were returned.
    Previously the result was silently capped at 30 entries.

    Args:
        repo: ``owner/repo``; when empty the value is resolved by
            ``_target``.
        folder: Optional ``path:`` qualifier to restrict the search.

    Returns:
        A header with the number of files shown, one bullet per path and,
        when the listing is incomplete, a note with the total match count;
        or an error or "no results" message.
    """
    t = _target(repo)
    if not t:
        return "Provide repo as 'owner/repo' or set GITHUB_REPO."

    q = f"extension:md repo:{t}"
    if folder:
        q += f" path:{folder}"

    async with httpx.AsyncClient(timeout=15) as h:
        r = await h.get(f"{GH}/search/code", headers=_headers(),
                        params={"q": q, "per_page": 100})

    if w := _rate_warn(r):
        return w
    # Same handling as github_search_code, which hits the same endpoint.
    if r.status_code == 422:
        return "Search query too short or invalid."
    if r.status_code == 403:
        return "GitHub search rate limit hit. Wait 60s then retry."
    if r.status_code != 200:
        return f"GitHub error {r.status_code}"

    payload = r.json()
    items = payload.get("items", [])
    if not items:
        return "No markdown files found."

    listing = f"[{t}] {len(items)} markdown file(s):\n" + "\n".join(
        f"• {i['path']}" for i in items
    )
    # total_count is the number of matches overall, not just on this page.
    total = payload.get("total_count", len(items))
    if total > len(items):
        listing += (
            f"\n[Showing first {len(items)} of {total} — "
            "pass folder= to narrow the listing]"
        )
    return listing
async def github_list_commits(repo: str = "", branch: str = "", page: int = 1) -> str:
    """List commits of a repository, 20 per page.

    Args:
        repo: ``owner/repo``; when empty the value is resolved by
            ``_target``.
        branch: Sent as the ``sha`` query parameter when non-empty.
        page: 1-based page number; values below 1 are treated as 1.

    Returns:
        One line per commit (7-character SHA, date, author name, first line
        of the message cut to 80 characters), plus a paging hint when the
        page is full; or an error or "no results" message.
    """
    t = _target(repo)
    if not t:
        return "Provide repo as 'owner/repo' or set GITHUB_REPO."

    per_page = 20
    # A page below 1 is meaningless and would make the paging hint point
    # at a page that is not actually the next one.
    page = max(1, page)
    params = {"per_page": per_page, "page": page}
    if branch:
        params["sha"] = branch

    async with httpx.AsyncClient(timeout=15) as h:
        r = await h.get(f"{GH}/repos/{t}/commits", headers=_headers(), params=params)

    if w := _rate_warn(r):
        return w
    if r.status_code == 404:
        return f"Repo not found: {t}"
    if r.status_code != 200:
        return f"GitHub error {r.status_code}: {r.text[:200]}"

    commits = r.json()
    if not commits:
        return "No commits found."

    lines = []
    for c in commits:
        details = c["commit"]
        # The git author object may be null; fall back to placeholders
        # instead of failing the whole listing on one commit.
        git_author = details.get("author") or {}
        sha = c["sha"][:7]
        msg = details["message"].split("\n")[0][:80]
        author = git_author.get("name") or "unknown"
        date = (git_author.get("date") or "")[:10] or "unknown"
        lines.append(f"• {sha} {date} {author}: {msg}")
    # A full page suggests there may be another one.
    if len(commits) == per_page:
        lines.append(f"[Page {page} — call with page={page+1} for more]")
    return "\n".join(lines)
# ── Registry ─────────────────────────────────────────────────────────────────
# Lookup table from tool name to the coroutine function implementing it.
# Each key is the function's own name, so it is derived from __name__.
GITHUB_TOOL_FNS = {
    tool_fn.__name__: tool_fn
    for tool_fn in (
        github_list_repos,
        github_repo_summary,
        github_list_files,
        github_read_file,
        github_search_code,
        github_list_markdown_files,
        github_list_commits,
    )
}
# Function-calling schemas, one per tool. Each "name" matches a function
# defined in this module. The description strings are runtime text shown to
# the model; the only change from the previous text is the repaired em dash
# in the github_list_files description.
GITHUB_TOOLS = [
    {"type": "function", "function": {
        "name": "github_list_repos",
        "description": (
            "List GitHub repositories for a user or org. "
            "Call this FIRST when you don't know which repos exist. "
            "Supports pagination. Do NOT call if you already have the repo name."
        ),
        "parameters": {"type": "object", "properties": {
            "org_or_user": {"type": "string", "description": "GitHub username or org. Empty = authenticated user."},
            "repo_type": {"type": "string", "enum": ["all", "public", "private", "forks", "sources"]},
            "page": {"type": "integer", "description": "Page number, default 1"},
        }},
    }},
    {"type": "function", "function": {
        "name": "github_repo_summary",
        "description": (
            "Get lightweight repo metadata: description, language, topics, stars, open issues. "
            "Call this INSTEAD of reading README when you just need a quick overview. "
            "Much cheaper than github_read_file."
        ),
        "parameters": {"type": "object", "properties": {
            "repo": {"type": "string", "description": "owner/repo"},
        }},
    }},
    {"type": "function", "function": {
        "name": "github_list_files",
        "description": (
            "List files and directories in a repo path. "
            "Use this to explore repo structure BEFORE reading any file. "
            "Do NOT use to read file content — use github_read_file for that."
        ),
        "parameters": {"type": "object", "properties": {
            "repo": {"type": "string"},
            "path": {"type": "string"},
            "branch": {"type": "string"},
        }},
    }},
    {"type": "function", "function": {
        "name": "github_read_file",
        "description": (
            "Read a file from GitHub. ONLY for documentation files: "
            ".md .mdx .txt .rst, README, CHANGELOG, files in docs/ wiki/ adr/ architecture/. "
            "Do NOT use for .py .ts .json .yaml .lock source files unless user explicitly asks."
        ),
        # "path" is the only argument without a default in the function.
        "parameters": {"type": "object", "required": ["path"], "properties": {
            "path": {"type": "string"},
            "repo": {"type": "string"},
            "branch": {"type": "string"},
        }},
    }},
    {"type": "function", "function": {
        "name": "github_search_code",
        "description": (
            "Search for text or code within GitHub repos. "
            "Use to LOCATE relevant files before reading them. "
            "Set language='markdown' and path_filter='docs/' to target documentation."
        ),
        # "query" is the only argument without a default in the function.
        "parameters": {"type": "object", "required": ["query"], "properties": {
            "query": {"type": "string"},
            "repo": {"type": "string"},
            "language": {"type": "string"},
            "path_filter": {"type": "string"},
        }},
    }},
    {"type": "function", "function": {
        "name": "github_list_markdown_files",
        "description": (
            "List ALL markdown (.md) files in a repo, optionally scoped to a folder. "
            "Use for documentation discovery before github_read_file. "
            "Returns file paths only, not content."
        ),
        "parameters": {"type": "object", "properties": {
            "repo": {"type": "string"},
            "folder": {"type": "string"},
        }},
    }},
    {"type": "function", "function": {
        "name": "github_list_commits",
        "description": (
            "List recent commits in a GitHub repo. "
            "Use this when asked about recent changes, commit history, or who changed what. "
            "Returns SHA, date, author, and commit message for each commit."
        ),
        "parameters": {"type": "object", "properties": {
            "repo": {"type": "string", "description": "owner/repo"},
            "branch": {"type": "string", "description": "Branch name, default is the repo default branch"},
            "page": {"type": "integer", "description": "Page number, default 1"},
        }},
    }},
]