| """
|
| HF Repo Files Tool - File operations on Hugging Face repositories
|
|
|
| Operations: list, read, upload, delete
|
| """
|
|
|
| import asyncio
|
| from typing import Any, Dict, Literal, Optional
|
|
|
| from huggingface_hub import HfApi, hf_hub_download
|
| from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError
|
|
|
| from agent.tools.types import ToolResult
|
|
|
# Closed set of operation names; the same four strings are the keys of the
# dispatch table built in HfRepoFilesTool.execute.
OperationType = Literal["list", "read", "upload", "delete"]
|
|
|
|
|
async def _async_call(func, *args, **kwargs):
    """Run a blocking callable in a worker thread and return its result.

    ``func`` is invoked as ``func(*args, **kwargs)`` through
    ``asyncio.to_thread``; whatever it returns (or raises) is handed back
    to the awaiting coroutine.
    """
    pending = asyncio.to_thread(func, *args, **kwargs)
    outcome = await pending
    return outcome
|
|
|
|
|
def _build_repo_url(repo_id: str, repo_type: str = "model") -> str:
    """Return the huggingface.co URL of a repository.

    Model repos sit directly under the site root; every other repo type is
    placed under a path segment made of the type name plus ``s``
    (e.g. ``datasets/``, ``spaces/``).
    """
    type_segment = "" if repo_type == "model" else f"{repo_type}s/"
    return f"https://huggingface.co/{type_segment}{repo_id}"
|
|
|
|
|
def _format_size(size_bytes: int) -> str:
    """Render a byte count with one decimal place and a unit suffix.

    The value is divided by 1024 until it drops below 1024, stepping through
    B, KB, MB, GB and TB; anything still larger is reported in PB.
    """
    value = size_bytes
    for suffix in ("B", "KB", "MB", "GB", "TB"):
        if value < 1024:
            return f"{value:.1f}{suffix}"
        value = value / 1024
    # Ran out of smaller units: report whatever is left in petabytes.
    return f"{value:.1f}PB"
|
|
|
|
|
class HfRepoFilesTool:
    """File operations (list, read, upload, delete) on Hugging Face repos.

    Every operation receives one ``args`` dict and returns a ``ToolResult``
    mapping with the keys ``formatted`` (display text), ``totalResults`` and
    ``resultsShared``; failures additionally carry ``isError: True``.
    """

    def __init__(self, hf_token: Optional[str] = None):
        """Create the tool.

        Args:
            hf_token: Token forwarded to ``HfApi``; ``None`` passes no
                explicit token.
        """
        self.api = HfApi(token=hf_token)

    @staticmethod
    def _repo_type_and_revision(args: Dict[str, Any]) -> tuple[str, str]:
        """Return ``(repo_type, revision)`` with defaults applied.

        A key that is missing, ``None`` or empty falls back to ``"model"`` /
        ``"main"``. Using ``or`` rather than a ``.get`` default matters
        because a caller may send an explicit null, which would otherwise
        leak into the generated URLs as the text ``None``.
        """
        return args.get("repo_type") or "model", args.get("revision") or "main"

    async def execute(self, args: Dict[str, Any]) -> ToolResult:
        """Dispatch ``args["operation"]`` to the matching handler.

        Args:
            args: Tool arguments. ``operation`` selects the handler; the
                remaining keys are interpreted by that handler.

        Returns:
            The handler's result, the usage text when ``operation`` is
            missing or empty, or an error result for an unknown operation or
            any exception raised while the handler runs.
        """
        operation = args.get("operation")

        if not operation:
            return self._help()

        try:
            handlers = {
                "list": self._list,
                "read": self._read,
                "upload": self._upload,
                "delete": self._delete,
            }

            handler = handlers.get(operation)
            if handler:
                return await handler(args)
            else:
                return self._error(f"Unknown operation: {operation}. Valid: list, read, upload, delete")

        except RepositoryNotFoundError:
            return self._error(f"Repository not found: {args.get('repo_id')}")
        except EntryNotFoundError:
            return self._error(f"File not found: {args.get('path')}")
        except Exception as e:
            # Tool boundary: report every other failure as an error result
            # instead of raising into the caller.
            return self._error(f"Error: {str(e)}")

    def _help(self) -> ToolResult:
        """Return the usage text shown when no operation is given."""
        usage = (
            "**hf_repo_files** - File operations on HF repos\n"
            "\n"
            "**Operations:**\n"
            '- `list` - List files: `{"operation": "list", "repo_id": "gpt2"}`\n'
            '- `read` - Read file: `{"operation": "read", "repo_id": "gpt2", "path": "config.json"}`\n'
            '- `upload` - Upload: `{"operation": "upload", "repo_id": "my-model", "path": "README.md", "content": "..."}`\n'
            '- `delete` - Delete: `{"operation": "delete", "repo_id": "my-model", "patterns": ["*.tmp"]}`\n'
            "\n"
            "**Common params:** repo_id (required), repo_type (model/dataset/space), revision (default: main)"
        )
        return {
            "formatted": usage,
            "totalResults": 1,
            "resultsShared": 1,
        }

    async def _list(self, args: Dict[str, Any]) -> ToolResult:
        """List the entries of a repository tree, recursively.

        Args:
            args: Reads ``repo_id`` (required), ``repo_type``, ``revision``
                and ``path`` (forwarded to ``list_repo_tree`` as
                ``path_in_repo``).

        Returns:
            A result whose text has a summary line, the tree URL and one line
            per entry sorted by path. Entries exposing a ``size`` are shown
            with their formatted size; all others get a trailing ``/``.
        """
        repo_id = args.get("repo_id")
        if not repo_id:
            return self._error("repo_id is required")

        repo_type, revision = self._repo_type_and_revision(args)
        path = args.get("path", "")

        def _fetch_tree():
            # list_repo_tree yields entries lazily, so it has to be drained
            # inside the worker thread; materialising it after the await
            # would run the paginated requests on the event loop.
            return list(
                self.api.list_repo_tree(
                    repo_id=repo_id,
                    repo_type=repo_type,
                    revision=revision,
                    path_in_repo=path,
                    recursive=True,
                )
            )

        items = await _async_call(_fetch_tree)

        if not items:
            return {"formatted": f"No files in {repo_id}", "totalResults": 0, "resultsShared": 0}

        lines = []
        total_size = 0
        for item in sorted(items, key=lambda x: x.path):
            size = getattr(item, "size", None)
            if size is None:
                lines.append(f"{item.path}/")
            else:
                # Compare against None, not truthiness: a zero-byte file is
                # still a file and must not be rendered as a folder.
                total_size += size
                lines.append(f"{item.path} ({_format_size(size)})")

        url = _build_repo_url(repo_id, repo_type)
        response = f"**{repo_id}** ({len(items)} files, {_format_size(total_size)})\n{url}/tree/{revision}\n\n" + "\n".join(lines)

        return {"formatted": response, "totalResults": len(items), "resultsShared": len(items)}

    async def _read(self, args: Dict[str, Any]) -> ToolResult:
        """Download one file and return its text content.

        Args:
            args: Reads ``repo_id`` and ``path`` (both required),
                ``repo_type``, ``revision`` and ``max_chars`` (default
                50000; longer content is cut to this many characters).

        Returns:
            A result with the file's blob URL and its content in a fenced
            block, or a short "Binary file" notice with the file size when
            the file is not valid UTF-8.
        """
        repo_id = args.get("repo_id")
        path = args.get("path")

        if not repo_id:
            return self._error("repo_id is required")
        if not path:
            return self._error("path is required")

        repo_type, revision = self._repo_type_and_revision(args)
        max_chars = args.get("max_chars", 50000)

        file_path = await _async_call(
            hf_hub_download,
            repo_id=repo_id,
            filename=path,
            repo_type=repo_type,
            revision=revision,
            token=self.api.token,
        )

        def _load_text() -> str:
            with open(file_path, "r", encoding="utf-8") as f:
                return f.read()

        try:
            # Disk reads block too, so they go through the worker thread.
            content = await _async_call(_load_text)
        except UnicodeDecodeError:
            import os

            size = os.path.getsize(file_path)
            return {"formatted": f"Binary file ({_format_size(size)})", "totalResults": 1, "resultsShared": 1}

        truncated = len(content) > max_chars
        if truncated:
            content = content[:max_chars]

        url = f"{_build_repo_url(repo_id, repo_type)}/blob/{revision}/{path}"
        response = f"**{path}**{' (truncated)' if truncated else ''}\n{url}\n\n```\n{content}\n```"

        return {"formatted": response, "totalResults": 1, "resultsShared": 1}

    async def _upload(self, args: Dict[str, Any]) -> ToolResult:
        """Upload in-memory content as a single file.

        Args:
            args: Reads ``repo_id``, ``path`` and ``content`` (all required;
                an empty string is accepted as content), plus ``repo_type``,
                ``revision``, ``create_pr`` and ``commit_message`` (default
                ``"Upload <path>"``).

        Returns:
            A result containing the PR URL when ``create_pr`` is set and the
            upload result exposes ``pr_url``; otherwise the blob URL of the
            uploaded file.
        """
        repo_id = args.get("repo_id")
        path = args.get("path")
        content = args.get("content")

        if not repo_id:
            return self._error("repo_id is required")
        if not path:
            return self._error("path is required")
        if content is None:
            return self._error("content is required")

        repo_type, revision = self._repo_type_and_revision(args)
        create_pr = args.get("create_pr", False)
        commit_message = args.get("commit_message", f"Upload {path}")

        # Text is encoded as UTF-8; anything else is passed through untouched.
        file_bytes = content.encode("utf-8") if isinstance(content, str) else content

        result = await _async_call(
            self.api.upload_file,
            path_or_fileobj=file_bytes,
            path_in_repo=path,
            repo_id=repo_id,
            repo_type=repo_type,
            revision=revision,
            commit_message=commit_message,
            create_pr=create_pr,
        )

        url = _build_repo_url(repo_id, repo_type)
        if create_pr and hasattr(result, "pr_url"):
            response = f"**Uploaded as PR**\n{result.pr_url}"
        else:
            response = f"**Uploaded:** {path}\n{url}/blob/{revision}/{path}"

        return {"formatted": response, "totalResults": 1, "resultsShared": 1}

    async def _delete(self, args: Dict[str, Any]) -> ToolResult:
        """Delete files matching one or more patterns.

        Args:
            args: Reads ``repo_id`` and ``patterns`` (both required; a bare
                string is treated as a one-element list), plus
                ``repo_type``, ``revision``, ``create_pr`` and
                ``commit_message`` (default ``"Delete <patterns>"``).

        Returns:
            A result naming the patterns that were submitted for deletion.
        """
        repo_id = args.get("repo_id")
        patterns = args.get("patterns")

        if not repo_id:
            return self._error("repo_id is required")
        if not patterns:
            return self._error("patterns is required (list of paths/wildcards)")

        if isinstance(patterns, str):
            patterns = [patterns]

        repo_type, revision = self._repo_type_and_revision(args)
        create_pr = args.get("create_pr", False)
        commit_message = args.get("commit_message", f"Delete {', '.join(patterns)}")

        await _async_call(
            self.api.delete_files,
            repo_id=repo_id,
            delete_patterns=patterns,
            repo_type=repo_type,
            revision=revision,
            commit_message=commit_message,
            create_pr=create_pr,
        )

        response = f"**Deleted:** {', '.join(patterns)} from {repo_id}"
        return {"formatted": response, "totalResults": 1, "resultsShared": 1}

    def _error(self, message: str) -> ToolResult:
        """Wrap ``message`` in a result flagged with ``isError: True``."""
        return {"formatted": message, "totalResults": 0, "resultsShared": 0, "isError": True}
|
|
|
|
|
|
|
# Declarative description of the hf_repo_files tool: its name, the usage text
# in "description", and a JSON-Schema-style "parameters" object.
# Only "operation" is required at schema level; the per-operation
# requirements (repo_id, path, content, patterns) are checked at runtime by
# the HfRepoFilesTool handlers.
# Presumably this is what gets registered with the agent's tool router —
# verify against the registration site.
# NOTE(review): HfRepoFilesTool._read also reads a "max_chars" argument, and
# _list forwards "path" to list_repo_tree; neither use is described in this
# schema — confirm whether the omission is intentional.
HF_REPO_FILES_TOOL_SPEC = {
    "name": "hf_repo_files",
    "description": (
        "Read and write files in HF repos (models/datasets/spaces).\n\n"
        "## Operations\n"
        "- **list**: List files with sizes and structure\n"
        "- **read**: Read file content (text files only)\n"
        "- **upload**: Upload content to repo (can create PR)\n"
        "- **delete**: Delete files/folders (supports wildcards like *.tmp)\n\n"
        "## Use when\n"
        "- Need to see what files exist in a repo\n"
        "- Want to read config.json, README.md, or other text files\n"
        "- Uploading training scripts, configs, or results to a repo\n"
        "- Cleaning up temporary files from a repo\n\n"
        "## Examples\n"
        '{"operation": "list", "repo_id": "meta-llama/Llama-2-7b"}\n'
        '{"operation": "read", "repo_id": "gpt2", "path": "config.json"}\n'
        '{"operation": "upload", "repo_id": "my-model", "path": "README.md", "content": "# My Model"}\n'
        '{"operation": "upload", "repo_id": "org/model", "path": "fix.py", "content": "...", "create_pr": true}\n'
        '{"operation": "delete", "repo_id": "my-model", "patterns": ["*.tmp", "logs/"]}\n\n'
        "## Notes\n"
        "- For binary files (safetensors, bin), use list to see them but can't read content\n"
        "- upload/delete require approval (can overwrite/destroy data)\n"
        "- Use create_pr=true to propose changes instead of direct commit\n"
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "operation": {
                "type": "string",
                "enum": ["list", "read", "upload", "delete"],
                "description": "Operation: list, read, upload, delete",
            },
            "repo_id": {
                "type": "string",
                "description": "Repository ID (e.g., 'username/repo-name')",
            },
            "repo_type": {
                "type": "string",
                "enum": ["model", "dataset", "space"],
                "description": "Repository type (default: model)",
            },
            "revision": {
                "type": "string",
                "description": "Branch/tag/commit (default: main)",
            },
            "path": {
                "type": "string",
                "description": "File path for read/upload",
            },
            "content": {
                "type": "string",
                "description": "File content for upload",
            },
            "patterns": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Patterns to delete (e.g., ['*.tmp', 'logs/'])",
            },
            "create_pr": {
                "type": "boolean",
                "description": "Create PR instead of direct commit",
            },
            "commit_message": {
                "type": "string",
                "description": "Custom commit message",
            },
        },
        "required": ["operation"],
    },
}
|
|
|
|
|
async def hf_repo_files_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Run one hf_repo_files request for the agent tool router.

    A fresh ``HfRepoFilesTool`` (constructed without an explicit token)
    executes ``arguments``. The return value is ``(text, ok)``: the result's
    ``formatted`` text and ``True`` unless the result is flagged ``isError``.
    Any exception is converted into ``("Error: <message>", False)``.
    """
    try:
        outcome = await HfRepoFilesTool().execute(arguments)
        text = outcome["formatted"]
        succeeded = not outcome.get("isError", False)
        return text, succeeded
    except Exception as exc:
        return f"Error: {str(exc)}", False
|
|
|