| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| from __future__ import annotations |
|
|
| import inspect |
| import json |
| import re |
| import struct |
| import time |
| import warnings |
| from collections import defaultdict |
| from concurrent.futures import Future, ThreadPoolExecutor |
| from dataclasses import asdict, dataclass, field |
| from datetime import datetime |
| from functools import wraps |
| from itertools import islice |
| from pathlib import Path |
| from textwrap import dedent |
| from typing import ( |
| TYPE_CHECKING, |
| Any, |
| BinaryIO, |
| Callable, |
| Iterable, |
| Iterator, |
| Literal, |
| Optional, |
| TypeVar, |
| Union, |
| overload, |
| ) |
| from urllib.parse import quote |
|
|
| import httpx |
| from tqdm.auto import tqdm as base_tqdm |
| from tqdm.contrib.concurrent import thread_map |
|
|
| from . import constants |
| from ._commit_api import ( |
| CommitOperation, |
| CommitOperationAdd, |
| CommitOperationCopy, |
| CommitOperationDelete, |
| _fetch_files_to_copy, |
| _fetch_upload_modes, |
| _prepare_commit_payload, |
| _upload_files, |
| _warn_on_overwriting_operations, |
| ) |
| from ._inference_endpoints import InferenceEndpoint, InferenceEndpointScalingMetric, InferenceEndpointType |
| from ._jobs_api import JobInfo, JobSpec, ScheduledJobInfo, _create_job_spec |
| from ._space_api import SpaceHardware, SpaceRuntime, SpaceStorage, SpaceVariable |
| from ._upload_large_folder import upload_large_folder_internal |
| from .community import ( |
| Discussion, |
| DiscussionComment, |
| DiscussionStatusChange, |
| DiscussionTitleChange, |
| DiscussionWithDetails, |
| deserialize_event, |
| ) |
| from .errors import ( |
| BadRequestError, |
| GatedRepoError, |
| HfHubHTTPError, |
| LocalTokenNotFoundError, |
| RemoteEntryNotFoundError, |
| RepositoryNotFoundError, |
| RevisionNotFoundError, |
| ) |
| from .file_download import DryRunFileInfo, HfFileMetadata, get_hf_file_metadata, hf_hub_url |
| from .repocard_data import DatasetCardData, ModelCardData, SpaceCardData |
| from .utils import ( |
| DEFAULT_IGNORE_PATTERNS, |
| NotASafetensorsRepoError, |
| SafetensorsFileMetadata, |
| SafetensorsParsingError, |
| SafetensorsRepoMetadata, |
| TensorInfo, |
| build_hf_headers, |
| chunk_iterable, |
| experimental, |
| filter_repo_objects, |
| fix_hf_endpoint_in_url, |
| get_session, |
| get_token, |
| hf_raise_for_status, |
| logging, |
| paginate, |
| parse_datetime, |
| validate_hf_hub_args, |
| ) |
| from .utils import tqdm as hf_tqdm |
| from .utils._auth import _get_token_from_environment, _get_token_from_file, _get_token_from_google_colab |
| from .utils._deprecation import _deprecate_arguments |
| from .utils._typing import CallableT |
| from .utils._verification import collect_local_files, resolve_local_root, verify_maps |
| from .utils.endpoint_helpers import _is_emission_within_threshold |
|
|
|
|
| if TYPE_CHECKING: |
| from .inference._providers import PROVIDER_T |
| from .utils._verification import FolderVerification |
|
|
| R = TypeVar("R") |
| CollectionItemType_T = Literal["model", "dataset", "space", "paper", "collection"] |
|
|
| ExpandModelProperty_T = Literal[ |
| "author", |
| "baseModels", |
| "cardData", |
| "childrenModelCount", |
| "config", |
| "createdAt", |
| "disabled", |
| "downloads", |
| "downloadsAllTime", |
| "gated", |
| "gguf", |
| "inference", |
| "inferenceProviderMapping", |
| "lastModified", |
| "library_name", |
| "likes", |
| "mask_token", |
| "model-index", |
| "pipeline_tag", |
| "private", |
| "resourceGroup", |
| "safetensors", |
| "sha", |
| "siblings", |
| "spaces", |
| "tags", |
| "transformersInfo", |
| "trendingScore", |
| "usedStorage", |
| "widgetData", |
| ] |
|
|
| ExpandDatasetProperty_T = Literal[ |
| "author", |
| "cardData", |
| "citation", |
| "createdAt", |
| "description", |
| "disabled", |
| "downloads", |
| "downloadsAllTime", |
| "gated", |
| "lastModified", |
| "likes", |
| "paperswithcode_id", |
| "private", |
| "resourceGroup", |
| "sha", |
| "siblings", |
| "tags", |
| "trendingScore", |
| "usedStorage", |
| ] |
|
|
| ExpandSpaceProperty_T = Literal[ |
| "author", |
| "cardData", |
| "createdAt", |
| "datasets", |
| "disabled", |
| "lastModified", |
| "likes", |
| "models", |
| "private", |
| "resourceGroup", |
| "runtime", |
| "sdk", |
| "sha", |
| "siblings", |
| "subdomain", |
| "tags", |
| "trendingScore", |
| "usedStorage", |
| ] |
|
|
| USERNAME_PLACEHOLDER = "hf_user" |
| _REGEX_DISCUSSION_URL = re.compile(r".*/discussions/(\d+)$") |
| _REGEX_HTTP_PROTOCOL = re.compile(r"https?://") |
|
|
| _CREATE_COMMIT_NO_REPO_ERROR_MESSAGE = ( |
| "\nNote: Creating a commit assumes that the repo already exists on the" |
| " Huggingface Hub. Please use `create_repo` if it's not the case." |
| ) |
| _AUTH_CHECK_NO_REPO_ERROR_MESSAGE = ( |
| "\nNote: The repository either does not exist or you do not have access rights." |
| " Please check the repository ID and your access permissions." |
| " If this is a private repository, ensure that your token is correct." |
| ) |
| logger = logging.get_logger(__name__) |
|
|
|
|
| def repo_type_and_id_from_hf_id(hf_id: str, hub_url: Optional[str] = None) -> tuple[Optional[str], Optional[str], str]: |
| """ |
| Returns the repo type and ID from a huggingface.co URL linking to a |
| repository |
| |
| Args: |
| hf_id (`str`): |
| An URL or ID of a repository on the HF hub. Accepted values are: |
| |
| - https://huggingface.co/<repo_type>/<namespace>/<repo_id> |
| - https://huggingface.co/<namespace>/<repo_id> |
| - hf://<repo_type>/<namespace>/<repo_id> |
| - hf://<namespace>/<repo_id> |
| - <repo_type>/<namespace>/<repo_id> |
| - <namespace>/<repo_id> |
| - <repo_id> |
| hub_url (`str`, *optional*): |
| The URL of the HuggingFace Hub, defaults to https://huggingface.co |
| |
| Returns: |
| A tuple with three items: repo_type (`str` or `None`), namespace (`str` or |
| `None`) and repo_id (`str`). |
| |
| Raises: |
| [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| If URL cannot be parsed. |
| [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| If `repo_type` is unknown. |
| """ |
| input_hf_id = hf_id |
|
|
| |
| full_hub_url = hub_url if hub_url is not None else constants.ENDPOINT |
| hub_url_without_protocol = _REGEX_HTTP_PROTOCOL.sub("", full_hub_url) |
|
|
| |
| hf_id_without_protocol = _REGEX_HTTP_PROTOCOL.sub("", hf_id) |
| is_hf_url = hub_url_without_protocol in hf_id_without_protocol and "@" not in hf_id |
|
|
| HFFS_PREFIX = "hf://" |
| if hf_id.startswith(HFFS_PREFIX): |
| hf_id = hf_id[len(HFFS_PREFIX) :] |
|
|
| |
| if is_hf_url: |
| |
| hf_id_normalized = _REGEX_HTTP_PROTOCOL.sub("", hf_id) |
|
|
| |
| if hf_id_normalized.startswith(hub_url_without_protocol): |
| |
| hf_id = hf_id_normalized[len(hub_url_without_protocol) :].lstrip("/") |
|
|
| url_segments = hf_id.split("/") |
| is_hf_id = len(url_segments) <= 3 |
|
|
| namespace: Optional[str] |
| if is_hf_url: |
| |
| |
|
|
| if len(url_segments) >= 3: |
| |
| if url_segments[0] in constants.REPO_TYPES_MAPPING: |
| repo_type = constants.REPO_TYPES_MAPPING[url_segments[0]] |
| namespace = url_segments[1] |
| repo_id = url_segments[2] |
| else: |
| |
| namespace = url_segments[0] |
| repo_id = url_segments[1] |
| repo_type = None |
| elif len(url_segments) == 2: |
| namespace = url_segments[0] |
| repo_id = url_segments[1] |
|
|
| |
| if namespace in constants.REPO_TYPES_MAPPING: |
| |
| repo_type = constants.REPO_TYPES_MAPPING[namespace] |
| namespace = None |
| else: |
| repo_type = None |
| else: |
| |
| repo_id = url_segments[0] |
| namespace = None |
| repo_type = None |
| elif is_hf_id: |
| if len(url_segments) == 3: |
| |
| repo_type, namespace, repo_id = url_segments[-3:] |
| elif len(url_segments) == 2: |
| if url_segments[0] in constants.REPO_TYPES_MAPPING: |
| |
| repo_type = constants.REPO_TYPES_MAPPING[url_segments[0]] |
| namespace = None |
| repo_id = hf_id.split("/")[-1] |
| else: |
| |
| namespace, repo_id = hf_id.split("/")[-2:] |
| repo_type = None |
| else: |
| |
| repo_id = url_segments[0] |
| namespace, repo_type = None, None |
| else: |
| raise ValueError(f"Unable to retrieve user and repo ID from the passed HF ID: {hf_id}") |
|
|
| |
| if repo_type in constants.REPO_TYPES_MAPPING: |
| repo_type = constants.REPO_TYPES_MAPPING[repo_type] |
| if repo_type == "": |
| repo_type = None |
| if repo_type not in constants.REPO_TYPES: |
| raise ValueError(f"Unknown `repo_type`: '{repo_type}' ('{input_hf_id}')") |
|
|
| return repo_type, namespace, repo_id |
|
|
|
|
| @dataclass |
| class LastCommitInfo(dict): |
| oid: str |
| title: str |
| date: datetime |
|
|
| def __post_init__(self): |
| self.update(asdict(self)) |
|
|
|
|
| @dataclass |
| class BlobLfsInfo(dict): |
| size: int |
| sha256: str |
| pointer_size: int |
|
|
| def __post_init__(self): |
| self.update(asdict(self)) |
|
|
|
|
| @dataclass |
| class BlobSecurityInfo(dict): |
| safe: bool |
| status: str |
| av_scan: Optional[dict] |
| pickle_import_scan: Optional[dict] |
|
|
| def __post_init__(self): |
| self.update(asdict(self)) |
|
|
|
|
| @dataclass |
| class TransformersInfo(dict): |
| auto_model: str |
| custom_class: Optional[str] = None |
| |
| pipeline_tag: Optional[str] = None |
| processor: Optional[str] = None |
|
|
| def __post_init__(self): |
| self.update(asdict(self)) |
|
|
|
|
| @dataclass |
| class SafeTensorsInfo(dict): |
| parameters: dict[str, int] |
| total: int |
|
|
| def __post_init__(self): |
| self.update(asdict(self)) |
|
|
|
|
| @dataclass |
| class CommitInfo(str): |
| """Data structure containing information about a newly created commit. |
| |
| Returned by any method that creates a commit on the Hub: [`create_commit`], [`upload_file`], [`upload_folder`], |
| [`delete_file`], [`delete_folder`]. It inherits from `str` for backward compatibility but using methods specific |
| to `str` is deprecated. |
| |
| Attributes: |
| commit_url (`str`): |
| Url where to find the commit. |
| |
| commit_message (`str`): |
| The summary (first line) of the commit that has been created. |
| |
| commit_description (`str`): |
| Description of the commit that has been created. Can be empty. |
| |
| oid (`str`): |
| Commit hash id. Example: `"91c54ad1727ee830252e457677f467be0bfd8a57"`. |
| |
| pr_url (`str`, *optional*): |
| Url to the PR that has been created, if any. Populated when `create_pr=True` |
| is passed. |
| |
| pr_revision (`str`, *optional*): |
| Revision of the PR that has been created, if any. Populated when |
| `create_pr=True` is passed. Example: `"refs/pr/1"`. |
| |
| pr_num (`int`, *optional*): |
| Number of the PR discussion that has been created, if any. Populated when |
| `create_pr=True` is passed. Can be passed as `discussion_num` in |
| [`get_discussion_details`]. Example: `1`. |
| |
| repo_url (`RepoUrl`): |
| Repo URL of the commit containing info like repo_id, repo_type, etc. |
| """ |
|
|
| commit_url: str |
| commit_message: str |
| commit_description: str |
| oid: str |
| pr_url: Optional[str] = None |
|
|
| |
| repo_url: RepoUrl = field(init=False) |
|
|
| |
| pr_revision: Optional[str] = field(init=False) |
| pr_num: Optional[int] = field(init=False) |
|
|
| def __new__(cls, *args, commit_url: str, **kwargs): |
| return str.__new__(cls, commit_url) |
|
|
| def __post_init__(self): |
| """Populate pr-related fields after initialization. |
| |
| See https://docs.python.org/3.10/library/dataclasses.html#post-init-processing. |
| """ |
| |
| self.repo_url = RepoUrl(self.commit_url.split("/commit/")[0]) |
|
|
| |
| if self.pr_url is not None: |
| self.pr_revision = _parse_revision_from_pr_url(self.pr_url) |
| self.pr_num = int(self.pr_revision.split("/")[-1]) |
| else: |
| self.pr_revision = None |
| self.pr_num = None |
|
|
|
|
| @dataclass |
| class AccessRequest: |
| """Data structure containing information about a user access request. |
| |
| Attributes: |
| username (`str`): |
| Username of the user who requested access. |
| fullname (`str`): |
| Fullname of the user who requested access. |
| email (`Optional[str]`): |
| Email of the user who requested access. |
| Can only be `None` in the /accepted list if the user was granted access manually. |
| timestamp (`datetime`): |
| Timestamp of the request. |
| status (`Literal["pending", "accepted", "rejected"]`): |
| Status of the request. Can be one of `["pending", "accepted", "rejected"]`. |
| fields (`dict[str, Any]`, *optional*): |
| Additional fields filled by the user in the gate form. |
| """ |
|
|
| username: str |
| fullname: str |
| email: Optional[str] |
| timestamp: datetime |
| status: Literal["pending", "accepted", "rejected"] |
|
|
| |
| fields: Optional[dict[str, Any]] = None |
|
|
|
|
| @dataclass |
| class WebhookWatchedItem: |
| """Data structure containing information about the items watched by a webhook. |
| |
| Attributes: |
| type (`Literal["dataset", "model", "org", "space", "user"]`): |
| Type of the item to be watched. Can be one of `["dataset", "model", "org", "space", "user"]`. |
| name (`str`): |
| Name of the item to be watched. Can be the username, organization name, model name, dataset name or space name. |
| """ |
|
|
| type: Literal["dataset", "model", "org", "space", "user"] |
| name: str |
|
|
|
|
| @dataclass |
| class WebhookInfo: |
| """Data structure containing information about a webhook. |
| |
| One of `url` or `job` is specified, but not both. |
| |
| Attributes: |
| id (`str`): |
| ID of the webhook. |
| url (`str`, *optional*): |
| URL of the webhook. |
| job (`JobSpec`, *optional*): |
| Specifications of the Job to trigger. |
| watched (`list[WebhookWatchedItem]`): |
| List of items watched by the webhook, see [`WebhookWatchedItem`]. |
| domains (`list[WEBHOOK_DOMAIN_T]`): |
| List of domains the webhook is watching. Can be one of `["repo", "discussions"]`. |
| secret (`str`, *optional*): |
| Secret of the webhook. |
| disabled (`bool`): |
| Whether the webhook is disabled or not. |
| """ |
|
|
| id: str |
| url: Optional[str] |
| job: Optional[JobSpec] |
| watched: list[WebhookWatchedItem] |
| domains: list[constants.WEBHOOK_DOMAIN_T] |
| secret: Optional[str] |
| disabled: bool |
|
|
|
|
| class RepoUrl(str): |
| """Subclass of `str` describing a repo URL on the Hub. |
| |
| `RepoUrl` is returned by `HfApi.create_repo`. It inherits from `str` for backward |
| compatibility. At initialization, the URL is parsed to populate properties: |
| - endpoint (`str`) |
| - namespace (`Optional[str]`) |
| - repo_name (`str`) |
| - repo_id (`str`) |
| - repo_type (`Literal["model", "dataset", "space"]`) |
| - url (`str`) |
| |
| Args: |
| url (`Any`): |
| String value of the repo url. |
| endpoint (`str`, *optional*): |
| Endpoint of the Hub. Defaults to <https://huggingface.co>. |
| |
| Example: |
| ```py |
| >>> RepoUrl('https://huggingface.co/gpt2') |
| RepoUrl('https://huggingface.co/gpt2', endpoint='https://huggingface.co', repo_type='model', repo_id='gpt2') |
| |
| >>> RepoUrl('https://hub-ci.huggingface.co/datasets/dummy_user/dummy_dataset', endpoint='https://hub-ci.huggingface.co') |
| RepoUrl('https://hub-ci.huggingface.co/datasets/dummy_user/dummy_dataset', endpoint='https://hub-ci.huggingface.co', repo_type='dataset', repo_id='dummy_user/dummy_dataset') |
| |
| >>> RepoUrl('hf://datasets/my-user/my-dataset') |
| RepoUrl('hf://datasets/my-user/my-dataset', endpoint='https://huggingface.co', repo_type='dataset', repo_id='user/dataset') |
| |
| >>> HfApi.create_repo("dummy_model") |
| RepoUrl('https://huggingface.co/Wauplin/dummy_model', endpoint='https://huggingface.co', repo_type='model', repo_id='Wauplin/dummy_model') |
| ``` |
| |
| Raises: |
| [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| If URL cannot be parsed. |
| [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| If `repo_type` is unknown. |
| """ |
|
|
| def __new__(cls, url: Any, endpoint: Optional[str] = None): |
| url = fix_hf_endpoint_in_url(url, endpoint=endpoint) |
| return super(RepoUrl, cls).__new__(cls, url) |
|
|
| def __init__(self, url: Any, endpoint: Optional[str] = None) -> None: |
| super().__init__() |
| |
| self.endpoint = endpoint or constants.ENDPOINT |
| repo_type, namespace, repo_name = repo_type_and_id_from_hf_id(self, hub_url=self.endpoint) |
|
|
| |
| self.namespace = namespace |
| self.repo_name = repo_name |
| self.repo_id = repo_name if namespace is None else f"{namespace}/{repo_name}" |
| self.repo_type = repo_type or constants.REPO_TYPE_MODEL |
| self.url = str(self) |
|
|
| def __repr__(self) -> str: |
| return f"RepoUrl('{self}', endpoint='{self.endpoint}', repo_type='{self.repo_type}', repo_id='{self.repo_id}')" |
|
|
|
|
| @dataclass |
| class RepoSibling: |
| """ |
| Contains basic information about a repo file inside a repo on the Hub. |
| |
| > [!TIP] |
| > All attributes of this class are optional except `rfilename`. This is because only the file names are returned when |
| > listing repositories on the Hub (with [`list_models`], [`list_datasets`] or [`list_spaces`]). If you need more |
| > information like file size, blob id or lfs details, you must request them specifically from one repo at a time |
| > (using [`model_info`], [`dataset_info`] or [`space_info`]) as it adds more constraints on the backend server to |
| > retrieve these. |
| |
| Attributes: |
| rfilename (str): |
| file name, relative to the repo root. |
| size (`int`, *optional*): |
| The file's size, in bytes. This attribute is defined when `files_metadata` argument of [`repo_info`] is set |
| to `True`. It's `None` otherwise. |
| blob_id (`str`, *optional*): |
| The file's git OID. This attribute is defined when `files_metadata` argument of [`repo_info`] is set to |
| `True`. It's `None` otherwise. |
| lfs (`BlobLfsInfo`, *optional*): |
| The file's LFS metadata. This attribute is defined when`files_metadata` argument of [`repo_info`] is set to |
| `True` and the file is stored with Git LFS. It's `None` otherwise. |
| """ |
|
|
| rfilename: str |
| size: Optional[int] = None |
| blob_id: Optional[str] = None |
| lfs: Optional[BlobLfsInfo] = None |
|
|
|
|
| @dataclass |
| class RepoFile: |
| """ |
| Contains information about a file on the Hub. |
| |
| Attributes: |
| path (str): |
| file path relative to the repo root. |
| size (`int`): |
| The file's size, in bytes. |
| blob_id (`str`): |
| The file's git OID. |
| lfs (`BlobLfsInfo`, *optional*): |
| The file's LFS metadata. |
| last_commit (`LastCommitInfo`, *optional*): |
| The file's last commit metadata. Only defined if [`list_repo_tree`] and [`get_paths_info`] |
| are called with `expand=True`. |
| security (`BlobSecurityInfo`, *optional*): |
| The file's security scan metadata. Only defined if [`list_repo_tree`] and [`get_paths_info`] |
| are called with `expand=True`. |
| """ |
|
|
| path: str |
| size: int |
| blob_id: str |
| lfs: Optional[BlobLfsInfo] = None |
| last_commit: Optional[LastCommitInfo] = None |
| security: Optional[BlobSecurityInfo] = None |
|
|
| def __init__(self, **kwargs): |
| self.path = kwargs.pop("path") |
| self.size = kwargs.pop("size") |
| self.blob_id = kwargs.pop("oid") |
| lfs = kwargs.pop("lfs", None) |
| if lfs is not None: |
| lfs = BlobLfsInfo(size=lfs["size"], sha256=lfs["oid"], pointer_size=lfs["pointerSize"]) |
| self.lfs = lfs |
| last_commit = kwargs.pop("lastCommit", None) or kwargs.pop("last_commit", None) |
| if last_commit is not None: |
| last_commit = LastCommitInfo( |
| oid=last_commit["id"], title=last_commit["title"], date=parse_datetime(last_commit["date"]) |
| ) |
| self.last_commit = last_commit |
| security = kwargs.pop("securityFileStatus", None) |
| if security is not None: |
| safe = security["status"] == "safe" |
| security = BlobSecurityInfo( |
| safe=safe, |
| status=security["status"], |
| av_scan=security["avScan"], |
| pickle_import_scan=security["pickleImportScan"], |
| ) |
| self.security = security |
|
|
| |
| self.rfilename = self.path |
| self.lastCommit = self.last_commit |
|
|
|
|
| @dataclass |
| class RepoFolder: |
| """ |
| Contains information about a folder on the Hub. |
| |
| Attributes: |
| path (str): |
| folder path relative to the repo root. |
| tree_id (`str`): |
| The folder's git OID. |
| last_commit (`LastCommitInfo`, *optional*): |
| The folder's last commit metadata. Only defined if [`list_repo_tree`] and [`get_paths_info`] |
| are called with `expand=True`. |
| """ |
|
|
| path: str |
| tree_id: str |
| last_commit: Optional[LastCommitInfo] = None |
|
|
| def __init__(self, **kwargs): |
| self.path = kwargs.pop("path") |
| self.tree_id = kwargs.pop("oid") |
| last_commit = kwargs.pop("lastCommit", None) or kwargs.pop("last_commit", None) |
| if last_commit is not None: |
| last_commit = LastCommitInfo( |
| oid=last_commit["id"], title=last_commit["title"], date=parse_datetime(last_commit["date"]) |
| ) |
| self.last_commit = last_commit |
|
|
|
|
| @dataclass |
| class InferenceProviderMapping: |
| provider: "PROVIDER_T" |
| hf_model_id: str |
| provider_id: str |
| status: Literal["error", "live", "staging"] |
| task: str |
|
|
| adapter: Optional[str] = None |
| adapter_weights_path: Optional[str] = None |
| type: Optional[Literal["single-model", "tag-filter"]] = None |
|
|
| def __init__(self, **kwargs): |
| self.provider = kwargs.pop("provider") |
| self.hf_model_id = kwargs.pop("hf_model_id") |
| self.provider_id = kwargs.pop("providerId") |
| self.status = kwargs.pop("status") |
| self.task = kwargs.pop("task") |
|
|
| self.adapter = kwargs.pop("adapter", None) |
| self.adapter_weights_path = kwargs.pop("adapterWeightsPath", None) |
| self.type = kwargs.pop("type", None) |
| self.__dict__.update(**kwargs) |
|
|
|
|
| @dataclass |
| class ModelInfo: |
| """ |
| Contains information about a model on the Hub. This object is returned by [`model_info`] and [`list_models`]. |
| |
| > [!TIP] |
| > Most attributes of this class are optional. This is because the data returned by the Hub depends on the query made. |
| > In general, the more specific the query, the more information is returned. On the contrary, when listing models |
| > using [`list_models`] only a subset of the attributes are returned. |
| |
| Attributes: |
| id (`str`): |
| ID of model. |
| author (`str`, *optional*): |
| Author of the model. |
| sha (`str`, *optional*): |
| Repo SHA at this particular revision. |
| created_at (`datetime`, *optional*): |
| Date of creation of the repo on the Hub. Note that the lowest value is `2022-03-02T23:29:04.000Z`, |
| corresponding to the date when we began to store creation dates. |
| last_modified (`datetime`, *optional*): |
| Date of last commit to the repo. |
| private (`bool`): |
| Is the repo private. |
| disabled (`bool`, *optional*): |
| Is the repo disabled. |
| downloads (`int`): |
| Number of downloads of the model over the last 30 days. |
| downloads_all_time (`int`): |
| Cumulated number of downloads of the model since its creation. |
| gated (`Literal["auto", "manual", False]`, *optional*): |
| Is the repo gated. |
| If so, whether there is manual or automatic approval. |
| gguf (`dict`, *optional*): |
| GGUF information of the model. |
| inference (`Literal["warm"]`, *optional*): |
| Status of the model on Inference Providers. Warm if the model is served by at least one provider. |
| inference_provider_mapping (`list[InferenceProviderMapping]`, *optional*): |
| A list of [`InferenceProviderMapping`] ordered after the user's provider order. |
| likes (`int`): |
| Number of likes of the model. |
| library_name (`str`, *optional*): |
| Library associated with the model. |
| tags (`list[str]`): |
| List of tags of the model. Compared to `card_data.tags`, contains extra tags computed by the Hub |
| (e.g. supported libraries, model's arXiv). |
| pipeline_tag (`str`, *optional*): |
| Pipeline tag associated with the model. |
| mask_token (`str`, *optional*): |
| Mask token used by the model. |
| widget_data (`Any`, *optional*): |
| Widget data associated with the model. |
| model_index (`dict`, *optional*): |
| Model index for evaluation. |
| config (`dict`, *optional*): |
| Model configuration. |
| transformers_info (`TransformersInfo`, *optional*): |
| Transformers-specific info (auto class, processor, etc.) associated with the model. |
| trending_score (`int`, *optional*): |
| Trending score of the model. |
| card_data (`ModelCardData`, *optional*): |
| Model Card Metadata as a [`huggingface_hub.repocard_data.ModelCardData`] object. |
| siblings (`list[RepoSibling]`): |
| List of [`huggingface_hub.hf_api.RepoSibling`] objects that constitute the model. |
| spaces (`list[str]`, *optional*): |
| List of spaces using the model. |
| safetensors (`SafeTensorsInfo`, *optional*): |
| Model's safetensors information. |
| security_repo_status (`dict`, *optional*): |
| Model's security scan status. |
| """ |
|
|
| id: str |
| author: Optional[str] |
| sha: Optional[str] |
| created_at: Optional[datetime] |
| last_modified: Optional[datetime] |
| private: Optional[bool] |
| disabled: Optional[bool] |
| downloads: Optional[int] |
| downloads_all_time: Optional[int] |
| gated: Optional[Literal["auto", "manual", False]] |
| gguf: Optional[dict] |
| inference: Optional[Literal["warm"]] |
| inference_provider_mapping: Optional[list[InferenceProviderMapping]] |
| likes: Optional[int] |
| library_name: Optional[str] |
| tags: Optional[list[str]] |
| pipeline_tag: Optional[str] |
| mask_token: Optional[str] |
| card_data: Optional[ModelCardData] |
| widget_data: Optional[Any] |
| model_index: Optional[dict] |
| config: Optional[dict] |
| transformers_info: Optional[TransformersInfo] |
| trending_score: Optional[int] |
| siblings: Optional[list[RepoSibling]] |
| spaces: Optional[list[str]] |
| safetensors: Optional[SafeTensorsInfo] |
| security_repo_status: Optional[dict] |
|
|
| def __init__(self, **kwargs): |
| self.id = kwargs.pop("id") |
| self.author = kwargs.pop("author", None) |
| self.sha = kwargs.pop("sha", None) |
| last_modified = kwargs.pop("lastModified", None) or kwargs.pop("last_modified", None) |
| self.last_modified = parse_datetime(last_modified) if last_modified else None |
| created_at = kwargs.pop("createdAt", None) or kwargs.pop("created_at", None) |
| self.created_at = parse_datetime(created_at) if created_at else None |
| self.private = kwargs.pop("private", None) |
| self.gated = kwargs.pop("gated", None) |
| self.disabled = kwargs.pop("disabled", None) |
| self.downloads = kwargs.pop("downloads", None) |
| self.downloads_all_time = kwargs.pop("downloadsAllTime", None) |
| self.likes = kwargs.pop("likes", None) |
| self.library_name = kwargs.pop("library_name", None) |
| self.gguf = kwargs.pop("gguf", None) |
|
|
| self.inference = kwargs.pop("inference", None) |
|
|
| |
| |
| mapping = kwargs.pop("inferenceProviderMapping", None) |
| if isinstance(mapping, list): |
| self.inference_provider_mapping = [ |
| InferenceProviderMapping(**{**value, "hf_model_id": self.id}) for value in mapping |
| ] |
| elif isinstance(mapping, dict): |
| self.inference_provider_mapping = [ |
| InferenceProviderMapping(**{**value, "hf_model_id": self.id, "provider": provider}) |
| for provider, value in mapping.items() |
| ] |
| elif mapping is None: |
| self.inference_provider_mapping = None |
| else: |
| raise ValueError( |
| f"Unexpected type for `inferenceProviderMapping`. Expecting `dict` or `list`. Got {mapping}." |
| ) |
|
|
| self.tags = kwargs.pop("tags", None) |
| self.pipeline_tag = kwargs.pop("pipeline_tag", None) |
| self.mask_token = kwargs.pop("mask_token", None) |
| self.trending_score = kwargs.pop("trendingScore", None) |
|
|
| card_data = kwargs.pop("cardData", None) or kwargs.pop("card_data", None) |
| self.card_data = ( |
| ModelCardData(**card_data, ignore_metadata_errors=True) if isinstance(card_data, dict) else card_data |
| ) |
|
|
| self.widget_data = kwargs.pop("widgetData", None) |
| self.model_index = kwargs.pop("model-index", None) or kwargs.pop("model_index", None) |
| self.config = kwargs.pop("config", None) |
| transformers_info = kwargs.pop("transformersInfo", None) or kwargs.pop("transformers_info", None) |
| self.transformers_info = TransformersInfo(**transformers_info) if transformers_info else None |
| siblings = kwargs.pop("siblings", None) |
| self.siblings = ( |
| [ |
| RepoSibling( |
| rfilename=sibling["rfilename"], |
| size=sibling.get("size"), |
| blob_id=sibling.get("blobId"), |
| lfs=( |
| BlobLfsInfo( |
| size=sibling["lfs"]["size"], |
| sha256=sibling["lfs"]["sha256"], |
| pointer_size=sibling["lfs"]["pointerSize"], |
| ) |
| if sibling.get("lfs") |
| else None |
| ), |
| ) |
| for sibling in siblings |
| ] |
| if siblings is not None |
| else None |
| ) |
| self.spaces = kwargs.pop("spaces", None) |
| safetensors = kwargs.pop("safetensors", None) |
| self.safetensors = ( |
| SafeTensorsInfo( |
| parameters=safetensors["parameters"], |
| total=safetensors["total"], |
| ) |
| if safetensors |
| else None |
| ) |
| self.security_repo_status = kwargs.pop("securityRepoStatus", None) |
| |
| self.lastModified = self.last_modified |
| self.cardData = self.card_data |
| self.transformersInfo = self.transformers_info |
| self.__dict__.update(**kwargs) |
|
|
|
|
| @dataclass |
| class DatasetInfo: |
| """ |
| Contains information about a dataset on the Hub. This object is returned by [`dataset_info`] and [`list_datasets`]. |
| |
| > [!TIP] |
| > Most attributes of this class are optional. This is because the data returned by the Hub depends on the query made. |
| > In general, the more specific the query, the more information is returned. On the contrary, when listing datasets |
| > using [`list_datasets`] only a subset of the attributes are returned. |
| |
| Attributes: |
| id (`str`): |
| ID of dataset. |
| author (`str`): |
| Author of the dataset. |
| sha (`str`): |
| Repo SHA at this particular revision. |
| created_at (`datetime`, *optional*): |
| Date of creation of the repo on the Hub. Note that the lowest value is `2022-03-02T23:29:04.000Z`, |
| corresponding to the date when we began to store creation dates. |
| last_modified (`datetime`, *optional*): |
| Date of last commit to the repo. |
| private (`bool`): |
| Is the repo private. |
| disabled (`bool`, *optional*): |
| Is the repo disabled. |
| gated (`Literal["auto", "manual", False]`, *optional*): |
| Is the repo gated. |
| If so, whether there is manual or automatic approval. |
| downloads (`int`): |
| Number of downloads of the dataset over the last 30 days. |
| downloads_all_time (`int`): |
| Cumulated number of downloads of the model since its creation. |
| likes (`int`): |
| Number of likes of the dataset. |
| tags (`list[str]`): |
| List of tags of the dataset. |
| card_data (`DatasetCardData`, *optional*): |
| Model Card Metadata as a [`huggingface_hub.repocard_data.DatasetCardData`] object. |
| siblings (`list[RepoSibling]`): |
| List of [`huggingface_hub.hf_api.RepoSibling`] objects that constitute the dataset. |
| paperswithcode_id (`str`, *optional*): |
| Papers with code ID of the dataset. |
| trending_score (`int`, *optional*): |
| Trending score of the dataset. |
| """ |
|
|
| id: str |
| author: Optional[str] |
| sha: Optional[str] |
| created_at: Optional[datetime] |
| last_modified: Optional[datetime] |
| private: Optional[bool] |
| gated: Optional[Literal["auto", "manual", False]] |
| disabled: Optional[bool] |
| downloads: Optional[int] |
| downloads_all_time: Optional[int] |
| likes: Optional[int] |
| paperswithcode_id: Optional[str] |
| tags: Optional[list[str]] |
| trending_score: Optional[int] |
| card_data: Optional[DatasetCardData] |
| siblings: Optional[list[RepoSibling]] |
|
|
| def __init__(self, **kwargs): |
| self.id = kwargs.pop("id") |
| self.author = kwargs.pop("author", None) |
| self.sha = kwargs.pop("sha", None) |
| created_at = kwargs.pop("createdAt", None) or kwargs.pop("created_at", None) |
| self.created_at = parse_datetime(created_at) if created_at else None |
| last_modified = kwargs.pop("lastModified", None) or kwargs.pop("last_modified", None) |
| self.last_modified = parse_datetime(last_modified) if last_modified else None |
| self.private = kwargs.pop("private", None) |
| self.gated = kwargs.pop("gated", None) |
| self.disabled = kwargs.pop("disabled", None) |
| self.downloads = kwargs.pop("downloads", None) |
| self.downloads_all_time = kwargs.pop("downloadsAllTime", None) |
| self.likes = kwargs.pop("likes", None) |
| self.paperswithcode_id = kwargs.pop("paperswithcode_id", None) |
| self.tags = kwargs.pop("tags", None) |
| self.trending_score = kwargs.pop("trendingScore", None) |
|
|
| card_data = kwargs.pop("cardData", None) or kwargs.pop("card_data", None) |
| self.card_data = ( |
| DatasetCardData(**card_data, ignore_metadata_errors=True) if isinstance(card_data, dict) else card_data |
| ) |
| siblings = kwargs.pop("siblings", None) |
| self.siblings = ( |
| [ |
| RepoSibling( |
| rfilename=sibling["rfilename"], |
| size=sibling.get("size"), |
| blob_id=sibling.get("blobId"), |
| lfs=( |
| BlobLfsInfo( |
| size=sibling["lfs"]["size"], |
| sha256=sibling["lfs"]["sha256"], |
| pointer_size=sibling["lfs"]["pointerSize"], |
| ) |
| if sibling.get("lfs") |
| else None |
| ), |
| ) |
| for sibling in siblings |
| ] |
| if siblings is not None |
| else None |
| ) |
| |
| self.lastModified = self.last_modified |
| self.cardData = self.card_data |
| self.__dict__.update(**kwargs) |
|
|
|
|
| @dataclass |
| class SpaceInfo: |
| """ |
| Contains information about a Space on the Hub. This object is returned by [`space_info`] and [`list_spaces`]. |
| |
| > [!TIP] |
| > Most attributes of this class are optional. This is because the data returned by the Hub depends on the query made. |
| > In general, the more specific the query, the more information is returned. On the contrary, when listing spaces |
| > using [`list_spaces`] only a subset of the attributes are returned. |
| |
| Attributes: |
| id (`str`): |
| ID of the Space. |
| author (`str`, *optional*): |
| Author of the Space. |
| sha (`str`, *optional*): |
| Repo SHA at this particular revision. |
| created_at (`datetime`, *optional*): |
| Date of creation of the repo on the Hub. Note that the lowest value is `2022-03-02T23:29:04.000Z`, |
| corresponding to the date when we began to store creation dates. |
| last_modified (`datetime`, *optional*): |
| Date of last commit to the repo. |
| private (`bool`): |
| Is the repo private. |
| gated (`Literal["auto", "manual", False]`, *optional*): |
| Is the repo gated. |
| If so, whether there is manual or automatic approval. |
| disabled (`bool`, *optional*): |
| Is the Space disabled. |
| host (`str`, *optional*): |
| Host URL of the Space. |
| subdomain (`str`, *optional*): |
| Subdomain of the Space. |
| likes (`int`): |
| Number of likes of the Space. |
| tags (`list[str]`): |
| List of tags of the Space. |
| siblings (`list[RepoSibling]`): |
| List of [`huggingface_hub.hf_api.RepoSibling`] objects that constitute the Space. |
| card_data (`SpaceCardData`, *optional*): |
| Space Card Metadata as a [`huggingface_hub.repocard_data.SpaceCardData`] object. |
| runtime (`SpaceRuntime`, *optional*): |
| Space runtime information as a [`huggingface_hub.hf_api.SpaceRuntime`] object. |
| sdk (`str`, *optional*): |
| SDK used by the Space. |
| models (`list[str]`, *optional*): |
| List of models used by the Space. |
| datasets (`list[str]`, *optional*): |
| List of datasets used by the Space. |
| trending_score (`int`, *optional*): |
| Trending score of the Space. |
| """ |
|
|
| id: str |
| author: Optional[str] |
| sha: Optional[str] |
| created_at: Optional[datetime] |
| last_modified: Optional[datetime] |
| private: Optional[bool] |
| gated: Optional[Literal["auto", "manual", False]] |
| disabled: Optional[bool] |
| host: Optional[str] |
| subdomain: Optional[str] |
| likes: Optional[int] |
| sdk: Optional[str] |
| tags: Optional[list[str]] |
| siblings: Optional[list[RepoSibling]] |
| trending_score: Optional[int] |
| card_data: Optional[SpaceCardData] |
| runtime: Optional[SpaceRuntime] |
| models: Optional[list[str]] |
| datasets: Optional[list[str]] |
|
|
| def __init__(self, **kwargs): |
| self.id = kwargs.pop("id") |
| self.author = kwargs.pop("author", None) |
| self.sha = kwargs.pop("sha", None) |
| created_at = kwargs.pop("createdAt", None) or kwargs.pop("created_at", None) |
| self.created_at = parse_datetime(created_at) if created_at else None |
| last_modified = kwargs.pop("lastModified", None) or kwargs.pop("last_modified", None) |
| self.last_modified = parse_datetime(last_modified) if last_modified else None |
| self.private = kwargs.pop("private", None) |
| self.gated = kwargs.pop("gated", None) |
| self.disabled = kwargs.pop("disabled", None) |
| self.host = kwargs.pop("host", None) |
| self.subdomain = kwargs.pop("subdomain", None) |
| self.likes = kwargs.pop("likes", None) |
| self.sdk = kwargs.pop("sdk", None) |
| self.tags = kwargs.pop("tags", None) |
| self.trending_score = kwargs.pop("trendingScore", None) |
| card_data = kwargs.pop("cardData", None) or kwargs.pop("card_data", None) |
| self.card_data = ( |
| SpaceCardData(**card_data, ignore_metadata_errors=True) if isinstance(card_data, dict) else card_data |
| ) |
| siblings = kwargs.pop("siblings", None) |
| self.siblings = ( |
| [ |
| RepoSibling( |
| rfilename=sibling["rfilename"], |
| size=sibling.get("size"), |
| blob_id=sibling.get("blobId"), |
| lfs=( |
| BlobLfsInfo( |
| size=sibling["lfs"]["size"], |
| sha256=sibling["lfs"]["sha256"], |
| pointer_size=sibling["lfs"]["pointerSize"], |
| ) |
| if sibling.get("lfs") |
| else None |
| ), |
| ) |
| for sibling in siblings |
| ] |
| if siblings is not None |
| else None |
| ) |
| runtime = kwargs.pop("runtime", None) |
| self.runtime = SpaceRuntime(runtime) if runtime else None |
| self.models = kwargs.pop("models", None) |
| self.datasets = kwargs.pop("datasets", None) |
| |
| self.lastModified = self.last_modified |
| self.cardData = self.card_data |
| self.__dict__.update(**kwargs) |
|
|
|
|
| @dataclass |
| class CollectionItem: |
| """ |
| Contains information about an item of a Collection (model, dataset, Space, paper or collection). |
| |
| Attributes: |
| item_object_id (`str`): |
| Unique ID of the item in the collection. |
| item_id (`str`): |
| ID of the underlying object on the Hub. Can be either a repo_id, a paper id or a collection slug. |
| e.g. `"jbilcke-hf/ai-comic-factory"`, `"2307.09288"`, `"celinah/cerebras-function-calling-682607169c35fbfa98b30b9a"`. |
| item_type (`str`): |
| Type of the underlying object. Can be one of `"model"`, `"dataset"`, `"space"`, `"paper"` or `"collection"`. |
| position (`int`): |
| Position of the item in the collection. |
| note (`str`, *optional*): |
| Note associated with the item, as plain text. |
| """ |
|
|
| item_object_id: str |
| item_id: str |
| item_type: str |
| position: int |
| note: Optional[str] = None |
|
|
| def __init__( |
| self, |
| _id: str, |
| id: str, |
| type: CollectionItemType_T, |
| position: int, |
| note: Optional[dict] = None, |
| **kwargs, |
| ) -> None: |
| self.item_object_id: str = _id |
| self.item_id: str = id |
| |
| slug = kwargs.get("slug") |
| if slug is not None: |
| self.item_id = slug |
| self.item_type: CollectionItemType_T = type |
| self.position: int = position |
| self.note: str = note["text"] if note is not None else None |
|
|
|
|
| @dataclass |
| class Collection: |
| """ |
| Contains information about a Collection on the Hub. |
| |
| Attributes: |
| slug (`str`): |
| Slug of the collection. E.g. `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. |
| title (`str`): |
| Title of the collection. E.g. `"Recent models"`. |
| owner (`str`): |
| Owner of the collection. E.g. `"TheBloke"`. |
| items (`list[CollectionItem]`): |
| List of items in the collection. |
| last_updated (`datetime`): |
| Date of the last update of the collection. |
| position (`int`): |
| Position of the collection in the list of collections of the owner. |
| private (`bool`): |
| Whether the collection is private or not. |
| theme (`str`): |
| Theme of the collection. E.g. `"green"`. |
| upvotes (`int`): |
| Number of upvotes of the collection. |
| description (`str`, *optional*): |
| Description of the collection, as plain text. |
| url (`str`): |
| (property) URL of the collection on the Hub. |
| """ |
|
|
| slug: str |
| title: str |
| owner: str |
| items: list[CollectionItem] |
| last_updated: datetime |
| position: int |
| private: bool |
| theme: str |
| upvotes: int |
| description: Optional[str] = None |
|
|
| def __init__(self, **kwargs) -> None: |
| self.slug = kwargs.pop("slug") |
| self.title = kwargs.pop("title") |
| self.owner = kwargs.pop("owner") |
| self.items = [CollectionItem(**item) for item in kwargs.pop("items")] |
| self.last_updated = parse_datetime(kwargs.pop("lastUpdated")) |
| self.position = kwargs.pop("position") |
| self.private = kwargs.pop("private") |
| self.theme = kwargs.pop("theme") |
| self.upvotes = kwargs.pop("upvotes") |
| self.description = kwargs.pop("description", None) |
| endpoint = kwargs.pop("endpoint", None) |
| if endpoint is None: |
| endpoint = constants.ENDPOINT |
| self._url = f"{endpoint}/collections/{self.slug}" |
|
|
| @property |
| def url(self) -> str: |
| """Returns the URL of the collection on the Hub.""" |
| return self._url |
|
|
|
|
| @dataclass |
| class GitRefInfo: |
| """ |
| Contains information about a git reference for a repo on the Hub. |
| |
| Attributes: |
| name (`str`): |
| Name of the reference (e.g. tag name or branch name). |
| ref (`str`): |
| Full git ref on the Hub (e.g. `"refs/heads/main"` or `"refs/tags/v1.0"`). |
| target_commit (`str`): |
| OID of the target commit for the ref (e.g. `"e7da7f221d5bf496a48136c0cd264e630fe9fcc8"`) |
| """ |
|
|
| name: str |
| ref: str |
| target_commit: str |
|
|
|
|
| @dataclass |
| class GitRefs: |
| """ |
| Contains information about all git references for a repo on the Hub. |
| |
| Object is returned by [`list_repo_refs`]. |
| |
| Attributes: |
| branches (`list[GitRefInfo]`): |
| A list of [`GitRefInfo`] containing information about branches on the repo. |
| converts (`list[GitRefInfo]`): |
| A list of [`GitRefInfo`] containing information about "convert" refs on the repo. |
| Converts are refs used (internally) to push preprocessed data in Dataset repos. |
| tags (`list[GitRefInfo]`): |
| A list of [`GitRefInfo`] containing information about tags on the repo. |
| pull_requests (`list[GitRefInfo]`, *optional*): |
| A list of [`GitRefInfo`] containing information about pull requests on the repo. |
| Only returned if `include_prs=True` is set. |
| """ |
|
|
| branches: list[GitRefInfo] |
| converts: list[GitRefInfo] |
| tags: list[GitRefInfo] |
| pull_requests: Optional[list[GitRefInfo]] = None |
|
|
|
|
| @dataclass |
| class GitCommitInfo: |
| """ |
| Contains information about a git commit for a repo on the Hub. Check out [`list_repo_commits`] for more details. |
| |
| Attributes: |
| commit_id (`str`): |
| OID of the commit (e.g. `"e7da7f221d5bf496a48136c0cd264e630fe9fcc8"`) |
| authors (`list[str]`): |
| List of authors of the commit. |
| created_at (`datetime`): |
| Datetime when the commit was created. |
| title (`str`): |
| Title of the commit. This is a free-text value entered by the authors. |
| message (`str`): |
| Description of the commit. This is a free-text value entered by the authors. |
| formatted_title (`str`): |
| Title of the commit formatted as HTML. Only returned if `formatted=True` is set. |
| formatted_message (`str`): |
| Description of the commit formatted as HTML. Only returned if `formatted=True` is set. |
| """ |
|
|
| commit_id: str |
|
|
| authors: list[str] |
| created_at: datetime |
| title: str |
| message: str |
|
|
| formatted_title: Optional[str] |
| formatted_message: Optional[str] |
|
|
|
|
| @dataclass |
| class UserLikes: |
| """ |
| Contains information about a user likes on the Hub. |
| |
| Attributes: |
| user (`str`): |
| Name of the user for which we fetched the likes. |
| total (`int`): |
| Total number of likes. |
| datasets (`list[str]`): |
| List of datasets liked by the user (as repo_ids). |
| models (`list[str]`): |
| List of models liked by the user (as repo_ids). |
| spaces (`list[str]`): |
| List of spaces liked by the user (as repo_ids). |
| """ |
|
|
| |
| user: str |
| total: int |
|
|
| |
| datasets: list[str] |
| models: list[str] |
| spaces: list[str] |
|
|
|
|
| @dataclass |
| class Organization: |
| """ |
| Contains information about an organization on the Hub. |
| |
| Attributes: |
| avatar_url (`str`): |
| URL of the organization's avatar. |
| name (`str`): |
| Name of the organization on the Hub (unique). |
| fullname (`str`): |
| Organization's full name. |
| details (`str`, *optional*): |
| Organization's description. |
| is_verified (`bool`, *optional*): |
| Whether the organization is verified. |
| is_following (`bool`, *optional*): |
| Whether the authenticated user follows this organization. |
| num_users (`int`, *optional*): |
| Number of members in the organization. |
| num_models (`int`, *optional*): |
| Number of models owned by the organization. |
| num_spaces (`int`, *optional*): |
| Number of Spaces owned by the organization. |
| num_datasets (`int`, *optional*): |
| Number of datasets owned by the organization. |
| num_followers (`int`, *optional*): |
| Number of followers of the organization. |
| """ |
|
|
| avatar_url: str |
| name: str |
| fullname: str |
| details: Optional[str] = None |
| is_verified: Optional[bool] = None |
| is_following: Optional[bool] = None |
| num_users: Optional[int] = None |
| num_models: Optional[int] = None |
| num_spaces: Optional[int] = None |
| num_datasets: Optional[int] = None |
| num_followers: Optional[int] = None |
|
|
| def __init__(self, **kwargs) -> None: |
| self.avatar_url = kwargs.pop("avatarUrl", "") |
| self.name = kwargs.pop("name", "") |
| self.fullname = kwargs.pop("fullname", "") |
| self.details = kwargs.pop("details", None) |
| self.is_verified = kwargs.pop("isVerified", None) |
| self.is_following = kwargs.pop("isFollowing", None) |
| self.num_users = kwargs.pop("numUsers", None) |
| self.num_models = kwargs.pop("numModels", None) |
| self.num_spaces = kwargs.pop("numSpaces", None) |
| self.num_datasets = kwargs.pop("numDatasets", None) |
| self.num_followers = kwargs.pop("numFollowers", None) |
|
|
| |
| self.__dict__.update(**kwargs) |
|
|
|
|
| @dataclass |
| class User: |
| """ |
| Contains information about a user on the Hub. |
| |
| Attributes: |
| username (`str`): |
| Name of the user on the Hub (unique). |
| fullname (`str`): |
| User's full name. |
| avatar_url (`str`): |
| URL of the user's avatar. |
| details (`str`, *optional*): |
| User's details. |
| is_following (`bool`, *optional*): |
| Whether the authenticated user is following this user. |
| is_pro (`bool`, *optional*): |
| Whether the user is a pro user. |
| num_models (`int`, *optional*): |
| Number of models created by the user. |
| num_datasets (`int`, *optional*): |
| Number of datasets created by the user. |
| num_spaces (`int`, *optional*): |
| Number of spaces created by the user. |
| num_discussions (`int`, *optional*): |
| Number of discussions initiated by the user. |
| num_papers (`int`, *optional*): |
| Number of papers authored by the user. |
| num_upvotes (`int`, *optional*): |
| Number of upvotes received by the user. |
| num_likes (`int`, *optional*): |
| Number of likes given by the user. |
| num_following (`int`, *optional*): |
| Number of users this user is following. |
| num_followers (`int`, *optional*): |
| Number of users following this user. |
| orgs (list of [`Organization`]): |
| List of organizations the user is part of. |
| """ |
|
|
| |
| username: str |
| fullname: str |
| avatar_url: str |
| details: Optional[str] = None |
| is_following: Optional[bool] = None |
| is_pro: Optional[bool] = None |
| num_models: Optional[int] = None |
| num_datasets: Optional[int] = None |
| num_spaces: Optional[int] = None |
| num_discussions: Optional[int] = None |
| num_papers: Optional[int] = None |
| num_upvotes: Optional[int] = None |
| num_likes: Optional[int] = None |
| num_following: Optional[int] = None |
| num_followers: Optional[int] = None |
| orgs: list[Organization] = field(default_factory=list) |
|
|
| def __init__(self, **kwargs) -> None: |
| self.username = kwargs.pop("user", "") |
| self.fullname = kwargs.pop("fullname", "") |
| self.avatar_url = kwargs.pop("avatarUrl", "") |
| self.is_following = kwargs.pop("isFollowing", None) |
| self.is_pro = kwargs.pop("isPro", None) |
| self.details = kwargs.pop("details", None) |
| self.num_models = kwargs.pop("numModels", None) |
| self.num_datasets = kwargs.pop("numDatasets", None) |
| self.num_spaces = kwargs.pop("numSpaces", None) |
| self.num_discussions = kwargs.pop("numDiscussions", None) |
| self.num_papers = kwargs.pop("numPapers", None) |
| self.num_upvotes = kwargs.pop("numUpvotes", None) |
| self.num_likes = kwargs.pop("numLikes", None) |
| self.num_following = kwargs.pop("numFollowing", None) |
| self.num_followers = kwargs.pop("numFollowers", None) |
| self.user_type = kwargs.pop("type", None) |
| self.orgs = [Organization(**org) for org in kwargs.pop("orgs", [])] |
|
|
| |
| self.__dict__.update(**kwargs) |
|
|
|
|
| @dataclass |
| class PaperInfo: |
| """ |
| Contains information about a paper on the Hub. |
| |
| Attributes: |
| id (`str`): |
| arXiv paper ID. |
| authors (`list[str]`, **optional**): |
| Names of paper authors |
| published_at (`datetime`, **optional**): |
| Date paper published. |
| title (`str`, **optional**): |
| Title of the paper. |
| summary (`str`, **optional**): |
| Summary of the paper. |
| upvotes (`int`, **optional**): |
| Number of upvotes for the paper on the Hub. |
| discussion_id (`str`, **optional**): |
| Discussion ID for the paper on the Hub. |
| source (`str`, **optional**): |
| Source of the paper. |
| comments (`int`, **optional**): |
| Number of comments for the paper on the Hub. |
| submitted_at (`datetime`, **optional**): |
| Date paper appeared in daily papers on the Hub. |
| submitted_by (`User`, **optional**): |
| Information about who submitted the daily paper. |
| """ |
|
|
| id: str |
| authors: Optional[list[str]] |
| published_at: Optional[datetime] |
| title: Optional[str] |
| summary: Optional[str] |
| upvotes: Optional[int] |
| discussion_id: Optional[str] |
| source: Optional[str] |
| comments: Optional[int] |
| submitted_at: Optional[datetime] |
| submitted_by: Optional[User] |
|
|
| def __init__(self, **kwargs) -> None: |
| paper = kwargs.pop("paper", {}) |
| self.id = kwargs.pop("id", None) or paper.pop("id", None) |
| authors = paper.pop("authors", None) or kwargs.pop("authors", None) |
| self.authors = [author.pop("name", None) for author in authors] if authors else None |
| published_at = paper.pop("publishedAt", None) or kwargs.pop("publishedAt", None) |
| self.published_at = parse_datetime(published_at) if published_at else None |
| self.title = kwargs.pop("title", None) |
| self.source = kwargs.pop("source", None) |
| self.summary = paper.pop("summary", None) or kwargs.pop("summary", None) |
| self.upvotes = paper.pop("upvotes", None) or kwargs.pop("upvotes", None) |
| self.discussion_id = paper.pop("discussionId", None) or kwargs.pop("discussionId", None) |
| self.comments = kwargs.pop("numComments", 0) |
| submitted_at = kwargs.pop("publishedAt", None) or kwargs.pop("submittedOnDailyAt", None) |
| self.submitted_at = parse_datetime(submitted_at) if submitted_at else None |
| submitted_by = kwargs.pop("submittedBy", None) or kwargs.pop("submittedOnDailyBy", None) |
| self.submitted_by = User(**submitted_by) if submitted_by else None |
|
|
| |
| self.__dict__.update(**kwargs) |
|
|
|
|
| @dataclass |
| class LFSFileInfo: |
| """ |
| Contains information about a file stored as LFS on a repo on the Hub. |
| |
| Used in the context of listing and permanently deleting LFS files from a repo to free-up space. |
| See [`list_lfs_files`] and [`permanently_delete_lfs_files`] for more details. |
| |
| Git LFS files are tracked using SHA-256 object IDs, rather than file paths, to optimize performance |
| This approach is necessary because a single object can be referenced by multiple paths across different commits, |
| making it impractical to search and resolve these connections. Check out [our documentation](https://huggingface.co/docs/hub/storage-limits#advanced-track-lfs-file-references) |
| to learn how to know which filename(s) is(are) associated with each SHA. |
| |
| Attributes: |
| file_oid (`str`): |
| SHA-256 object ID of the file. This is the identifier to pass when permanently deleting the file. |
| filename (`str`): |
| Possible filename for the LFS object. See the note above for more information. |
| oid (`str`): |
| OID of the LFS object. |
| pushed_at (`datetime`): |
| Date the LFS object was pushed to the repo. |
| ref (`str`, *optional*): |
| Ref where the LFS object has been pushed (if any). |
| size (`int`): |
| Size of the LFS object. |
| |
| Example: |
| ```py |
| >>> from huggingface_hub import HfApi |
| >>> api = HfApi() |
| >>> lfs_files = api.list_lfs_files("username/my-cool-repo") |
| |
| # Filter files files to delete based on a combination of `filename`, `pushed_at`, `ref` or `size`. |
| # e.g. select only LFS files in the "checkpoints" folder |
| >>> lfs_files_to_delete = (lfs_file for lfs_file in lfs_files if lfs_file.filename.startswith("checkpoints/")) |
| |
| # Permanently delete LFS files |
| >>> api.permanently_delete_lfs_files("username/my-cool-repo", lfs_files_to_delete) |
| ``` |
| """ |
|
|
| file_oid: str |
| filename: str |
| oid: str |
| pushed_at: datetime |
| ref: Optional[str] |
| size: int |
|
|
| def __init__(self, **kwargs) -> None: |
| self.file_oid = kwargs.pop("fileOid") |
| self.filename = kwargs.pop("filename") |
| self.oid = kwargs.pop("oid") |
| self.pushed_at = parse_datetime(kwargs.pop("pushedAt")) |
| self.ref = kwargs.pop("ref", None) |
| self.size = kwargs.pop("size") |
|
|
| |
| self.__dict__.update(**kwargs) |
|
|
|
|
| def future_compatible(fn: CallableT) -> CallableT: |
| """Wrap a method of `HfApi` to handle `run_as_future=True`. |
| |
| A method flagged as "future_compatible" will be called in a thread if `run_as_future=True` and return a |
| `concurrent.futures.Future` instance. Otherwise, it will be called normally and return the result. |
| """ |
| sig = inspect.signature(fn) |
| args_params = list(sig.parameters)[1:] |
|
|
| @wraps(fn) |
| def _inner(self, *args, **kwargs): |
| |
| if "run_as_future" in kwargs: |
| run_as_future = kwargs["run_as_future"] |
| kwargs["run_as_future"] = False |
| else: |
| run_as_future = False |
| for param, value in zip(args_params, args): |
| if param == "run_as_future": |
| run_as_future = value |
| break |
|
|
| |
| if run_as_future: |
| return self.run_as_future(fn, self, *args, **kwargs) |
|
|
| |
| return fn(self, *args, **kwargs) |
|
|
| _inner.is_future_compatible = True |
| return _inner |
|
|
|
|
| class HfApi: |
| """ |
| Client to interact with the Hugging Face Hub via HTTP. |
| |
| The client is initialized with some high-level settings used in all requests |
| made to the Hub (HF endpoint, authentication, user agents...). Using the `HfApi` |
| client is preferred but not mandatory as all of its public methods are exposed |
| directly at the root of `huggingface_hub`. |
| |
| Args: |
| endpoint (`str`, *optional*): |
| Endpoint of the Hub. Defaults to <https://huggingface.co>. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| library_name (`str`, *optional*): |
| The name of the library that is making the HTTP request. Will be added to |
| the user-agent header. Example: `"transformers"`. |
| library_version (`str`, *optional*): |
| The version of the library that is making the HTTP request. Will be added |
| to the user-agent header. Example: `"4.24.0"`. |
| user_agent (`str`, `dict`, *optional*): |
| The user agent info in the form of a dictionary or a single string. It will |
| be completed with information about the installed packages. |
| headers (`dict`, *optional*): |
| Additional headers to be sent with each request. Example: `{"X-My-Header": "value"}`. |
| Headers passed here are taking precedence over the default headers. |
| """ |
|
|
| def __init__( |
| self, |
| endpoint: Optional[str] = None, |
| token: Union[str, bool, None] = None, |
| library_name: Optional[str] = None, |
| library_version: Optional[str] = None, |
| user_agent: Union[dict, str, None] = None, |
| headers: Optional[dict[str, str]] = None, |
| ) -> None: |
| self.endpoint = endpoint if endpoint is not None else constants.ENDPOINT |
| self.token = token |
| self.library_name = library_name |
| self.library_version = library_version |
| self.user_agent = user_agent |
| self.headers = headers |
| self._thread_pool: Optional[ThreadPoolExecutor] = None |
|
|
| |
| self._whoami_cache: dict[str, dict] = {} |
|
|
| def run_as_future(self, fn: Callable[..., R], *args, **kwargs) -> Future[R]: |
| """ |
| Run a method in the background and return a Future instance. |
| |
| The main goal is to run methods without blocking the main thread (e.g. to push data during a training). |
| Background jobs are queued to preserve order but are not ran in parallel. If you need to speed-up your scripts |
| by parallelizing lots of call to the API, you must setup and use your own [ThreadPoolExecutor](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor). |
| |
| Note: Most-used methods like [`upload_file`], [`upload_folder`] and [`create_commit`] have a `run_as_future: bool` |
| argument to directly call them in the background. This is equivalent to calling `api.run_as_future(...)` on them |
| but less verbose. |
| |
| Args: |
| fn (`Callable`): |
| The method to run in the background. |
| *args, **kwargs: |
| Arguments with which the method will be called. |
| |
| Return: |
| `Future`: a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) instance to |
| get the result of the task. |
| |
| Example: |
| ```py |
| >>> from huggingface_hub import HfApi |
| >>> api = HfApi() |
| >>> future = api.run_as_future(api.whoami) # instant |
| >>> future.done() |
| False |
| >>> future.result() # wait until complete and return result |
| (...) |
| >>> future.done() |
| True |
| ``` |
| """ |
| if self._thread_pool is None: |
| self._thread_pool = ThreadPoolExecutor(max_workers=1) |
| self._thread_pool |
| return self._thread_pool.submit(fn, *args, **kwargs) |
|
|
| @validate_hf_hub_args |
| def whoami(self, token: Union[bool, str, None] = None, *, cache: bool = False) -> dict: |
| """ |
| Call HF API to know "whoami". |
| |
| If passing `cache=True`, the result will be cached for subsequent calls for the duration of the Python process. This is useful if you plan to call |
| `whoami` multiple times as this endpoint is heavily rate-limited for security reasons. |
| |
| Args: |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| cache (`bool`, *optional*): |
| Whether to cache the result of the `whoami` call for subsequent calls. |
| If an error occurs during the first call, it won't be cached. |
| Defaults to `False`. |
| """ |
| |
| token = self.token if token is None else token |
| if token is False: |
| raise ValueError("Cannot use `token=False` with `whoami` method as it requires authentication.") |
| if token is True or token is None: |
| token = get_token() |
| if token is None: |
| raise LocalTokenNotFoundError( |
| "Token is required to call the /whoami-v2 endpoint, but no token found. You must provide a token or be logged in to " |
| "Hugging Face with `hf auth login` or `huggingface_hub.login`. See https://huggingface.co/settings/tokens." |
| ) |
|
|
| if cache and (cached_token := self._whoami_cache.get(token)): |
| return cached_token |
|
|
| |
| output = self._inner_whoami(token=token) |
|
|
| |
| if cache: |
| self._whoami_cache[token] = output |
| return output |
|
|
| def _inner_whoami(self, token: str) -> dict: |
| r = get_session().get( |
| f"{self.endpoint}/api/whoami-v2", |
| headers=self._build_hf_headers(token=token), |
| ) |
| try: |
| hf_raise_for_status(r) |
| except HfHubHTTPError as e: |
| if e.response.status_code == 401: |
| error_message = "Invalid user token." |
| |
| if token == _get_token_from_google_colab(): |
| error_message += " The token from Google Colab vault is invalid. Please update it from the UI." |
| elif token == _get_token_from_environment(): |
| error_message += ( |
| " The token from HF_TOKEN environment variable is invalid. " |
| "Note that HF_TOKEN takes precedence over `hf auth login`." |
| ) |
| elif token == _get_token_from_file(): |
| error_message += " The token stored is invalid. Please run `hf auth login` to update it." |
| raise HfHubHTTPError(error_message, response=e.response) from e |
| if e.response.status_code == 429: |
| error_message = ( |
| "You've hit the rate limit for the /whoami-v2 endpoint, which is intentionally strict for security reasons." |
| " If you're calling it often, consider caching the response with `whoami(..., cache=True)`." |
| ) |
| raise HfHubHTTPError(error_message, response=e.response) from e |
| raise |
| return r.json() |
|
|
| def get_model_tags(self) -> dict: |
| """ |
| List all valid model tags as a nested namespace object |
| """ |
| path = f"{self.endpoint}/api/models-tags-by-type" |
| r = get_session().get(path) |
| hf_raise_for_status(r) |
| return r.json() |
|
|
| def get_dataset_tags(self) -> dict: |
| """ |
| List all valid dataset tags as a nested namespace object. |
| """ |
| path = f"{self.endpoint}/api/datasets-tags-by-type" |
| r = get_session().get(path) |
| hf_raise_for_status(r) |
| return r.json() |
|
|
| @validate_hf_hub_args |
| def list_models( |
| self, |
| *, |
| |
| filter: Union[str, Iterable[str], None] = None, |
| author: Optional[str] = None, |
| apps: Optional[Union[str, list[str]]] = None, |
| gated: Optional[bool] = None, |
| inference: Optional[Literal["warm"]] = None, |
| inference_provider: Optional[Union[Literal["all"], "PROVIDER_T", list["PROVIDER_T"]]] = None, |
| model_name: Optional[str] = None, |
| trained_dataset: Optional[Union[str, list[str]]] = None, |
| search: Optional[str] = None, |
| pipeline_tag: Optional[str] = None, |
| emissions_thresholds: Optional[tuple[float, float]] = None, |
| |
| sort: Union[Literal["last_modified"], str, None] = None, |
| direction: Optional[Literal[-1]] = None, |
| limit: Optional[int] = None, |
| |
| expand: Optional[list[ExpandModelProperty_T]] = None, |
| full: Optional[bool] = None, |
| cardData: bool = False, |
| fetch_config: bool = False, |
| token: Union[bool, str, None] = None, |
| ) -> Iterable[ModelInfo]: |
| """ |
| List models hosted on the Huggingface Hub, given some filters. |
| |
| Args: |
| filter (`str` or `Iterable[str]`, *optional*): |
| A string or list of string to filter models on the Hub. |
| Models can be filtered by library, language, task, tags, and more. |
| author (`str`, *optional*): |
| A string which identify the author (user or organization) of the |
| returned models. |
| apps (`str` or `List`, *optional*): |
| A string or list of strings to filter models on the Hub that |
| support the specified apps. Example values include `"ollama"` or `["ollama", "vllm"]`. |
| gated (`bool`, *optional*): |
| A boolean to filter models on the Hub that are gated or not. By default, all models are returned. |
| If `gated=True` is passed, only gated models are returned. |
| If `gated=False` is passed, only non-gated models are returned. |
| inference (`Literal["warm"]`, *optional*): |
| If "warm", filter models on the Hub currently served by at least one provider. |
| inference_provider (`Literal["all"]` or `str`, *optional*): |
| A string to filter models on the Hub that are served by a specific provider. |
| Pass `"all"` to get all models served by at least one provider. |
| model_name (`str`, *optional*): |
| A string that contain complete or partial names for models on the |
| Hub, such as "bert" or "bert-base-cased" |
| trained_dataset (`str` or `List`, *optional*): |
| A string tag or a list of string tags of the trained dataset for a |
| model on the Hub. |
| search (`str`, *optional*): |
| A string that will be contained in the returned model ids. |
| pipeline_tag (`str`, *optional*): |
| A string pipeline tag to filter models on the Hub by, such as `summarization`. |
| emissions_thresholds (`Tuple`, *optional*): |
| A tuple of two ints or floats representing a minimum and maximum |
| carbon footprint to filter the resulting models with in grams. |
| sort (`Literal["last_modified"]` or `str`, *optional*): |
| The key with which to sort the resulting models. Possible values are "last_modified", "trending_score", |
| "created_at", "downloads" and "likes". |
| direction (`Literal[-1]` or `int`, *optional*): |
| Direction in which to sort. The value `-1` sorts by descending |
| order while all other values sort by ascending order. |
| limit (`int`, *optional*): |
| The limit on the number of models fetched. Leaving this option |
| to `None` fetches all models. |
| expand (`list[ExpandModelProperty_T]`, *optional*): |
| List properties to return in the response. When used, only the properties in the list will be returned. |
| This parameter cannot be used if `full`, `cardData` or `fetch_config` are passed. |
| Possible values are `"author"`, `"cardData"`, `"config"`, `"createdAt"`, `"disabled"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"gguf"`, `"inference"`, `"inferenceProviderMapping"`, `"lastModified"`, `"library_name"`, `"likes"`, `"mask_token"`, `"model-index"`, `"pipeline_tag"`, `"private"`, `"safetensors"`, `"sha"`, `"siblings"`, `"spaces"`, `"tags"`, `"transformersInfo"`, `"trendingScore"`, `"widgetData"`, and `"resourceGroup"`. |
| full (`bool`, *optional*): |
| Whether to fetch all model data, including the `last_modified`, |
| the `sha`, the files and the `tags`. This is set to `True` by |
| default when using a filter. |
| cardData (`bool`, *optional*): |
| Whether to grab the metadata for the model as well. Can contain |
| useful information such as carbon emissions, metrics, and |
| datasets trained on. |
| fetch_config (`bool`, *optional*): |
| Whether to fetch the model configs as well. This is not included |
| in `full` due to its size. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| |
| Returns: |
| `Iterable[ModelInfo]`: an iterable of [`huggingface_hub.hf_api.ModelInfo`] objects. |
| |
| Example: |
| |
| ```python |
| >>> from huggingface_hub import HfApi |
| |
| >>> api = HfApi() |
| |
| # List all models |
| >>> api.list_models() |
| |
| # List text classification models |
| >>> api.list_models(filter="text-classification") |
| |
| # List models from the KerasHub library |
| >>> api.list_models(filter="keras-hub") |
| |
| # List models served by Cohere |
| >>> api.list_models(inference_provider="cohere") |
| |
| # List models with "bert" in their name |
| >>> api.list_models(search="bert") |
| |
| # List models with "bert" in their name and pushed by google |
| >>> api.list_models(search="bert", author="google") |
| ``` |
| """ |
| if expand and (full or cardData or fetch_config): |
| raise ValueError("`expand` cannot be used if `full`, `cardData` or `fetch_config` are passed.") |
|
|
| if emissions_thresholds is not None and not cardData: |
| raise ValueError("`emissions_thresholds` were passed without setting `cardData=True`.") |
|
|
| path = f"{self.endpoint}/api/models" |
| headers = self._build_hf_headers(token=token) |
| params: dict[str, Any] = {} |
|
|
| |
| filter_list: list[str] = [] |
| if filter: |
| filter_list.extend([filter] if isinstance(filter, str) else filter) |
| if trained_dataset: |
| datasets = [trained_dataset] if isinstance(trained_dataset, str) else trained_dataset |
| filter_list.extend(f"dataset:{d}" if not d.startswith("dataset:") else d for d in datasets) |
| if len(filter_list) > 0: |
| params["filter"] = filter_list |
|
|
| |
| if author: |
| params["author"] = author |
| if apps: |
| if isinstance(apps, str): |
| apps = [apps] |
| params["apps"] = apps |
| if gated is not None: |
| params["gated"] = gated |
| if inference is not None: |
| params["inference"] = inference |
| if inference_provider is not None: |
| params["inference_provider"] = inference_provider |
| if pipeline_tag: |
| params["pipeline_tag"] = pipeline_tag |
| search_list = [] |
| if model_name: |
| search_list.append(model_name) |
| if search: |
| search_list.append(search) |
| if len(search_list) > 0: |
| params["search"] = search_list |
| if sort is not None: |
| params["sort"] = ( |
| "lastModified" |
| if sort == "last_modified" |
| else "trendingScore" |
| if sort == "trending_score" |
| else "createdAt" |
| if sort == "created_at" |
| else sort |
| ) |
| if direction is not None: |
| params["direction"] = direction |
| if limit is not None: |
| params["limit"] = limit |
|
|
| |
| if full: |
| params["full"] = True |
| if fetch_config: |
| params["config"] = True |
| if cardData: |
| params["cardData"] = True |
| if expand: |
| params["expand"] = expand |
|
|
| |
| items = paginate(path, params=params, headers=headers) |
| if limit is not None: |
| items = islice(items, limit) |
| for item in items: |
| if "siblings" not in item: |
| item["siblings"] = None |
| model_info = ModelInfo(**item) |
| if emissions_thresholds is None or _is_emission_within_threshold(model_info, *emissions_thresholds): |
| yield model_info |
|
|
| @_deprecate_arguments(version="1.0", deprecated_args=["tags"], custom_message="Use `filter` instead.") |
| @validate_hf_hub_args |
| def list_datasets( |
| self, |
| *, |
| |
| filter: Union[str, Iterable[str], None] = None, |
| author: Optional[str] = None, |
| benchmark: Optional[Union[str, list[str]]] = None, |
| dataset_name: Optional[str] = None, |
| gated: Optional[bool] = None, |
| language_creators: Optional[Union[str, list[str]]] = None, |
| language: Optional[Union[str, list[str]]] = None, |
| multilinguality: Optional[Union[str, list[str]]] = None, |
| size_categories: Optional[Union[str, list[str]]] = None, |
| task_categories: Optional[Union[str, list[str]]] = None, |
| task_ids: Optional[Union[str, list[str]]] = None, |
| search: Optional[str] = None, |
| |
| sort: Optional[Union[Literal["last_modified"], str]] = None, |
| direction: Optional[Literal[-1]] = None, |
| limit: Optional[int] = None, |
| |
| expand: Optional[list[ExpandDatasetProperty_T]] = None, |
| full: Optional[bool] = None, |
| token: Union[bool, str, None] = None, |
| |
| tags: Optional[Union[str, list[str]]] = None, |
| ) -> Iterable[DatasetInfo]: |
| """ |
| List datasets hosted on the Huggingface Hub, given some filters. |
| |
| Args: |
| filter (`str` or `Iterable[str]`, *optional*): |
| A string or list of string to filter datasets on the hub. |
| author (`str`, *optional*): |
| A string which identify the author of the returned datasets. |
| benchmark (`str` or `List`, *optional*): |
| A string or list of strings that can be used to identify datasets on |
| the Hub by their official benchmark. |
| dataset_name (`str`, *optional*): |
| A string or list of strings that can be used to identify datasets on |
| the Hub by its name, such as `SQAC` or `wikineural` |
| gated (`bool`, *optional*): |
| A boolean to filter datasets on the Hub that are gated or not. By default, all datasets are returned. |
| If `gated=True` is passed, only gated datasets are returned. |
| If `gated=False` is passed, only non-gated datasets are returned. |
| language_creators (`str` or `List`, *optional*): |
| A string or list of strings that can be used to identify datasets on |
| the Hub with how the data was curated, such as `crowdsourced` or |
| `machine_generated`. |
| language (`str` or `List`, *optional*): |
| A string or list of strings representing a two-character language to |
| filter datasets by on the Hub. |
| multilinguality (`str` or `List`, *optional*): |
| A string or list of strings representing a filter for datasets that |
| contain multiple languages. |
| size_categories (`str` or `List`, *optional*): |
| A string or list of strings that can be used to identify datasets on |
| the Hub by the size of the dataset such as `100K<n<1M` or |
| `1M<n<10M`. |
| tags (`str` or `List`, *optional*): |
| Deprecated. Pass tags in `filter` to filter datasets by tags. |
| task_categories (`str` or `List`, *optional*): |
| A string or list of strings that can be used to identify datasets on |
| the Hub by the designed task, such as `audio_classification` or |
| `named_entity_recognition`. |
| task_ids (`str` or `List`, *optional*): |
| A string or list of strings that can be used to identify datasets on |
| the Hub by the specific task such as `speech_emotion_recognition` or |
| `paraphrase`. |
| search (`str`, *optional*): |
| A string that will be contained in the returned datasets. |
| sort (`Literal["last_modified"]` or `str`, *optional*): |
| The key with which to sort the resulting models. Possible values are "last_modified", "trending_score", |
| "created_at", "downloads" and "likes". |
| direction (`Literal[-1]` or `int`, *optional*): |
| Direction in which to sort. The value `-1` sorts by descending |
| order while all other values sort by ascending order. |
| limit (`int`, *optional*): |
| The limit on the number of datasets fetched. Leaving this option |
| to `None` fetches all datasets. |
| expand (`list[ExpandDatasetProperty_T]`, *optional*): |
| List properties to return in the response. When used, only the properties in the list will be returned. |
| This parameter cannot be used if `full` is passed. |
| Possible values are `"author"`, `"cardData"`, `"citation"`, `"createdAt"`, `"disabled"`, `"description"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"lastModified"`, `"likes"`, `"paperswithcode_id"`, `"private"`, `"siblings"`, `"sha"`, `"tags"`, `"trendingScore"`, `"usedStorage"`, and `"resourceGroup"`. |
| full (`bool`, *optional*): |
| Whether to fetch all dataset data, including the `last_modified`, |
| the `card_data` and the files. Can contain useful information such as the |
| PapersWithCode ID. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `Iterable[DatasetInfo]`: an iterable of [`huggingface_hub.hf_api.DatasetInfo`] objects. |
| |
| Example usage with the `filter` argument: |
| |
| ```python |
| >>> from huggingface_hub import HfApi |
| |
| >>> api = HfApi() |
| |
| # List all datasets |
| >>> api.list_datasets() |
| |
| |
| # List only the text classification datasets |
| >>> api.list_datasets(filter="task_categories:text-classification") |
| |
| |
| # List only the datasets in russian for language modeling |
| >>> api.list_datasets( |
| ... filter=("language:ru", "task_ids:language-modeling") |
| ... ) |
| |
| # List FiftyOne datasets (identified by the tag "fiftyone" in dataset card) |
| >>> api.list_datasets(tags="fiftyone") |
| ``` |
| |
| Example usage with the `search` argument: |
| |
| ```python |
| >>> from huggingface_hub import HfApi |
| |
| >>> api = HfApi() |
| |
| # List all datasets with "text" in their name |
| >>> api.list_datasets(search="text") |
| |
| # List all datasets with "text" in their name made by google |
| >>> api.list_datasets(search="text", author="google") |
| ``` |
| """ |
| if expand and full: |
| raise ValueError("`expand` cannot be used if `full` is passed.") |
|
|
| path = f"{self.endpoint}/api/datasets" |
| headers = self._build_hf_headers(token=token) |
| params: dict[str, Any] = {} |
|
|
| |
| filter_list = [] |
| if filter is not None: |
| if isinstance(filter, str): |
| filter_list.append(filter) |
| else: |
| filter_list.extend(filter) |
| for key, value in ( |
| ("benchmark", benchmark), |
| ("language_creators", language_creators), |
| ("language", language), |
| ("multilinguality", multilinguality), |
| ("size_categories", size_categories), |
| ("task_categories", task_categories), |
| ("task_ids", task_ids), |
| ): |
| if value: |
| if isinstance(value, str): |
| value = [value] |
| for value_item in value: |
| if not value_item.startswith(f"{key}:"): |
| data = f"{key}:{value_item}" |
| filter_list.append(data) |
| if tags is not None: |
| filter_list.extend([tags] if isinstance(tags, str) else tags) |
| if len(filter_list) > 0: |
| params["filter"] = filter_list |
|
|
| |
| if author: |
| params["author"] = author |
| if gated is not None: |
| params["gated"] = gated |
| search_list = [] |
| if dataset_name: |
| search_list.append(dataset_name) |
| if search: |
| search_list.append(search) |
| if len(search_list) > 0: |
| params["search"] = search_list |
| if sort is not None: |
| params["sort"] = ( |
| "lastModified" |
| if sort == "last_modified" |
| else "trendingScore" |
| if sort == "trending_score" |
| else "createdAt" |
| if sort == "created_at" |
| else sort |
| ) |
| if direction is not None: |
| params["direction"] = direction |
| if limit is not None: |
| params["limit"] = limit |
|
|
| |
| if expand: |
| params["expand"] = expand |
| if full: |
| params["full"] = True |
|
|
| items = paginate(path, params=params, headers=headers) |
| if limit is not None: |
| items = islice(items, limit) |
| for item in items: |
| if "siblings" not in item: |
| item["siblings"] = None |
| yield DatasetInfo(**item) |
|
|
| @validate_hf_hub_args |
| def list_spaces( |
| self, |
| *, |
| |
| filter: Union[str, Iterable[str], None] = None, |
| author: Optional[str] = None, |
| search: Optional[str] = None, |
| datasets: Union[str, Iterable[str], None] = None, |
| models: Union[str, Iterable[str], None] = None, |
| linked: bool = False, |
| |
| sort: Union[Literal["last_modified"], str, None] = None, |
| direction: Optional[Literal[-1]] = None, |
| limit: Optional[int] = None, |
| |
| expand: Optional[list[ExpandSpaceProperty_T]] = None, |
| full: Optional[bool] = None, |
| token: Union[bool, str, None] = None, |
| ) -> Iterable[SpaceInfo]: |
| """ |
| List spaces hosted on the Huggingface Hub, given some filters. |
| |
| Args: |
| filter (`str` or `Iterable`, *optional*): |
| A string tag or list of tags that can be used to identify Spaces on the Hub. |
| author (`str`, *optional*): |
| A string which identify the author of the returned Spaces. |
| search (`str`, *optional*): |
| A string that will be contained in the returned Spaces. |
| datasets (`str` or `Iterable`, *optional*): |
| Whether to return Spaces that make use of a dataset. |
| The name of a specific dataset can be passed as a string. |
| models (`str` or `Iterable`, *optional*): |
| Whether to return Spaces that make use of a model. |
| The name of a specific model can be passed as a string. |
| linked (`bool`, *optional*): |
| Whether to return Spaces that make use of either a model or a dataset. |
| sort (`Literal["last_modified"]` or `str`, *optional*): |
| The key with which to sort the resulting models. Possible values are "last_modified", "trending_score", |
| "created_at" and "likes". |
| direction (`Literal[-1]` or `int`, *optional*): |
| Direction in which to sort. The value `-1` sorts by descending |
| order while all other values sort by ascending order. |
| limit (`int`, *optional*): |
| The limit on the number of Spaces fetched. Leaving this option |
| to `None` fetches all Spaces. |
| expand (`list[ExpandSpaceProperty_T]`, *optional*): |
| List properties to return in the response. When used, only the properties in the list will be returned. |
| This parameter cannot be used if `full` is passed. |
| Possible values are `"author"`, `"cardData"`, `"datasets"`, `"disabled"`, `"lastModified"`, `"createdAt"`, `"likes"`, `"models"`, `"private"`, `"runtime"`, `"sdk"`, `"siblings"`, `"sha"`, `"subdomain"`, `"tags"`, `"trendingScore"`, `"usedStorage"`, and `"resourceGroup"`. |
| full (`bool`, *optional*): |
| Whether to fetch all Spaces data, including the `last_modified`, `siblings` |
| and `card_data` fields. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `Iterable[SpaceInfo]`: an iterable of [`huggingface_hub.hf_api.SpaceInfo`] objects. |
| """ |
| if expand and full: |
| raise ValueError("`expand` cannot be used if `full` is passed.") |
|
|
| path = f"{self.endpoint}/api/spaces" |
| headers = self._build_hf_headers(token=token) |
| params: dict[str, Any] = {} |
| if filter is not None: |
| params["filter"] = filter |
| if author is not None: |
| params["author"] = author |
| if search is not None: |
| params["search"] = search |
| if sort is not None: |
| params["sort"] = ( |
| "lastModified" |
| if sort == "last_modified" |
| else "trendingScore" |
| if sort == "trending_score" |
| else "createdAt" |
| if sort == "created_at" |
| else sort |
| ) |
| if direction is not None: |
| params["direction"] = direction |
| if limit is not None: |
| params["limit"] = limit |
| if linked: |
| params["linked"] = True |
| if datasets is not None: |
| params["datasets"] = datasets |
| if models is not None: |
| params["models"] = models |
|
|
| |
| if expand: |
| params["expand"] = expand |
| if full: |
| params["full"] = True |
|
|
| items = paginate(path, params=params, headers=headers) |
| if limit is not None: |
| items = islice(items, limit) |
| for item in items: |
| if "siblings" not in item: |
| item["siblings"] = None |
| yield SpaceInfo(**item) |
|
|
| @validate_hf_hub_args |
| def unlike( |
| self, |
| repo_id: str, |
| *, |
| token: Union[bool, str, None] = None, |
| repo_type: Optional[str] = None, |
| ) -> None: |
| """ |
| Unlike a given repo on the Hub (e.g. remove from favorite list). |
| |
| To prevent spam usage, it is not possible to `like` a repository from a script. |
| |
| See also [`list_liked_repos`]. |
| |
| Args: |
| repo_id (`str`): |
| The repository to unlike. Example: `"user/my-cool-model"`. |
| |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if unliking a dataset or space, `None` or |
| `"model"` if unliking a model. Default is `None`. |
| |
| Raises: |
| [`~utils.RepositoryNotFoundError`]: |
| If repository is not found (error 404): wrong repo_id/repo_type, private |
| but not authenticated or repo does not exist. |
| |
| Example: |
| ```python |
| >>> from huggingface_hub import list_liked_repos, unlike |
| >>> "gpt2" in list_liked_repos().models # we assume you have already liked gpt2 |
| True |
| >>> unlike("gpt2") |
| >>> "gpt2" in list_liked_repos().models |
| False |
| ``` |
| """ |
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
| response = get_session().delete( |
| url=f"{self.endpoint}/api/{repo_type}s/{repo_id}/like", headers=self._build_hf_headers(token=token) |
| ) |
| hf_raise_for_status(response) |
|
|
| @validate_hf_hub_args |
| def list_liked_repos( |
| self, |
| user: Optional[str] = None, |
| *, |
| token: Union[bool, str, None] = None, |
| ) -> UserLikes: |
| """ |
| List all public repos liked by a user on huggingface.co. |
| |
| This list is public so token is optional. If `user` is not passed, it defaults to |
| the logged in user. |
| |
| See also [`unlike`]. |
| |
| Args: |
| user (`str`, *optional*): |
| Name of the user for which you want to fetch the likes. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`UserLikes`]: object containing the user name and 3 lists of repo ids (1 for |
| models, 1 for datasets and 1 for Spaces). |
| |
| Raises: |
| [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| If `user` is not passed and no token found (either from argument or from machine). |
| |
| Example: |
| ```python |
| >>> from huggingface_hub import list_liked_repos |
| |
| >>> likes = list_liked_repos("julien-c") |
| |
| >>> likes.user |
| "julien-c" |
| |
| >>> likes.models |
| ["osanseviero/streamlit_1.15", "Xhaheen/ChatGPT_HF", ...] |
| ``` |
| """ |
| |
| if user is None: |
| me = self.whoami(token=token) |
| if me["type"] == "user": |
| user = me["name"] |
| else: |
| raise ValueError( |
| "Cannot list liked repos. You must provide a 'user' as input or be logged in as a user." |
| ) |
|
|
| path = f"{self.endpoint}/api/users/{user}/likes" |
| headers = self._build_hf_headers(token=token) |
|
|
| likes = list(paginate(path, params={}, headers=headers)) |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| return UserLikes( |
| user=user, |
| total=len(likes), |
| models=[like["repo"]["name"] for like in likes if like["repo"]["type"] == "model"], |
| datasets=[like["repo"]["name"] for like in likes if like["repo"]["type"] == "dataset"], |
| spaces=[like["repo"]["name"] for like in likes if like["repo"]["type"] == "space"], |
| ) |
|
|
| @validate_hf_hub_args |
| def list_repo_likers( |
| self, |
| repo_id: str, |
| *, |
| repo_type: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> Iterable[User]: |
| """ |
| List all users who liked a given repo on the hugging Face Hub. |
| |
| See also [`list_liked_repos`]. |
| |
| Args: |
| repo_id (`str`): |
| The repository to retrieve . Example: `"user/my-cool-model"`. |
| |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if uploading to a dataset or |
| space, `None` or `"model"` if uploading to a model. Default is |
| `None`. |
| |
| Returns: |
| `Iterable[User]`: an iterable of [`huggingface_hub.hf_api.User`] objects. |
| """ |
|
|
| |
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
| path = f"{self.endpoint}/api/{repo_type}s/{repo_id}/likers" |
| for liker in paginate(path, params={}, headers=self._build_hf_headers(token=token)): |
| yield User(username=liker["user"], fullname=liker["fullname"], avatar_url=liker["avatarUrl"]) |
|
|
| @validate_hf_hub_args |
| def model_info( |
| self, |
| repo_id: str, |
| *, |
| revision: Optional[str] = None, |
| timeout: Optional[float] = None, |
| securityStatus: Optional[bool] = None, |
| files_metadata: bool = False, |
| expand: Optional[list[ExpandModelProperty_T]] = None, |
| token: Union[bool, str, None] = None, |
| ) -> ModelInfo: |
| """ |
| Get info on one specific model on huggingface.co |
| |
| Model can be private if you pass an acceptable token or are logged in. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| revision (`str`, *optional*): |
| The revision of the model repository from which to get the |
| information. |
| timeout (`float`, *optional*): |
| Whether to set a timeout for the request to the Hub. |
| securityStatus (`bool`, *optional*): |
| Whether to retrieve the security status from the model |
| repository as well. The security status will be returned in the `security_repo_status` field. |
| files_metadata (`bool`, *optional*): |
| Whether or not to retrieve metadata for files in the repository |
| (size, LFS metadata, etc). Defaults to `False`. |
| expand (`list[ExpandModelProperty_T]`, *optional*): |
| List properties to return in the response. When used, only the properties in the list will be returned. |
| This parameter cannot be used if `securityStatus` or `files_metadata` are passed. |
| Possible values are `"author"`, `"baseModels"`, `"cardData"`, `"childrenModelCount"`, `"config"`, `"createdAt"`, `"disabled"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"gguf"`, `"inference"`, `"inferenceProviderMapping"`, `"lastModified"`, `"library_name"`, `"likes"`, `"mask_token"`, `"model-index"`, `"pipeline_tag"`, `"private"`, `"safetensors"`, `"sha"`, `"siblings"`, `"spaces"`, `"tags"`, `"transformersInfo"`, `"trendingScore"`, `"widgetData"`, `"usedStorage"`, and `"resourceGroup"`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`huggingface_hub.hf_api.ModelInfo`]: The model repository information. |
| |
| > [!TIP] |
| > Raises the following errors: |
| > |
| > - [`~utils.RepositoryNotFoundError`] |
| > If the repository to download from cannot be found. This may be because it doesn't exist, |
| > or because it is set to `private` and you do not have access. |
| > - [`~utils.RevisionNotFoundError`] |
| > If the revision to download from cannot be found. |
| """ |
| if expand and (securityStatus or files_metadata): |
| raise ValueError("`expand` cannot be used if `securityStatus` or `files_metadata` are set.") |
|
|
| headers = self._build_hf_headers(token=token) |
| path = ( |
| f"{self.endpoint}/api/models/{repo_id}" |
| if revision is None |
| else (f"{self.endpoint}/api/models/{repo_id}/revision/{quote(revision, safe='')}") |
| ) |
| params: dict = {} |
| if securityStatus: |
| params["securityStatus"] = True |
| if files_metadata: |
| params["blobs"] = True |
| if expand: |
| params["expand"] = expand |
| r = get_session().get(path, headers=headers, timeout=timeout, params=params) |
| hf_raise_for_status(r) |
| data = r.json() |
| return ModelInfo(**data) |
|
|
| @validate_hf_hub_args |
| def dataset_info( |
| self, |
| repo_id: str, |
| *, |
| revision: Optional[str] = None, |
| timeout: Optional[float] = None, |
| files_metadata: bool = False, |
| expand: Optional[list[ExpandDatasetProperty_T]] = None, |
| token: Union[bool, str, None] = None, |
| ) -> DatasetInfo: |
| """ |
| Get info on one specific dataset on huggingface.co. |
| |
| Dataset can be private if you pass an acceptable token. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| revision (`str`, *optional*): |
| The revision of the dataset repository from which to get the |
| information. |
| timeout (`float`, *optional*): |
| Whether to set a timeout for the request to the Hub. |
| files_metadata (`bool`, *optional*): |
| Whether or not to retrieve metadata for files in the repository |
| (size, LFS metadata, etc). Defaults to `False`. |
| expand (`list[ExpandDatasetProperty_T]`, *optional*): |
| List properties to return in the response. When used, only the properties in the list will be returned. |
| This parameter cannot be used if `files_metadata` is passed. |
| Possible values are `"author"`, `"cardData"`, `"citation"`, `"createdAt"`, `"disabled"`, `"description"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"lastModified"`, `"likes"`, `"paperswithcode_id"`, `"private"`, `"siblings"`, `"sha"`, `"tags"`, `"trendingScore"`,`"usedStorage"`, and `"resourceGroup"`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`hf_api.DatasetInfo`]: The dataset repository information. |
| |
| > [!TIP] |
| > Raises the following errors: |
| > |
| > - [`~utils.RepositoryNotFoundError`] |
| > If the repository to download from cannot be found. This may be because it doesn't exist, |
| > or because it is set to `private` and you do not have access. |
| > - [`~utils.RevisionNotFoundError`] |
| > If the revision to download from cannot be found. |
| """ |
| if expand and files_metadata: |
| raise ValueError("`expand` cannot be used if `files_metadata` is set.") |
|
|
| headers = self._build_hf_headers(token=token) |
| path = ( |
| f"{self.endpoint}/api/datasets/{repo_id}" |
| if revision is None |
| else (f"{self.endpoint}/api/datasets/{repo_id}/revision/{quote(revision, safe='')}") |
| ) |
| params: dict = {} |
| if files_metadata: |
| params["blobs"] = True |
| if expand: |
| params["expand"] = expand |
|
|
| r = get_session().get(path, headers=headers, timeout=timeout, params=params) |
| hf_raise_for_status(r) |
| data = r.json() |
| return DatasetInfo(**data) |
|
|
| @validate_hf_hub_args |
| def space_info( |
| self, |
| repo_id: str, |
| *, |
| revision: Optional[str] = None, |
| timeout: Optional[float] = None, |
| files_metadata: bool = False, |
| expand: Optional[list[ExpandSpaceProperty_T]] = None, |
| token: Union[bool, str, None] = None, |
| ) -> SpaceInfo: |
| """ |
| Get info on one specific Space on huggingface.co. |
| |
| Space can be private if you pass an acceptable token. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| revision (`str`, *optional*): |
| The revision of the space repository from which to get the |
| information. |
| timeout (`float`, *optional*): |
| Whether to set a timeout for the request to the Hub. |
| files_metadata (`bool`, *optional*): |
| Whether or not to retrieve metadata for files in the repository |
| (size, LFS metadata, etc). Defaults to `False`. |
| expand (`list[ExpandSpaceProperty_T]`, *optional*): |
| List properties to return in the response. When used, only the properties in the list will be returned. |
| This parameter cannot be used if `full` is passed. |
| Possible values are `"author"`, `"cardData"`, `"createdAt"`, `"datasets"`, `"disabled"`, `"lastModified"`, `"likes"`, `"models"`, `"private"`, `"runtime"`, `"sdk"`, `"siblings"`, `"sha"`, `"subdomain"`, `"tags"`, `"trendingScore"`, `"usedStorage"`, and `"resourceGroup"`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`~hf_api.SpaceInfo`]: The space repository information. |
| |
| > [!TIP] |
| > Raises the following errors: |
| > |
| > - [`~utils.RepositoryNotFoundError`] |
| > If the repository to download from cannot be found. This may be because it doesn't exist, |
| > or because it is set to `private` and you do not have access. |
| > - [`~utils.RevisionNotFoundError`] |
| > If the revision to download from cannot be found. |
| """ |
| if expand and files_metadata: |
| raise ValueError("`expand` cannot be used if `files_metadata` is set.") |
|
|
| headers = self._build_hf_headers(token=token) |
| path = ( |
| f"{self.endpoint}/api/spaces/{repo_id}" |
| if revision is None |
| else (f"{self.endpoint}/api/spaces/{repo_id}/revision/{quote(revision, safe='')}") |
| ) |
| params: dict = {} |
| if files_metadata: |
| params["blobs"] = True |
| if expand: |
| params["expand"] = expand |
|
|
| r = get_session().get(path, headers=headers, timeout=timeout, params=params) |
| hf_raise_for_status(r) |
| data = r.json() |
| return SpaceInfo(**data) |
|
|
| @validate_hf_hub_args |
| def repo_info( |
| self, |
| repo_id: str, |
| *, |
| revision: Optional[str] = None, |
| repo_type: Optional[str] = None, |
| timeout: Optional[float] = None, |
| files_metadata: bool = False, |
| expand: Optional[Union[ExpandModelProperty_T, ExpandDatasetProperty_T, ExpandSpaceProperty_T]] = None, |
| token: Union[bool, str, None] = None, |
| ) -> Union[ModelInfo, DatasetInfo, SpaceInfo]: |
| """ |
| Get the info object for a given repo of a given type. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| revision (`str`, *optional*): |
| The revision of the repository from which to get the |
| information. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space, |
| `None` or `"model"` if getting repository info from a model. Default is `None`. |
| timeout (`float`, *optional*): |
| Whether to set a timeout for the request to the Hub. |
| expand (`ExpandModelProperty_T` or `ExpandDatasetProperty_T` or `ExpandSpaceProperty_T`, *optional*): |
| List properties to return in the response. When used, only the properties in the list will be returned. |
| This parameter cannot be used if `files_metadata` is passed. |
| For an exhaustive list of available properties, check out [`model_info`], [`dataset_info`] or [`space_info`]. |
| files_metadata (`bool`, *optional*): |
| Whether or not to retrieve metadata for files in the repository |
| (size, LFS metadata, etc). Defaults to `False`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `Union[SpaceInfo, DatasetInfo, ModelInfo]`: The repository information, as a |
| [`huggingface_hub.hf_api.DatasetInfo`], [`huggingface_hub.hf_api.ModelInfo`] |
| or [`huggingface_hub.hf_api.SpaceInfo`] object. |
| |
| > [!TIP] |
| > Raises the following errors: |
| > |
| > - [`~utils.RepositoryNotFoundError`] |
| > If the repository to download from cannot be found. This may be because it doesn't exist, |
| > or because it is set to `private` and you do not have access. |
| > - [`~utils.RevisionNotFoundError`] |
| > If the revision to download from cannot be found. |
| """ |
| if repo_type is None or repo_type == "model": |
| method = self.model_info |
| elif repo_type == "dataset": |
| method = self.dataset_info |
| elif repo_type == "space": |
| method = self.space_info |
| else: |
| raise ValueError("Unsupported repo type.") |
| return method( |
| repo_id, |
| revision=revision, |
| token=token, |
| timeout=timeout, |
| expand=expand, |
| files_metadata=files_metadata, |
| ) |
|
|
| @validate_hf_hub_args |
| def repo_exists( |
| self, |
| repo_id: str, |
| *, |
| repo_type: Optional[str] = None, |
| token: Union[str, bool, None] = None, |
| ) -> bool: |
| """ |
| Checks if a repository exists on the Hugging Face Hub. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space, |
| `None` or `"model"` if getting repository info from a model. Default is `None`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| True if the repository exists, False otherwise. |
| |
| Examples: |
| ```py |
| >>> from huggingface_hub import repo_exists |
| >>> repo_exists("google/gemma-7b") |
| True |
| >>> repo_exists("google/not-a-repo") |
| False |
| ``` |
| """ |
| try: |
| self.repo_info(repo_id=repo_id, repo_type=repo_type, token=token) |
| return True |
| except GatedRepoError: |
| return True |
| except RepositoryNotFoundError: |
| return False |
|
|
| @validate_hf_hub_args |
| def revision_exists( |
| self, |
| repo_id: str, |
| revision: str, |
| *, |
| repo_type: Optional[str] = None, |
| token: Union[str, bool, None] = None, |
| ) -> bool: |
| """ |
| Checks if a specific revision exists on a repo on the Hugging Face Hub. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| revision (`str`): |
| The revision of the repository to check. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space, |
| `None` or `"model"` if getting repository info from a model. Default is `None`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| True if the repository and the revision exists, False otherwise. |
| |
| Examples: |
| ```py |
| >>> from huggingface_hub import revision_exists |
| >>> revision_exists("google/gemma-7b", "float16") |
| True |
| >>> revision_exists("google/gemma-7b", "not-a-revision") |
| False |
| ``` |
| """ |
| try: |
| self.repo_info(repo_id=repo_id, revision=revision, repo_type=repo_type, token=token) |
| return True |
| except RevisionNotFoundError: |
| return False |
| except RepositoryNotFoundError: |
| return False |
|
|
| @validate_hf_hub_args |
| def file_exists( |
| self, |
| repo_id: str, |
| filename: str, |
| *, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| token: Union[str, bool, None] = None, |
| ) -> bool: |
| """ |
| Checks if a file exists in a repository on the Hugging Face Hub. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| filename (`str`): |
| The name of the file to check, for example: |
| `"config.json"` |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if getting repository info from a dataset or a space, |
| `None` or `"model"` if getting repository info from a model. Default is `None`. |
| revision (`str`, *optional*): |
| The revision of the repository from which to get the information. Defaults to `"main"` branch. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| True if the file exists, False otherwise. |
| |
| Examples: |
| ```py |
| >>> from huggingface_hub import file_exists |
| >>> file_exists("bigcode/starcoder", "config.json") |
| True |
| >>> file_exists("bigcode/starcoder", "not-a-file") |
| False |
| >>> file_exists("bigcode/not-a-repo", "config.json") |
| False |
| ``` |
| """ |
| url = hf_hub_url( |
| repo_id=repo_id, repo_type=repo_type, revision=revision, filename=filename, endpoint=self.endpoint |
| ) |
| try: |
| if token is None: |
| token = self.token |
| get_hf_file_metadata(url, token=token) |
| return True |
| except GatedRepoError: |
| raise |
| except (RepositoryNotFoundError, RemoteEntryNotFoundError, RevisionNotFoundError): |
| return False |
|
|
| @validate_hf_hub_args |
| def list_repo_files( |
| self, |
| repo_id: str, |
| *, |
| revision: Optional[str] = None, |
| repo_type: Optional[str] = None, |
| token: Union[str, bool, None] = None, |
| ) -> list[str]: |
| """ |
| Get the list of files in a given repo. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated by a `/`. |
| revision (`str`, *optional*): |
| The revision of the repository from which to get the information. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if uploading to a dataset or space, `None` or `"model"` if uploading to |
| a model. Default is `None`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `list[str]`: the list of files in a given repository. |
| """ |
| return [ |
| f.rfilename |
| for f in self.list_repo_tree( |
| repo_id=repo_id, recursive=True, revision=revision, repo_type=repo_type, token=token |
| ) |
| if isinstance(f, RepoFile) |
| ] |
|
|
| @validate_hf_hub_args |
| def list_repo_tree( |
| self, |
| repo_id: str, |
| path_in_repo: Optional[str] = None, |
| *, |
| recursive: bool = False, |
| expand: bool = False, |
| revision: Optional[str] = None, |
| repo_type: Optional[str] = None, |
| token: Union[str, bool, None] = None, |
| ) -> Iterable[Union[RepoFile, RepoFolder]]: |
| """ |
| List a repo tree's files and folders and get information about them. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated by a `/`. |
| path_in_repo (`str`, *optional*): |
| Relative path of the tree (folder) in the repo, for example: |
| `"checkpoints/1fec34a/results"`. Will default to the root tree (folder) of the repository. |
| recursive (`bool`, *optional*, defaults to `False`): |
| Whether to list tree's files and folders recursively. |
| expand (`bool`, *optional*, defaults to `False`): |
| Whether to fetch more information about the tree's files and folders (e.g. last commit and files' security scan results). This |
| operation is more expensive for the server so only 50 results are returned per page (instead of 1000). |
| As pagination is implemented in `huggingface_hub`, this is transparent for you except for the time it |
| takes to get the results. |
| revision (`str`, *optional*): |
| The revision of the repository from which to get the tree. Defaults to `"main"` branch. |
| repo_type (`str`, *optional*): |
| The type of the repository from which to get the tree (`"model"`, `"dataset"` or `"space"`. |
| Defaults to `"model"`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `Iterable[Union[RepoFile, RepoFolder]]`: |
| The information about the tree's files and folders, as an iterable of [`RepoFile`] and [`RepoFolder`] objects. The order of the files and folders is |
| not guaranteed. |
| |
| Raises: |
| [`~utils.RepositoryNotFoundError`]: |
| If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo |
| does not exist. |
| [`~utils.RevisionNotFoundError`]: |
| If revision is not found (error 404) on the repo. |
| [`~utils.RemoteEntryNotFoundError`]: |
| If the tree (folder) does not exist (error 404) on the repo. |
| |
| Examples: |
| |
| Get information about a repo's tree. |
| ```py |
| >>> from huggingface_hub import list_repo_tree |
| >>> repo_tree = list_repo_tree("lysandre/arxiv-nlp") |
| >>> repo_tree |
| <generator object HfApi.list_repo_tree at 0x7fa4088e1ac0> |
| >>> list(repo_tree) |
| [ |
| RepoFile(path='.gitattributes', size=391, blob_id='ae8c63daedbd4206d7d40126955d4e6ab1c80f8f', lfs=None, last_commit=None, security=None), |
| RepoFile(path='README.md', size=391, blob_id='43bd404b159de6fba7c2f4d3264347668d43af25', lfs=None, last_commit=None, security=None), |
| RepoFile(path='config.json', size=554, blob_id='2f9618c3a19b9a61add74f70bfb121335aeef666', lfs=None, last_commit=None, security=None), |
| RepoFile( |
| path='flax_model.msgpack', size=497764107, blob_id='8095a62ccb4d806da7666fcda07467e2d150218e', |
| lfs={'size': 497764107, 'sha256': 'd88b0d6a6ff9c3f8151f9d3228f57092aaea997f09af009eefd7373a77b5abb9', 'pointer_size': 134}, last_commit=None, security=None |
| ), |
| RepoFile(path='merges.txt', size=456318, blob_id='226b0752cac7789c48f0cb3ec53eda48b7be36cc', lfs=None, last_commit=None, security=None), |
| RepoFile( |
| path='pytorch_model.bin', size=548123560, blob_id='64eaa9c526867e404b68f2c5d66fd78e27026523', |
| lfs={'size': 548123560, 'sha256': '9be78edb5b928eba33aa88f431551348f7466ba9f5ef3daf1d552398722a5436', 'pointer_size': 134}, last_commit=None, security=None |
| ), |
| RepoFile(path='vocab.json', size=898669, blob_id='b00361fece0387ca34b4b8b8539ed830d644dbeb', lfs=None, last_commit=None, security=None)] |
| ] |
| ``` |
| |
| Get even more information about a repo's tree (last commit and files' security scan results) |
| ```py |
| >>> from huggingface_hub import list_repo_tree |
| >>> repo_tree = list_repo_tree("prompthero/openjourney-v4", expand=True) |
| >>> list(repo_tree) |
| [ |
| RepoFolder( |
| path='feature_extractor', |
| tree_id='aa536c4ea18073388b5b0bc791057a7296a00398', |
| last_commit={ |
| 'oid': '47b62b20b20e06b9de610e840282b7e6c3d51190', |
| 'title': 'Upload diffusers weights (#48)', |
| 'date': datetime.datetime(2023, 3, 21, 9, 5, 27, tzinfo=datetime.timezone.utc) |
| } |
| ), |
| RepoFolder( |
| path='safety_checker', |
| tree_id='65aef9d787e5557373fdf714d6c34d4fcdd70440', |
| last_commit={ |
| 'oid': '47b62b20b20e06b9de610e840282b7e6c3d51190', |
| 'title': 'Upload diffusers weights (#48)', |
| 'date': datetime.datetime(2023, 3, 21, 9, 5, 27, tzinfo=datetime.timezone.utc) |
| } |
| ), |
| RepoFile( |
| path='model_index.json', |
| size=582, |
| blob_id='d3d7c1e8c3e78eeb1640b8e2041ee256e24c9ee1', |
| lfs=None, |
| last_commit={ |
| 'oid': 'b195ed2d503f3eb29637050a886d77bd81d35f0e', |
| 'title': 'Fix deprecation warning by changing `CLIPFeatureExtractor` to `CLIPImageProcessor`. (#54)', |
| 'date': datetime.datetime(2023, 5, 15, 21, 41, 59, tzinfo=datetime.timezone.utc) |
| }, |
| security={ |
| 'safe': True, |
| 'av_scan': {'virusFound': False, 'virusNames': None}, |
| 'pickle_import_scan': None |
| } |
| ) |
| ... |
| ] |
| ``` |
| """ |
| repo_type = repo_type or constants.REPO_TYPE_MODEL |
| revision = quote(revision, safe="") if revision is not None else constants.DEFAULT_REVISION |
| headers = self._build_hf_headers(token=token) |
|
|
| encoded_path_in_repo = "/" + quote(path_in_repo, safe="") if path_in_repo else "" |
| tree_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/tree/{revision}{encoded_path_in_repo}" |
| for path_info in paginate(path=tree_url, headers=headers, params={"recursive": recursive, "expand": expand}): |
| yield (RepoFile(**path_info) if path_info["type"] == "file" else RepoFolder(**path_info)) |
|
|
| @validate_hf_hub_args |
| def verify_repo_checksums( |
| self, |
| repo_id: str, |
| *, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| local_dir: Optional[Union[str, Path]] = None, |
| cache_dir: Optional[Union[str, Path]] = None, |
| token: Union[str, bool, None] = None, |
| ) -> "FolderVerification": |
| """ |
| Verify local files for a repo against Hub checksums. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated by a `/`. |
| repo_type (`str`, *optional*): |
| The type of the repository from which to get the tree (`"model"`, `"dataset"` or `"space"`. |
| Defaults to `"model"`. |
| revision (`str`, *optional*): |
| The revision of the repository from which to get the tree. Defaults to `"main"` branch. |
| local_dir (`str` or `Path`, *optional*): |
| The local directory to verify. |
| cache_dir (`str` or `Path`, *optional*): |
| The cache directory to verify. |
| token (Union[bool, str, None], optional): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`FolderVerification`]: a structured result containing the verification details. |
| |
| Raises: |
| [`~utils.RepositoryNotFoundError`]: |
| If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo |
| does not exist. |
| [`~utils.RevisionNotFoundError`]: |
| If revision is not found (error 404) on the repo. |
| |
| """ |
|
|
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
|
|
| if local_dir is not None and cache_dir is not None: |
| raise ValueError("Pass either `local_dir` or `cache_dir`, not both.") |
|
|
| root, remote_revision = resolve_local_root( |
| repo_id=repo_id, |
| repo_type=repo_type, |
| revision=revision, |
| cache_dir=Path(cache_dir) if cache_dir is not None else None, |
| local_dir=Path(local_dir) if local_dir is not None else None, |
| ) |
| local_by_path = collect_local_files(root) |
|
|
| |
| remote_by_path: dict[str, Union[RepoFile, RepoFolder]] = {} |
| for entry in self.list_repo_tree( |
| repo_id=repo_id, recursive=True, revision=remote_revision, repo_type=repo_type, token=token |
| ): |
| remote_by_path[entry.path] = entry |
|
|
| return verify_maps( |
| remote_by_path=remote_by_path, |
| local_by_path=local_by_path, |
| revision=remote_revision, |
| verified_path=root, |
| ) |
|
|
| @validate_hf_hub_args |
| def list_repo_refs( |
| self, |
| repo_id: str, |
| *, |
| repo_type: Optional[str] = None, |
| include_pull_requests: bool = False, |
| token: Union[str, bool, None] = None, |
| ) -> GitRefs: |
| """ |
| Get the list of refs of a given repo (both tags and branches). |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if listing refs from a dataset or a Space, |
| `None` or `"model"` if listing from a model. Default is `None`. |
| include_pull_requests (`bool`, *optional*): |
| Whether to include refs from pull requests in the list. Defaults to `False`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Example: |
| ```py |
| >>> from huggingface_hub import HfApi |
| >>> api = HfApi() |
| >>> api.list_repo_refs("gpt2") |
| GitRefs(branches=[GitRefInfo(name='main', ref='refs/heads/main', target_commit='e7da7f221d5bf496a48136c0cd264e630fe9fcc8')], converts=[], tags=[]) |
| |
| >>> api.list_repo_refs("bigcode/the-stack", repo_type='dataset') |
| GitRefs( |
| branches=[ |
| GitRefInfo(name='main', ref='refs/heads/main', target_commit='18edc1591d9ce72aa82f56c4431b3c969b210ae3'), |
| GitRefInfo(name='v1.1.a1', ref='refs/heads/v1.1.a1', target_commit='f9826b862d1567f3822d3d25649b0d6d22ace714') |
| ], |
| converts=[], |
| tags=[ |
| GitRefInfo(name='v1.0', ref='refs/tags/v1.0', target_commit='c37a8cd1e382064d8aced5e05543c5f7753834da') |
| ] |
| ) |
| ``` |
| |
| Returns: |
| [`GitRefs`]: object containing all information about branches and tags for a |
| repo on the Hub. |
| """ |
| repo_type = repo_type or constants.REPO_TYPE_MODEL |
| response = get_session().get( |
| f"{self.endpoint}/api/{repo_type}s/{repo_id}/refs", |
| headers=self._build_hf_headers(token=token), |
| params={"include_prs": 1} if include_pull_requests else {}, |
| ) |
| hf_raise_for_status(response) |
| data = response.json() |
|
|
| def _format_as_git_ref_info(item: dict) -> GitRefInfo: |
| return GitRefInfo(name=item["name"], ref=item["ref"], target_commit=item["targetCommit"]) |
|
|
| return GitRefs( |
| branches=[_format_as_git_ref_info(item) for item in data["branches"]], |
| converts=[_format_as_git_ref_info(item) for item in data["converts"]], |
| tags=[_format_as_git_ref_info(item) for item in data["tags"]], |
| pull_requests=[_format_as_git_ref_info(item) for item in data["pullRequests"]] |
| if include_pull_requests |
| else None, |
| ) |
|
|
| @validate_hf_hub_args |
| def list_repo_commits( |
| self, |
| repo_id: str, |
| *, |
| repo_type: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| revision: Optional[str] = None, |
| formatted: bool = False, |
| ) -> list[GitCommitInfo]: |
| """ |
| Get the list of commits of a given revision for a repo on the Hub. |
| |
| Commits are sorted by date (last commit first). |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated by a `/`. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if listing commits from a dataset or a Space, `None` or `"model"` if |
| listing from a model. Default is `None`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| revision (`str`, *optional*): |
| The git revision to commit from. Defaults to the head of the `"main"` branch. |
| formatted (`bool`): |
| Whether to return the HTML-formatted title and description of the commits. Defaults to False. |
| |
| Example: |
| ```py |
| >>> from huggingface_hub import HfApi |
| >>> api = HfApi() |
| |
| # Commits are sorted by date (last commit first) |
| >>> initial_commit = api.list_repo_commits("gpt2")[-1] |
| |
| # Initial commit is always a system commit containing the `.gitattributes` file. |
| >>> initial_commit |
| GitCommitInfo( |
| commit_id='9b865efde13a30c13e0a33e536cf3e4a5a9d71d8', |
| authors=['system'], |
| created_at=datetime.datetime(2019, 2, 18, 10, 36, 15, tzinfo=datetime.timezone.utc), |
| title='initial commit', |
| message='', |
| formatted_title=None, |
| formatted_message=None |
| ) |
| |
| # Create an empty branch by deriving from initial commit |
| >>> api.create_branch("gpt2", "new_empty_branch", revision=initial_commit.commit_id) |
| ``` |
| |
| Returns: |
| list[[`GitCommitInfo`]]: list of objects containing information about the commits for a repo on the Hub. |
| |
| Raises: |
| [`~utils.RepositoryNotFoundError`]: |
| If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo |
| does not exist. |
| [`~utils.RevisionNotFoundError`]: |
| If revision is not found (error 404) on the repo. |
| """ |
| repo_type = repo_type or constants.REPO_TYPE_MODEL |
| revision = quote(revision, safe="") if revision is not None else constants.DEFAULT_REVISION |
|
|
| |
| return [ |
| GitCommitInfo( |
| commit_id=item["id"], |
| authors=[author["user"] for author in item["authors"]], |
| created_at=parse_datetime(item["date"]), |
| title=item["title"], |
| message=item["message"], |
| formatted_title=item.get("formatted", {}).get("title"), |
| formatted_message=item.get("formatted", {}).get("message"), |
| ) |
| for item in paginate( |
| f"{self.endpoint}/api/{repo_type}s/{repo_id}/commits/{revision}", |
| headers=self._build_hf_headers(token=token), |
| params={"expand[]": "formatted"} if formatted else {}, |
| ) |
| ] |
|
|
| @validate_hf_hub_args |
| def get_paths_info( |
| self, |
| repo_id: str, |
| paths: Union[list[str], str], |
| *, |
| expand: bool = False, |
| revision: Optional[str] = None, |
| repo_type: Optional[str] = None, |
| token: Union[str, bool, None] = None, |
| ) -> list[Union[RepoFile, RepoFolder]]: |
| """ |
| Get information about a repo's paths. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated by a `/`. |
| paths (`Union[list[str], str]`, *optional*): |
| The paths to get information about. If a path do not exist, it is ignored without raising |
| an exception. |
| expand (`bool`, *optional*, defaults to `False`): |
| Whether to fetch more information about the paths (e.g. last commit and files' security scan results). This |
| operation is more expensive for the server so only 50 results are returned per page (instead of 1000). |
| As pagination is implemented in `huggingface_hub`, this is transparent for you except for the time it |
| takes to get the results. |
| revision (`str`, *optional*): |
| The revision of the repository from which to get the information. Defaults to `"main"` branch. |
| repo_type (`str`, *optional*): |
| The type of the repository from which to get the information (`"model"`, `"dataset"` or `"space"`. |
| Defaults to `"model"`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `list[Union[RepoFile, RepoFolder]]`: |
| The information about the paths, as a list of [`RepoFile`] and [`RepoFolder`] objects. |
| |
| Raises: |
| [`~utils.RepositoryNotFoundError`]: |
| If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo |
| does not exist. |
| [`~utils.RevisionNotFoundError`]: |
| If revision is not found (error 404) on the repo. |
| |
| Example: |
| ```py |
| >>> from huggingface_hub import get_paths_info |
| >>> paths_info = get_paths_info("allenai/c4", ["README.md", "en"], repo_type="dataset") |
| >>> paths_info |
| [ |
| RepoFile(path='README.md', size=2379, blob_id='f84cb4c97182890fc1dbdeaf1a6a468fd27b4fff', lfs=None, last_commit=None, security=None), |
| RepoFolder(path='en', tree_id='dc943c4c40f53d02b31ced1defa7e5f438d5862e', last_commit=None) |
| ] |
| ``` |
| """ |
| repo_type = repo_type or constants.REPO_TYPE_MODEL |
| revision = quote(revision, safe="") if revision is not None else constants.DEFAULT_REVISION |
| headers = self._build_hf_headers(token=token) |
|
|
| response = get_session().post( |
| f"{self.endpoint}/api/{repo_type}s/{repo_id}/paths-info/{revision}", |
| data={ |
| "paths": paths if isinstance(paths, list) else [paths], |
| "expand": expand, |
| }, |
| headers=headers, |
| ) |
| hf_raise_for_status(response) |
| paths_info = response.json() |
| return [ |
| RepoFile(**path_info) if path_info["type"] == "file" else RepoFolder(**path_info) |
| for path_info in paths_info |
| ] |
|
|
| @validate_hf_hub_args |
| def super_squash_history( |
| self, |
| repo_id: str, |
| *, |
| branch: Optional[str] = None, |
| commit_message: Optional[str] = None, |
| repo_type: Optional[str] = None, |
| token: Union[str, bool, None] = None, |
| ) -> None: |
| """Squash commit history on a branch for a repo on the Hub. |
| |
| Squashing the repo history is useful when you know you'll make hundreds of commits and you don't want to |
| clutter the history. Squashing commits can only be performed from the head of a branch. |
| |
| > [!WARNING] |
| > Once squashed, the commit history cannot be retrieved. This is a non-revertible operation. |
| |
| > [!WARNING] |
| > Once the history of a branch has been squashed, it is not possible to merge it back into another branch since |
| > their history will have diverged. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated by a `/`. |
| branch (`str`, *optional*): |
| The branch to squash. Defaults to the head of the `"main"` branch. |
| commit_message (`str`, *optional*): |
| The commit message to use for the squashed commit. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if listing commits from a dataset or a Space, `None` or `"model"` if |
| listing from a model. Default is `None`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Raises: |
| [`~utils.RepositoryNotFoundError`]: |
| If repository is not found (error 404): wrong repo_id/repo_type, private but not authenticated or repo |
| does not exist. |
| [`~utils.RevisionNotFoundError`]: |
| If the branch to squash cannot be found. |
| [`~utils.BadRequestError`]: |
| If invalid reference for a branch. You cannot squash history on tags. |
| |
| Example: |
| ```py |
| >>> from huggingface_hub import HfApi |
| >>> api = HfApi() |
| |
| # Create repo |
| >>> repo_id = api.create_repo("test-squash").repo_id |
| |
| # Make a lot of commits. |
| >>> api.upload_file(repo_id=repo_id, path_in_repo="file.txt", path_or_fileobj=b"content") |
| >>> api.upload_file(repo_id=repo_id, path_in_repo="lfs.bin", path_or_fileobj=b"content") |
| >>> api.upload_file(repo_id=repo_id, path_in_repo="file.txt", path_or_fileobj=b"another_content") |
| |
| # Squash history |
| >>> api.super_squash_history(repo_id=repo_id) |
| ``` |
| """ |
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
| if repo_type not in constants.REPO_TYPES: |
| raise ValueError("Invalid repo type") |
| if branch is None: |
| branch = constants.DEFAULT_REVISION |
|
|
| |
| url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/super-squash/{quote(branch, safe='')}" |
| headers = self._build_hf_headers(token=token) |
| commit_message = commit_message or f"Super-squash branch '{branch}' using huggingface_hub" |
|
|
| |
| response = get_session().post(url=url, headers=headers, json={"message": commit_message}) |
| hf_raise_for_status(response) |
|
|
| @validate_hf_hub_args |
| def list_lfs_files( |
| self, |
| repo_id: str, |
| *, |
| repo_type: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> Iterable[LFSFileInfo]: |
| """ |
| List all LFS files in a repo on the Hub. |
| |
| This is primarily useful to count how much storage a repo is using and to eventually clean up large files |
| with [`permanently_delete_lfs_files`]. Note that this would be a permanent action that will affect all commits |
| referencing this deleted files and that cannot be undone. |
| |
| Args: |
| repo_id (`str`): |
| The repository for which you are listing LFS files. |
| repo_type (`str`, *optional*): |
| Type of repository. Set to `"dataset"` or `"space"` if listing from a dataset or space, `None` or |
| `"model"` if listing from a model. Default is `None`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `Iterable[LFSFileInfo]`: An iterator of [`LFSFileInfo`] objects. |
| |
| Example: |
| ```py |
| >>> from huggingface_hub import HfApi |
| >>> api = HfApi() |
| >>> lfs_files = api.list_lfs_files("username/my-cool-repo") |
| |
| # Filter files files to delete based on a combination of `filename`, `pushed_at`, `ref` or `size`. |
| # e.g. select only LFS files in the "checkpoints" folder |
| >>> lfs_files_to_delete = (lfs_file for lfs_file in lfs_files if lfs_file.filename.startswith("checkpoints/")) |
| |
| # Permanently delete LFS files |
| >>> api.permanently_delete_lfs_files("username/my-cool-repo", lfs_files_to_delete) |
| ``` |
| """ |
| |
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
| url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/lfs-files" |
| headers = self._build_hf_headers(token=token) |
|
|
| |
| for item in paginate(url, params={}, headers=headers): |
| yield LFSFileInfo(**item) |
|
|
| @validate_hf_hub_args |
| def permanently_delete_lfs_files( |
| self, |
| repo_id: str, |
| lfs_files: Iterable[LFSFileInfo], |
| *, |
| rewrite_history: bool = True, |
| repo_type: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> None: |
| """ |
| Permanently delete LFS files from a repo on the Hub. |
| |
| > [!WARNING] |
| > This is a permanent action that will affect all commits referencing the deleted files and might corrupt your |
| > repository. This is a non-revertible operation. Use it only if you know what you are doing. |
| |
| Args: |
| repo_id (`str`): |
| The repository for which you are listing LFS files. |
| lfs_files (`Iterable[LFSFileInfo]`): |
| An iterable of [`LFSFileInfo`] items to permanently delete from the repo. Use [`list_lfs_files`] to list |
| all LFS files from a repo. |
| rewrite_history (`bool`, *optional*, default to `True`): |
| Whether to rewrite repository history to remove file pointers referencing the deleted LFS files (recommended). |
| repo_type (`str`, *optional*): |
| Type of repository. Set to `"dataset"` or `"space"` if listing from a dataset or space, `None` or |
| `"model"` if listing from a model. Default is `None`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Example: |
| ```py |
| >>> from huggingface_hub import HfApi |
| >>> api = HfApi() |
| >>> lfs_files = api.list_lfs_files("username/my-cool-repo") |
| |
| # Filter files files to delete based on a combination of `filename`, `pushed_at`, `ref` or `size`. |
| # e.g. select only LFS files in the "checkpoints" folder |
| >>> lfs_files_to_delete = (lfs_file for lfs_file in lfs_files if lfs_file.filename.startswith("checkpoints/")) |
| |
| # Permanently delete LFS files |
| >>> api.permanently_delete_lfs_files("username/my-cool-repo", lfs_files_to_delete) |
| ``` |
| """ |
| |
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
| url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/lfs-files/batch" |
| headers = self._build_hf_headers(token=token) |
|
|
| |
| for batch in chunk_iterable(lfs_files, 1000): |
| shas = [item.file_oid for item in batch] |
| if len(shas) == 0: |
| return |
| payload = { |
| "deletions": { |
| "sha": shas, |
| "rewriteHistory": rewrite_history, |
| } |
| } |
| response = get_session().post(url, headers=headers, json=payload) |
| hf_raise_for_status(response) |
|
|
| @validate_hf_hub_args |
| def create_repo( |
| self, |
| repo_id: str, |
| *, |
| token: Union[str, bool, None] = None, |
| private: Optional[bool] = None, |
| repo_type: Optional[str] = None, |
| exist_ok: bool = False, |
| resource_group_id: Optional[str] = None, |
| space_sdk: Optional[str] = None, |
| space_hardware: Optional[SpaceHardware] = None, |
| space_storage: Optional[SpaceStorage] = None, |
| space_sleep_time: Optional[int] = None, |
| space_secrets: Optional[list[dict[str, str]]] = None, |
| space_variables: Optional[list[dict[str, str]]] = None, |
| ) -> RepoUrl: |
| """Create an empty repo on the HuggingFace Hub. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| private (`bool`, *optional*): |
| Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if uploading to a dataset or |
| space, `None` or `"model"` if uploading to a model. Default is |
| `None`. |
| exist_ok (`bool`, *optional*, defaults to `False`): |
| If `True`, do not raise an error if repo already exists. |
| resource_group_id (`str`, *optional*): |
| Resource group in which to create the repo. Resource groups is only available for Enterprise Hub organizations and |
| allow to define which members of the organization can access the resource. The ID of a resource group |
| can be found in the URL of the resource's page on the Hub (e.g. `"66670e5163145ca562cb1988"`). |
| To learn more about resource groups, see https://huggingface.co/docs/hub/en/security-resource-groups. |
| space_sdk (`str`, *optional*): |
| Choice of SDK to use if repo_type is "space". Can be "streamlit", "gradio", "docker", or "static". |
| space_hardware (`SpaceHardware` or `str`, *optional*): |
| Choice of Hardware if repo_type is "space". See [`SpaceHardware`] for a complete list. |
| space_storage (`SpaceStorage` or `str`, *optional*): |
| Choice of persistent storage tier. Example: `"small"`. See [`SpaceStorage`] for a complete list. |
| space_sleep_time (`int`, *optional*): |
| Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1` if you don't want |
| your Space to sleep (default behavior for upgraded hardware). For free hardware, you can't configure |
| the sleep time (value is fixed to 48 hours of inactivity). |
| See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details. |
| space_secrets (`list[dict[str, str]]`, *optional*): |
| A list of secret keys to set in your Space. Each item is in the form `{"key": ..., "value": ..., "description": ...}` where description is optional. |
| For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets. |
| space_variables (`list[dict[str, str]]`, *optional*): |
| A list of public environment variables to set in your Space. Each item is in the form `{"key": ..., "value": ..., "description": ...}` where description is optional. |
| For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables. |
| |
| Returns: |
| [`RepoUrl`]: URL to the newly created repo. Value is a subclass of `str` containing |
| attributes like `endpoint`, `repo_type` and `repo_id`. |
| """ |
| organization, name = repo_id.split("/") if "/" in repo_id else (None, repo_id) |
|
|
| path = f"{self.endpoint}/api/repos/create" |
|
|
| if repo_type not in constants.REPO_TYPES: |
| raise ValueError("Invalid repo type") |
|
|
| json: dict[str, Any] = {"name": name, "organization": organization} |
| if private is not None: |
| json["private"] = private |
| if repo_type is not None: |
| json["type"] = repo_type |
| if repo_type == "space": |
| if space_sdk is None: |
| raise ValueError( |
| "No space_sdk provided. `create_repo` expects space_sdk to be one" |
| f" of {constants.SPACES_SDK_TYPES} when repo_type is 'space'`" |
| ) |
| if space_sdk not in constants.SPACES_SDK_TYPES: |
| raise ValueError(f"Invalid space_sdk. Please choose one of {constants.SPACES_SDK_TYPES}.") |
| json["sdk"] = space_sdk |
|
|
| if space_sdk is not None and repo_type != "space": |
| warnings.warn("Ignoring provided space_sdk because repo_type is not 'space'.") |
|
|
| function_args = [ |
| "space_hardware", |
| "space_storage", |
| "space_sleep_time", |
| "space_secrets", |
| "space_variables", |
| ] |
| json_keys = ["hardware", "storageTier", "sleepTimeSeconds", "secrets", "variables"] |
| values = [space_hardware, space_storage, space_sleep_time, space_secrets, space_variables] |
|
|
| if repo_type == "space": |
| json.update({k: v for k, v in zip(json_keys, values) if v is not None}) |
| else: |
| provided_space_args = [key for key, value in zip(function_args, values) if value is not None] |
|
|
| if provided_space_args: |
| warnings.warn(f"Ignoring provided {', '.join(provided_space_args)} because repo_type is not 'space'.") |
|
|
| if resource_group_id is not None: |
| json["resourceGroupId"] = resource_group_id |
|
|
| headers = self._build_hf_headers(token=token) |
| while True: |
| r = get_session().post(path, headers=headers, json=json) |
| if r.status_code == 409 and "Cannot create repo: another conflicting operation is in progress" in r.text: |
| |
| |
| |
| |
| |
| |
| logger.debug("Create repo failed due to a concurrency issue. Retrying...") |
| continue |
| break |
|
|
| try: |
| hf_raise_for_status(r) |
| except HfHubHTTPError as err: |
| if exist_ok and err.response.status_code == 409: |
| |
| pass |
| elif exist_ok and err.response.status_code == 403: |
| |
| try: |
| self.repo_info(repo_id=repo_id, repo_type=repo_type, token=token) |
| if repo_type is None or repo_type == constants.REPO_TYPE_MODEL: |
| return RepoUrl(f"{self.endpoint}/{repo_id}") |
| return RepoUrl(f"{self.endpoint}/{repo_type}/{repo_id}") |
| except HfHubHTTPError: |
| raise err |
| else: |
| raise |
|
|
| d = r.json() |
| return RepoUrl(d["url"], endpoint=self.endpoint) |
|
|
| @validate_hf_hub_args |
| def delete_repo( |
| self, |
| repo_id: str, |
| *, |
| token: Union[str, bool, None] = None, |
| repo_type: Optional[str] = None, |
| missing_ok: bool = False, |
| ) -> None: |
| """ |
| Delete a repo from the HuggingFace Hub. CAUTION: this is irreversible. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if uploading to a dataset or |
| space, `None` or `"model"` if uploading to a model. |
| missing_ok (`bool`, *optional*, defaults to `False`): |
| If `True`, do not raise an error if repo does not exist. |
| |
| Raises: |
| [`~utils.RepositoryNotFoundError`] |
| If the repository to delete from cannot be found and `missing_ok` is set to False (default). |
| """ |
| organization, name = repo_id.split("/") if "/" in repo_id else (None, repo_id) |
|
|
| path = f"{self.endpoint}/api/repos/delete" |
|
|
| if repo_type not in constants.REPO_TYPES: |
| raise ValueError("Invalid repo type") |
|
|
| json = {"name": name, "organization": organization} |
| if repo_type is not None: |
| json["type"] = repo_type |
|
|
| headers = self._build_hf_headers(token=token) |
| r = get_session().request("DELETE", path, headers=headers, json=json) |
| try: |
| hf_raise_for_status(r) |
| except RepositoryNotFoundError: |
| if not missing_ok: |
| raise |
|
|
| @validate_hf_hub_args |
| def update_repo_settings( |
| self, |
| repo_id: str, |
| *, |
| gated: Optional[Literal["auto", "manual", False]] = None, |
| private: Optional[bool] = None, |
| token: Union[str, bool, None] = None, |
| repo_type: Optional[str] = None, |
| ) -> None: |
| """ |
| Update the settings of a repository, including gated access and visibility. |
| |
| To give more control over how repos are used, the Hub allows repo authors to enable |
| access requests for their repos, and also to set the visibility of the repo to private. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated by a /. |
| gated (`Literal["auto", "manual", False]`, *optional*): |
| The gated status for the repository. If set to `None` (default), the `gated` setting of the repository won't be updated. |
| * "auto": The repository is gated, and access requests are automatically approved or denied based on predefined criteria. |
| * "manual": The repository is gated, and access requests require manual approval. |
| * False : The repository is not gated, and anyone can access it. |
| private (`bool`, *optional*): |
| Whether the repository should be private. |
| token (`Union[str, bool, None]`, *optional*): |
| A valid user access token (string). Defaults to the locally saved token, |
| which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass False. |
| repo_type (`str`, *optional*): |
| The type of the repository to update settings from (`"model"`, `"dataset"` or `"space"`). |
| Defaults to `"model"`. |
| Raises: |
| [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| If gated is not one of "auto", "manual", or False. |
| [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| If repo_type is not one of the values in constants.REPO_TYPES. |
| [`~utils.HfHubHTTPError`]: |
| If the request to the Hugging Face Hub API fails. |
| [`~utils.RepositoryNotFoundError`] |
| If the repository to download from cannot be found. This may be because it doesn't exist, |
| or because it is set to `private` and you do not have access. |
| """ |
|
|
| if repo_type not in constants.REPO_TYPES: |
| raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") |
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
|
|
| |
| payload: dict = {} |
|
|
| if gated is not None: |
| if gated not in ["auto", "manual", False]: |
| raise ValueError(f"Invalid gated status, must be one of 'auto', 'manual', or False. Got '{gated}'.") |
| payload["gated"] = gated |
|
|
| if private is not None: |
| payload["private"] = private |
|
|
| if len(payload) == 0: |
| raise ValueError("At least one setting must be updated.") |
|
|
| |
| headers = self._build_hf_headers(token=token) |
|
|
| r = get_session().put( |
| url=f"{self.endpoint}/api/{repo_type}s/{repo_id}/settings", |
| headers=headers, |
| json=payload, |
| ) |
| hf_raise_for_status(r) |
|
|
| def move_repo( |
| self, |
| from_id: str, |
| to_id: str, |
| *, |
| repo_type: Optional[str] = None, |
| token: Union[str, bool, None] = None, |
| ): |
| """ |
| Moving a repository from namespace1/repo_name1 to namespace2/repo_name2 |
| |
| Note there are certain limitations. For more information about moving |
| repositories, please see |
| https://hf.co/docs/hub/repositories-settings#renaming-or-transferring-a-repo. |
| |
| Args: |
| from_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. Original repository identifier. |
| to_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. Final repository identifier. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if uploading to a dataset or |
| space, `None` or `"model"` if uploading to a model. Default is |
| `None`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| > [!TIP] |
| > Raises the following errors: |
| > |
| > - [`~utils.RepositoryNotFoundError`] |
| > If the repository to download from cannot be found. This may be because it doesn't exist, |
| > or because it is set to `private` and you do not have access. |
| """ |
| if len(from_id.split("/")) != 2: |
| raise ValueError(f"Invalid repo_id: {from_id}. It should have a namespace (:namespace:/:repo_name:)") |
|
|
| if len(to_id.split("/")) != 2: |
| raise ValueError(f"Invalid repo_id: {to_id}. It should have a namespace (:namespace:/:repo_name:)") |
|
|
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
|
|
| json = {"fromRepo": from_id, "toRepo": to_id, "type": repo_type} |
|
|
| path = f"{self.endpoint}/api/repos/move" |
| headers = self._build_hf_headers(token=token) |
| r = get_session().post(path, headers=headers, json=json) |
| try: |
| hf_raise_for_status(r) |
| except HfHubHTTPError as e: |
| e.append_to_message( |
| "\nFor additional documentation please see" |
| " https://hf.co/docs/hub/repositories-settings#renaming-or-transferring-a-repo." |
| ) |
| raise |
|
|
| @overload |
| def create_commit( |
| self, |
| repo_id: str, |
| operations: Iterable[CommitOperation], |
| *, |
| commit_message: str, |
| commit_description: Optional[str] = None, |
| token: Union[str, bool, None] = None, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| create_pr: Optional[bool] = None, |
| num_threads: int = 5, |
| parent_commit: Optional[str] = None, |
| run_as_future: Literal[False] = ..., |
| ) -> CommitInfo: ... |
|
|
| @overload |
| def create_commit( |
| self, |
| repo_id: str, |
| operations: Iterable[CommitOperation], |
| *, |
| commit_message: str, |
| commit_description: Optional[str] = None, |
| token: Union[str, bool, None] = None, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| create_pr: Optional[bool] = None, |
| num_threads: int = 5, |
| parent_commit: Optional[str] = None, |
| run_as_future: Literal[True] = ..., |
| ) -> Future[CommitInfo]: ... |
|
|
| @validate_hf_hub_args |
| @future_compatible |
| def create_commit( |
| self, |
| repo_id: str, |
| operations: Iterable[CommitOperation], |
| *, |
| commit_message: str, |
| commit_description: Optional[str] = None, |
| token: Union[str, bool, None] = None, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| create_pr: Optional[bool] = None, |
| num_threads: int = 5, |
| parent_commit: Optional[str] = None, |
| run_as_future: bool = False, |
| ) -> Union[CommitInfo, Future[CommitInfo]]: |
| """ |
| Creates a commit in the given repo, deleting & uploading files as needed. |
| |
| > [!WARNING] |
| > The input list of `CommitOperation` will be mutated during the commit process. Do not reuse the same objects |
| > for multiple commits. |
| |
| > [!WARNING] |
| > `create_commit` assumes that the repo already exists on the Hub. If you get a |
| > Client error 404, please make sure you are authenticated and that `repo_id` and |
| > `repo_type` are set correctly. If repo does not exist, create it first using |
| > [`~hf_api.create_repo`]. |
| |
| > [!WARNING] |
| > `create_commit` is limited to 25k LFS files and a 1GB payload for regular files. |
| |
| Args: |
| repo_id (`str`): |
| The repository in which the commit will be created, for example: |
| `"username/custom_transformers"` |
| |
| operations (`Iterable` of [`~hf_api.CommitOperation`]): |
| An iterable of operations to include in the commit, either: |
| |
| - [`~hf_api.CommitOperationAdd`] to upload a file |
| - [`~hf_api.CommitOperationDelete`] to delete a file |
| - [`~hf_api.CommitOperationCopy`] to copy a file |
| |
| Operation objects will be mutated to include information relative to the upload. Do not reuse the |
| same objects for multiple commits. |
| |
| commit_message (`str`): |
| The summary (first line) of the commit that will be created. |
| |
| commit_description (`str`, *optional*): |
| The description of the commit that will be created |
| |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if uploading to a dataset or |
| space, `None` or `"model"` if uploading to a model. Default is |
| `None`. |
| |
| revision (`str`, *optional*): |
| The git revision to commit from. Defaults to the head of the `"main"` branch. |
| |
| create_pr (`boolean`, *optional*): |
| Whether or not to create a Pull Request with that commit. Defaults to `False`. |
| If `revision` is not set, PR is opened against the `"main"` branch. If |
| `revision` is set and is a branch, PR is opened against this branch. If |
| `revision` is set and is not a branch name (example: a commit oid), an |
| `RevisionNotFoundError` is returned by the server. |
| |
| num_threads (`int`, *optional*): |
| Number of concurrent threads for uploading files. Defaults to 5. |
| Setting it to 2 means at most 2 files will be uploaded concurrently. |
| |
| parent_commit (`str`, *optional*): |
| The OID / SHA of the parent commit, as a hexadecimal string. |
| Shorthands (7 first characters) are also supported. If specified and `create_pr` is `False`, |
| the commit will fail if `revision` does not point to `parent_commit`. If specified and `create_pr` |
| is `True`, the pull request will be created from `parent_commit`. Specifying `parent_commit` |
| ensures the repo has not changed before committing the changes, and can be especially useful |
| if the repo is updated / committed to concurrently. |
| run_as_future (`bool`, *optional*): |
| Whether or not to run this method in the background. Background jobs are run sequentially without |
| blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) |
| object. Defaults to `False`. |
| |
| Returns: |
| [`CommitInfo`] or `Future`: |
| Instance of [`CommitInfo`] containing information about the newly created commit (commit hash, commit |
| url, pr url, commit message,...). If `run_as_future=True` is passed, returns a Future object which will |
| contain the result when executed. |
| |
| Raises: |
| [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| If commit message is empty. |
| [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| If parent commit is not a valid commit OID. |
| [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| If a README.md file with an invalid metadata section is committed. In this case, the commit will fail |
| early, before trying to upload any file. |
| [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| If `create_pr` is `True` and revision is neither `None` nor `"main"`. |
| [`~utils.RepositoryNotFoundError`]: |
| If repository is not found (error 404): wrong repo_id/repo_type, private |
| but not authenticated or repo does not exist. |
| """ |
| if parent_commit is not None and not constants.REGEX_COMMIT_OID.fullmatch(parent_commit): |
| raise ValueError( |
| f"`parent_commit` is not a valid commit OID. It must match the following regex: {constants.REGEX_COMMIT_OID}" |
| ) |
|
|
| if commit_message is None or len(commit_message) == 0: |
| raise ValueError("`commit_message` can't be empty, please pass a value.") |
|
|
| commit_description = commit_description if commit_description is not None else "" |
| repo_type = repo_type if repo_type is not None else constants.REPO_TYPE_MODEL |
| if repo_type not in constants.REPO_TYPES: |
| raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") |
| unquoted_revision = revision or constants.DEFAULT_REVISION |
| revision = quote(unquoted_revision, safe="") |
| create_pr = create_pr if create_pr is not None else False |
|
|
| headers = self._build_hf_headers(token=token) |
|
|
| operations = list(operations) |
| additions = [op for op in operations if isinstance(op, CommitOperationAdd)] |
| copies = [op for op in operations if isinstance(op, CommitOperationCopy)] |
| nb_additions = len(additions) |
| nb_copies = len(copies) |
| nb_deletions = len(operations) - nb_additions - nb_copies |
|
|
| for addition in additions: |
| if addition._is_committed: |
| raise ValueError( |
| f"CommitOperationAdd {addition} has already being committed and cannot be reused. Please create a" |
| " new CommitOperationAdd object if you want to create a new commit." |
| ) |
|
|
| if repo_type != "dataset": |
| for addition in additions: |
| if addition.path_in_repo.endswith((".arrow", ".parquet")): |
| warnings.warn( |
| f"It seems that you are about to commit a data file ({addition.path_in_repo}) to a {repo_type}" |
| " repository. You are sure this is intended? If you are trying to upload a dataset, please" |
| " set `repo_type='dataset'` or `--repo-type=dataset` in a CLI." |
| ) |
|
|
| logger.debug( |
| f"About to commit to the hub: {len(additions)} addition(s), {len(copies)} copie(s) and" |
| f" {nb_deletions} deletion(s)." |
| ) |
|
|
| |
| |
| for addition in additions: |
| if addition.path_in_repo == "README.md": |
| with addition.as_file() as file: |
| content = file.read().decode() |
| self._validate_yaml(content, repo_type=repo_type, token=token) |
| |
| break |
|
|
| |
| _warn_on_overwriting_operations(operations) |
|
|
| self.preupload_lfs_files( |
| repo_id=repo_id, |
| additions=additions, |
| token=token, |
| repo_type=repo_type, |
| revision=unquoted_revision, |
| create_pr=create_pr, |
| num_threads=num_threads, |
| free_memory=False, |
| ) |
|
|
| files_to_copy = _fetch_files_to_copy( |
| copies=copies, |
| repo_type=repo_type, |
| repo_id=repo_id, |
| headers=headers, |
| revision=unquoted_revision, |
| endpoint=self.endpoint, |
| ) |
| |
| operations_without_no_op = [] |
| for operation in operations: |
| if ( |
| isinstance(operation, CommitOperationAdd) |
| and operation._remote_oid is not None |
| and operation._remote_oid == operation._local_oid |
| ): |
| |
| logger.debug(f"Skipping upload for '{operation.path_in_repo}' as the file has not changed.") |
| continue |
| if ( |
| isinstance(operation, CommitOperationCopy) |
| and operation._dest_oid is not None |
| and operation._dest_oid == operation._src_oid |
| ): |
| |
| logger.debug( |
| f"Skipping copy for '{operation.src_path_in_repo}' -> '{operation.path_in_repo}' as the content of the source file is the same as the destination file." |
| ) |
| continue |
| operations_without_no_op.append(operation) |
| if len(operations) != len(operations_without_no_op): |
| logger.info( |
| f"Removing {len(operations) - len(operations_without_no_op)} file(s) from commit that have not changed." |
| ) |
|
|
| |
| if len(operations_without_no_op) == 0: |
| logger.warning("No files have been modified since last commit. Skipping to prevent empty commit.") |
|
|
| |
| try: |
| info = self.repo_info(repo_id=repo_id, repo_type=repo_type, revision=unquoted_revision, token=token) |
| except RepositoryNotFoundError as e: |
| e.append_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE) |
| raise |
|
|
| |
| url_prefix = self.endpoint |
| if repo_type is not None and repo_type != constants.REPO_TYPE_MODEL: |
| url_prefix = f"{url_prefix}/{repo_type}s" |
| return CommitInfo( |
| commit_url=f"{url_prefix}/{repo_id}/commit/{info.sha}", |
| commit_message=commit_message, |
| commit_description=commit_description, |
| oid=info.sha, |
| ) |
|
|
| commit_payload = _prepare_commit_payload( |
| operations=operations, |
| files_to_copy=files_to_copy, |
| commit_message=commit_message, |
| commit_description=commit_description, |
| parent_commit=parent_commit, |
| ) |
| commit_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/commit/{revision}" |
|
|
| def _payload_as_ndjson() -> Iterable[bytes]: |
| for item in commit_payload: |
| yield json.dumps(item).encode() |
| yield b"\n" |
|
|
| headers = { |
| |
| "Content-Type": "application/x-ndjson", |
| **headers, |
| } |
| data = b"".join(_payload_as_ndjson()) |
| params = {"create_pr": "1"} if create_pr else None |
|
|
| try: |
| commit_resp = get_session().post(url=commit_url, headers=headers, content=data, params=params) |
| hf_raise_for_status(commit_resp, endpoint_name="commit") |
| except RepositoryNotFoundError as e: |
| e.append_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE) |
| raise |
| except RemoteEntryNotFoundError as e: |
| if nb_deletions > 0 and "A file with this name doesn't exist" in str(e): |
| e.append_to_message( |
| "\nMake sure to differentiate file and folder paths in delete" |
| " operations with a trailing '/' or using `is_folder=True/False`." |
| ) |
| raise |
|
|
| |
| for addition in additions: |
| addition._is_committed = True |
|
|
| commit_data = commit_resp.json() |
| return CommitInfo( |
| commit_url=commit_data["commitUrl"], |
| commit_message=commit_message, |
| commit_description=commit_description, |
| oid=commit_data["commitOid"], |
| pr_url=commit_data["pullRequestUrl"] if create_pr else None, |
| ) |
|
|
| def preupload_lfs_files( |
| self, |
| repo_id: str, |
| additions: Iterable[CommitOperationAdd], |
| *, |
| token: Union[str, bool, None] = None, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| create_pr: Optional[bool] = None, |
| num_threads: int = 5, |
| free_memory: bool = True, |
| gitignore_content: Optional[str] = None, |
| ): |
| """Pre-upload LFS files to S3 in preparation on a future commit. |
| |
| This method is useful if you are generating the files to upload on-the-fly and you don't want to store them |
| in memory before uploading them all at once. |
| |
| > [!WARNING] |
| > This is a power-user method. You shouldn't need to call it directly to make a normal commit. |
| > Use [`create_commit`] directly instead. |
| |
| > [!WARNING] |
| > Commit operations will be mutated during the process. In particular, the attached `path_or_fileobj` will be |
| > removed after the upload to save memory (and replaced by an empty `bytes` object). Do not reuse the same |
| > objects except to pass them to [`create_commit`]. If you don't want to remove the attached content from the |
| > commit operation object, pass `free_memory=False`. |
| |
| Args: |
| repo_id (`str`): |
| The repository in which you will commit the files, for example: `"username/custom_transformers"`. |
| |
| operations (`Iterable` of [`CommitOperationAdd`]): |
| The list of files to upload. Warning: the objects in this list will be mutated to include information |
| relative to the upload. Do not reuse the same objects for multiple commits. |
| |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| repo_type (`str`, *optional*): |
| The type of repository to upload to (e.g. `"model"` -default-, `"dataset"` or `"space"`). |
| |
| revision (`str`, *optional*): |
| The git revision to commit from. Defaults to the head of the `"main"` branch. |
| |
| create_pr (`boolean`, *optional*): |
| Whether or not you plan to create a Pull Request with that commit. Defaults to `False`. |
| |
| num_threads (`int`, *optional*): |
| Number of concurrent threads for uploading files. Defaults to 5. |
| Setting it to 2 means at most 2 files will be uploaded concurrently. |
| |
| gitignore_content (`str`, *optional*): |
| The content of the `.gitignore` file to know which files should be ignored. The order of priority |
| is to first check if `gitignore_content` is passed, then check if the `.gitignore` file is present |
| in the list of files to commit and finally default to the `.gitignore` file already hosted on the Hub |
| (if any). |
| |
| Example: |
| ```py |
| >>> from huggingface_hub import CommitOperationAdd, preupload_lfs_files, create_commit, create_repo |
| |
| >>> repo_id = create_repo("test_preupload").repo_id |
| |
| # Generate and preupload LFS files one by one |
| >>> operations = [] # List of all `CommitOperationAdd` objects that will be generated |
| >>> for i in range(5): |
| ... content = ... # generate binary content |
| ... addition = CommitOperationAdd(path_in_repo=f"shard_{i}_of_5.bin", path_or_fileobj=content) |
| ... preupload_lfs_files(repo_id, additions=[addition]) # upload + free memory |
| ... operations.append(addition) |
| |
| # Create commit |
| >>> create_commit(repo_id, operations=operations, commit_message="Commit all shards") |
| ``` |
| """ |
| repo_type = repo_type if repo_type is not None else constants.REPO_TYPE_MODEL |
| if repo_type not in constants.REPO_TYPES: |
| raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") |
| revision = quote(revision, safe="") if revision is not None else constants.DEFAULT_REVISION |
| create_pr = create_pr if create_pr is not None else False |
| headers = self._build_hf_headers(token=token) |
|
|
| |
| additions = list(additions) |
| if gitignore_content is None: |
| for addition in additions: |
| if addition.path_in_repo == ".gitignore": |
| with addition.as_file() as f: |
| gitignore_content = f.read().decode() |
| break |
|
|
| |
| new_additions = [addition for addition in additions if not addition._is_uploaded] |
|
|
| |
| |
| additions_no_upload_mode = [addition for addition in new_additions if addition._upload_mode is None] |
| if len(additions_no_upload_mode) > 0: |
| try: |
| _fetch_upload_modes( |
| additions=additions_no_upload_mode, |
| repo_type=repo_type, |
| repo_id=repo_id, |
| headers=headers, |
| revision=revision, |
| endpoint=self.endpoint, |
| create_pr=create_pr or False, |
| gitignore_content=gitignore_content, |
| ) |
| except RepositoryNotFoundError as e: |
| e.append_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE) |
| raise |
|
|
| |
| new_lfs_additions = [addition for addition in new_additions if addition._upload_mode == "lfs"] |
|
|
| |
| new_lfs_additions_to_upload = [] |
| for addition in new_lfs_additions: |
| if addition._should_ignore: |
| logger.debug(f"Skipping upload for LFS file '{addition.path_in_repo}' (ignored by gitignore file).") |
| else: |
| new_lfs_additions_to_upload.append(addition) |
| if len(new_lfs_additions) != len(new_lfs_additions_to_upload): |
| logger.info( |
| f"Skipped upload for {len(new_lfs_additions) - len(new_lfs_additions_to_upload)} LFS file(s) " |
| "(ignored by gitignore file)." |
| ) |
| |
| if len(new_lfs_additions_to_upload) == 0: |
| logger.debug("No LFS files to upload.") |
| return |
| |
| upload_kwargs = { |
| "additions": new_lfs_additions_to_upload, |
| "repo_type": repo_type, |
| "repo_id": repo_id, |
| "headers": headers, |
| "endpoint": self.endpoint, |
| |
| |
| |
| "revision": revision if not create_pr else None, |
| } |
| _upload_files(**upload_kwargs, num_threads=num_threads, create_pr=create_pr) |
| for addition in new_lfs_additions_to_upload: |
| addition._is_uploaded = True |
| if free_memory: |
| addition.path_or_fileobj = b"" |
|
|
| @overload |
| def upload_file( |
| self, |
| *, |
| path_or_fileobj: Union[str, Path, bytes, BinaryIO], |
| path_in_repo: str, |
| repo_id: str, |
| token: Union[str, bool, None] = None, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| commit_message: Optional[str] = None, |
| commit_description: Optional[str] = None, |
| create_pr: Optional[bool] = None, |
| parent_commit: Optional[str] = None, |
| run_as_future: Literal[False] = ..., |
| ) -> CommitInfo: ... |
|
|
| @overload |
| def upload_file( |
| self, |
| *, |
| path_or_fileobj: Union[str, Path, bytes, BinaryIO], |
| path_in_repo: str, |
| repo_id: str, |
| token: Union[str, bool, None] = None, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| commit_message: Optional[str] = None, |
| commit_description: Optional[str] = None, |
| create_pr: Optional[bool] = None, |
| parent_commit: Optional[str] = None, |
| run_as_future: Literal[True] = ..., |
| ) -> Future[CommitInfo]: ... |
|
|
| @validate_hf_hub_args |
| @future_compatible |
| def upload_file( |
| self, |
| *, |
| path_or_fileobj: Union[str, Path, bytes, BinaryIO], |
| path_in_repo: str, |
| repo_id: str, |
| token: Union[str, bool, None] = None, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| commit_message: Optional[str] = None, |
| commit_description: Optional[str] = None, |
| create_pr: Optional[bool] = None, |
| parent_commit: Optional[str] = None, |
| run_as_future: bool = False, |
| ) -> Union[CommitInfo, Future[CommitInfo]]: |
| """ |
| Upload a local file (up to 50 GB) to the given repo. The upload is done |
| through a HTTP post request, and doesn't require git or git-lfs to be |
| installed. |
| |
| Args: |
| path_or_fileobj (`str`, `Path`, `bytes`, or `IO`): |
| Path to a file on the local machine or binary data stream / |
| fileobj / buffer. |
| path_in_repo (`str`): |
| Relative filepath in the repo, for example: |
| `"checkpoints/1fec34a/weights.bin"` |
| repo_id (`str`): |
| The repository to which the file will be uploaded, for example: |
| `"username/custom_transformers"` |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if uploading to a dataset or |
| space, `None` or `"model"` if uploading to a model. Default is |
| `None`. |
| revision (`str`, *optional*): |
| The git revision to commit from. Defaults to the head of the `"main"` branch. |
| commit_message (`str`, *optional*): |
| The summary / title / first line of the generated commit |
| commit_description (`str` *optional*) |
| The description of the generated commit |
| create_pr (`boolean`, *optional*): |
| Whether or not to create a Pull Request with that commit. Defaults to `False`. |
| If `revision` is not set, PR is opened against the `"main"` branch. If |
| `revision` is set and is a branch, PR is opened against this branch. If |
| `revision` is set and is not a branch name (example: a commit oid), an |
| `RevisionNotFoundError` is returned by the server. |
| parent_commit (`str`, *optional*): |
| The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported. |
| If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`. |
| If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`. |
| Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be |
| especially useful if the repo is updated / committed to concurrently. |
| run_as_future (`bool`, *optional*): |
| Whether or not to run this method in the background. Background jobs are run sequentially without |
| blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) |
| object. Defaults to `False`. |
| |
| |
| Returns: |
| [`CommitInfo`] or `Future`: |
| Instance of [`CommitInfo`] containing information about the newly created commit (commit hash, commit |
| url, pr url, commit message,...). If `run_as_future=True` is passed, returns a Future object which will |
| contain the result when executed. |
| > [!TIP] |
| > Raises the following errors: |
| > |
| > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) |
| > if the HuggingFace API returned an error |
| > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| > if some parameter value is invalid |
| > - [`~utils.RepositoryNotFoundError`] |
| > If the repository to download from cannot be found. This may be because it doesn't exist, |
| > or because it is set to `private` and you do not have access. |
| > - [`~utils.RevisionNotFoundError`] |
| > If the revision to download from cannot be found. |
| |
| > [!WARNING] |
| > `upload_file` assumes that the repo already exists on the Hub. If you get a |
| > Client error 404, please make sure you are authenticated and that `repo_id` and |
| > `repo_type` are set correctly. If repo does not exist, create it first using |
| > [`~hf_api.create_repo`]. |
| |
| Example: |
| |
| ```python |
| >>> from huggingface_hub import upload_file |
| |
| >>> with open("./local/filepath", "rb") as fobj: |
| ... upload_file( |
| ... path_or_fileobj=fileobj, |
| ... path_in_repo="remote/file/path.h5", |
| ... repo_id="username/my-dataset", |
| ... repo_type="dataset", |
| ... token="my_token", |
| ... ) |
| |
| >>> upload_file( |
| ... path_or_fileobj=".\\\\local\\\\file\\\\path", |
| ... path_in_repo="remote/file/path.h5", |
| ... repo_id="username/my-model", |
| ... token="my_token", |
| ... ) |
| |
| >>> upload_file( |
| ... path_or_fileobj=".\\\\local\\\\file\\\\path", |
| ... path_in_repo="remote/file/path.h5", |
| ... repo_id="username/my-model", |
| ... token="my_token", |
| ... create_pr=True, |
| ... ) |
| ``` |
| """ |
| if repo_type not in constants.REPO_TYPES: |
| raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") |
|
|
| commit_message = ( |
| commit_message if commit_message is not None else f"Upload {path_in_repo} with huggingface_hub" |
| ) |
| operation = CommitOperationAdd( |
| path_or_fileobj=path_or_fileobj, |
| path_in_repo=path_in_repo, |
| ) |
|
|
| return self.create_commit( |
| repo_id=repo_id, |
| repo_type=repo_type, |
| operations=[operation], |
| commit_message=commit_message, |
| commit_description=commit_description, |
| token=token, |
| revision=revision, |
| create_pr=create_pr, |
| parent_commit=parent_commit, |
| ) |
|
|
| @overload |
| def upload_folder( |
| self, |
| *, |
| repo_id: str, |
| folder_path: Union[str, Path], |
| path_in_repo: Optional[str] = None, |
| commit_message: Optional[str] = None, |
| commit_description: Optional[str] = None, |
| token: Union[str, bool, None] = None, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| create_pr: Optional[bool] = None, |
| parent_commit: Optional[str] = None, |
| allow_patterns: Optional[Union[list[str], str]] = None, |
| ignore_patterns: Optional[Union[list[str], str]] = None, |
| delete_patterns: Optional[Union[list[str], str]] = None, |
| run_as_future: Literal[False] = ..., |
| ) -> CommitInfo: ... |
|
|
| @overload |
| def upload_folder( |
| self, |
| *, |
| repo_id: str, |
| folder_path: Union[str, Path], |
| path_in_repo: Optional[str] = None, |
| commit_message: Optional[str] = None, |
| commit_description: Optional[str] = None, |
| token: Union[str, bool, None] = None, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| create_pr: Optional[bool] = None, |
| parent_commit: Optional[str] = None, |
| allow_patterns: Optional[Union[list[str], str]] = None, |
| ignore_patterns: Optional[Union[list[str], str]] = None, |
| delete_patterns: Optional[Union[list[str], str]] = None, |
| run_as_future: Literal[True] = ..., |
| ) -> Future[CommitInfo]: ... |
|
|
| @validate_hf_hub_args |
| @future_compatible |
| def upload_folder( |
| self, |
| *, |
| repo_id: str, |
| folder_path: Union[str, Path], |
| path_in_repo: Optional[str] = None, |
| commit_message: Optional[str] = None, |
| commit_description: Optional[str] = None, |
| token: Union[str, bool, None] = None, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| create_pr: Optional[bool] = None, |
| parent_commit: Optional[str] = None, |
| allow_patterns: Optional[Union[list[str], str]] = None, |
| ignore_patterns: Optional[Union[list[str], str]] = None, |
| delete_patterns: Optional[Union[list[str], str]] = None, |
| run_as_future: bool = False, |
| ) -> Union[CommitInfo, Future[CommitInfo]]: |
| """ |
| Upload a local folder to the given repo. The upload is done through a HTTP requests, and doesn't require git or |
| git-lfs to be installed. |
| |
| The structure of the folder will be preserved. Files with the same name already present in the repository will |
| be overwritten. Others will be left untouched. |
| |
| Use the `allow_patterns` and `ignore_patterns` arguments to specify which files to upload. These parameters |
| accept either a single pattern or a list of patterns. Patterns are Standard Wildcards (globbing patterns) as |
| documented [here](https://tldp.org/LDP/GNU-Linux-Tools-Summary/html/x11655.htm). If both `allow_patterns` and |
| `ignore_patterns` are provided, both constraints apply. By default, all files from the folder are uploaded. |
| |
| Use the `delete_patterns` argument to specify remote files you want to delete. Input type is the same as for |
| `allow_patterns` (see above). If `path_in_repo` is also provided, the patterns are matched against paths |
| relative to this folder. For example, `upload_folder(..., path_in_repo="experiment", delete_patterns="logs/*")` |
| will delete any remote file under `./experiment/logs/`. Note that the `.gitattributes` file will not be deleted |
| even if it matches the patterns. |
| |
| Any `.git/` folder present in any subdirectory will be ignored. However, please be aware that the `.gitignore` |
| file is not taken into account. |
| |
| Uses `HfApi.create_commit` under the hood. |
| |
| Args: |
| repo_id (`str`): |
| The repository to which the file will be uploaded, for example: |
| `"username/custom_transformers"` |
| folder_path (`str` or `Path`): |
| Path to the folder to upload on the local file system |
| path_in_repo (`str`, *optional*): |
| Relative path of the directory in the repo, for example: |
| `"checkpoints/1fec34a/results"`. Will default to the root folder of the repository. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if uploading to a dataset or |
| space, `None` or `"model"` if uploading to a model. Default is |
| `None`. |
| revision (`str`, *optional*): |
| The git revision to commit from. Defaults to the head of the `"main"` branch. |
| commit_message (`str`, *optional*): |
| The summary / title / first line of the generated commit. Defaults to: |
| `f"Upload {path_in_repo} with huggingface_hub"` |
| commit_description (`str` *optional*): |
| The description of the generated commit |
| create_pr (`boolean`, *optional*): |
| Whether or not to create a Pull Request with that commit. Defaults to `False`. If `revision` is not |
| set, PR is opened against the `"main"` branch. If `revision` is set and is a branch, PR is opened |
| against this branch. If `revision` is set and is not a branch name (example: a commit oid), an |
| `RevisionNotFoundError` is returned by the server. |
| parent_commit (`str`, *optional*): |
| The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported. |
| If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`. |
| If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`. |
| Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be |
| especially useful if the repo is updated / committed to concurrently. |
| allow_patterns (`list[str]` or `str`, *optional*): |
| If provided, only files matching at least one pattern are uploaded. |
| ignore_patterns (`list[str]` or `str`, *optional*): |
| If provided, files matching any of the patterns are not uploaded. |
| delete_patterns (`list[str]` or `str`, *optional*): |
| If provided, remote files matching any of the patterns will be deleted from the repo while committing |
| new files. This is useful if you don't know which files have already been uploaded. |
| Note: to avoid discrepancies the `.gitattributes` file is not deleted even if it matches the pattern. |
| run_as_future (`bool`, *optional*): |
| Whether or not to run this method in the background. Background jobs are run sequentially without |
| blocking the main thread. Passing `run_as_future=True` will return a [Future](https://docs.python.org/3/library/concurrent.futures.html#future-objects) |
| object. Defaults to `False`. |
| |
| Returns: |
| [`CommitInfo`] or `Future`: |
| Instance of [`CommitInfo`] containing information about the newly created commit (commit hash, commit |
| url, pr url, commit message,...). If `run_as_future=True` is passed, returns a Future object which will |
| contain the result when executed. |
| |
| > [!TIP] |
| > Raises the following errors: |
| > |
| > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) |
| > if the HuggingFace API returned an error |
| > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| > if some parameter value is invalid |
| |
| > [!WARNING] |
| > `upload_folder` assumes that the repo already exists on the Hub. If you get a Client error 404, please make |
| > sure you are authenticated and that `repo_id` and `repo_type` are set correctly. If repo does not exist, create |
| > it first using [`~hf_api.create_repo`]. |
| |
| > [!TIP] |
| > When dealing with a large folder (thousands of files or hundreds of GB), we recommend using [`~hf_api.upload_large_folder`] instead. |
| |
| Example: |
| |
| ```python |
| # Upload checkpoints folder except the log files |
| >>> upload_folder( |
| ... folder_path="local/checkpoints", |
| ... path_in_repo="remote/experiment/checkpoints", |
| ... repo_id="username/my-dataset", |
| ... repo_type="datasets", |
| ... token="my_token", |
| ... ignore_patterns="**/logs/*.txt", |
| ... ) |
| |
| # Upload checkpoints folder including logs while deleting existing logs from the repo |
| # Useful if you don't know exactly which log files have already being pushed |
| >>> upload_folder( |
| ... folder_path="local/checkpoints", |
| ... path_in_repo="remote/experiment/checkpoints", |
| ... repo_id="username/my-dataset", |
| ... repo_type="datasets", |
| ... token="my_token", |
| ... delete_patterns="**/logs/*.txt", |
| ... ) |
| |
| # Upload checkpoints folder while creating a PR |
| >>> upload_folder( |
| ... folder_path="local/checkpoints", |
| ... path_in_repo="remote/experiment/checkpoints", |
| ... repo_id="username/my-dataset", |
| ... repo_type="datasets", |
| ... token="my_token", |
| ... create_pr=True, |
| ... ) |
| ``` |
| """ |
| if repo_type not in constants.REPO_TYPES: |
| raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") |
|
|
| |
| if path_in_repo is None: |
| path_in_repo = "" |
|
|
| |
| if ignore_patterns is None: |
| ignore_patterns = [] |
| elif isinstance(ignore_patterns, str): |
| ignore_patterns = [ignore_patterns] |
| ignore_patterns += DEFAULT_IGNORE_PATTERNS |
|
|
| delete_operations = self._prepare_folder_deletions( |
| repo_id=repo_id, |
| repo_type=repo_type, |
| revision=constants.DEFAULT_REVISION if create_pr else revision, |
| token=token, |
| path_in_repo=path_in_repo, |
| delete_patterns=delete_patterns, |
| ) |
| add_operations = self._prepare_upload_folder_additions( |
| folder_path, |
| path_in_repo, |
| allow_patterns=allow_patterns, |
| ignore_patterns=ignore_patterns, |
| token=token, |
| repo_type=repo_type, |
| ) |
|
|
| |
| if len(add_operations) > 0: |
| added_paths = set(op.path_in_repo for op in add_operations) |
| delete_operations = [ |
| delete_op for delete_op in delete_operations if delete_op.path_in_repo not in added_paths |
| ] |
| commit_operations = delete_operations + add_operations |
|
|
| commit_message = commit_message or "Upload folder using huggingface_hub" |
|
|
| return self.create_commit( |
| repo_type=repo_type, |
| repo_id=repo_id, |
| operations=commit_operations, |
| commit_message=commit_message, |
| commit_description=commit_description, |
| token=token, |
| revision=revision, |
| create_pr=create_pr, |
| parent_commit=parent_commit, |
| ) |
|
|
| @validate_hf_hub_args |
| def delete_file( |
| self, |
| path_in_repo: str, |
| repo_id: str, |
| *, |
| token: Union[str, bool, None] = None, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| commit_message: Optional[str] = None, |
| commit_description: Optional[str] = None, |
| create_pr: Optional[bool] = None, |
| parent_commit: Optional[str] = None, |
| ) -> CommitInfo: |
| """ |
| Deletes a file in the given repo. |
| |
| Args: |
| path_in_repo (`str`): |
| Relative filepath in the repo, for example: |
| `"checkpoints/1fec34a/weights.bin"` |
| repo_id (`str`): |
| The repository from which the file will be deleted, for example: |
| `"username/custom_transformers"` |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if the file is in a dataset or |
| space, `None` or `"model"` if in a model. Default is `None`. |
| revision (`str`, *optional*): |
| The git revision to commit from. Defaults to the head of the `"main"` branch. |
| commit_message (`str`, *optional*): |
| The summary / title / first line of the generated commit. Defaults to |
| `f"Delete {path_in_repo} with huggingface_hub"`. |
| commit_description (`str` *optional*) |
| The description of the generated commit |
| create_pr (`boolean`, *optional*): |
| Whether or not to create a Pull Request with that commit. Defaults to `False`. |
| If `revision` is not set, PR is opened against the `"main"` branch. If |
| `revision` is set and is a branch, PR is opened against this branch. If |
| `revision` is set and is not a branch name (example: a commit oid), an |
| `RevisionNotFoundError` is returned by the server. |
| parent_commit (`str`, *optional*): |
| The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported. |
| If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`. |
| If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`. |
| Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be |
| especially useful if the repo is updated / committed to concurrently. |
| |
| |
| > [!TIP] |
| > Raises the following errors: |
| > |
| > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) |
| > if the HuggingFace API returned an error |
| > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| > if some parameter value is invalid |
| > - [`~utils.RepositoryNotFoundError`] |
| > If the repository to download from cannot be found. This may be because it doesn't exist, |
| > or because it is set to `private` and you do not have access. |
| > - [`~utils.RevisionNotFoundError`] |
| > If the revision to download from cannot be found. |
| > - [`~utils.EntryNotFoundError`] |
| > If the file to download cannot be found. |
| |
| """ |
| commit_message = ( |
| commit_message if commit_message is not None else f"Delete {path_in_repo} with huggingface_hub" |
| ) |
|
|
| operations = [CommitOperationDelete(path_in_repo=path_in_repo)] |
|
|
| return self.create_commit( |
| repo_id=repo_id, |
| repo_type=repo_type, |
| token=token, |
| operations=operations, |
| revision=revision, |
| commit_message=commit_message, |
| commit_description=commit_description, |
| create_pr=create_pr, |
| parent_commit=parent_commit, |
| ) |
|
|
| @validate_hf_hub_args |
| def delete_files( |
| self, |
| repo_id: str, |
| delete_patterns: list[str], |
| *, |
| token: Union[bool, str, None] = None, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| commit_message: Optional[str] = None, |
| commit_description: Optional[str] = None, |
| create_pr: Optional[bool] = None, |
| parent_commit: Optional[str] = None, |
| ) -> CommitInfo: |
| """ |
| Delete files from a repository on the Hub. |
| |
| If a folder path is provided, the entire folder is deleted as well as |
| all files it contained. |
| |
| Args: |
| repo_id (`str`): |
| The repository from which the folder will be deleted, for example: |
| `"username/custom_transformers"` |
| delete_patterns (`list[str]`): |
| List of files or folders to delete. Each string can either be |
| a file path, a folder path or a Unix shell-style wildcard. |
| E.g. `["file.txt", "folder/", "data/*.parquet"]` |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| to the stored token. |
| repo_type (`str`, *optional*): |
| Type of the repo to delete files from. Can be `"model"`, |
| `"dataset"` or `"space"`. Defaults to `"model"`. |
| revision (`str`, *optional*): |
| The git revision to commit from. Defaults to the head of the `"main"` branch. |
| commit_message (`str`, *optional*): |
| The summary (first line) of the generated commit. Defaults to |
| `f"Delete files using huggingface_hub"`. |
| commit_description (`str` *optional*) |
| The description of the generated commit. |
| create_pr (`boolean`, *optional*): |
| Whether or not to create a Pull Request with that commit. Defaults to `False`. |
| If `revision` is not set, PR is opened against the `"main"` branch. If |
| `revision` is set and is a branch, PR is opened against this branch. If |
| `revision` is set and is not a branch name (example: a commit oid), an |
| `RevisionNotFoundError` is returned by the server. |
| parent_commit (`str`, *optional*): |
| The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported. |
| If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`. |
| If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`. |
| Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be |
| especially useful if the repo is updated / committed to concurrently. |
| """ |
| operations = self._prepare_folder_deletions( |
| repo_id=repo_id, repo_type=repo_type, delete_patterns=delete_patterns, path_in_repo="", revision=revision |
| ) |
|
|
| if commit_message is None: |
| commit_message = f"Delete files {' '.join(delete_patterns)} with huggingface_hub" |
|
|
| return self.create_commit( |
| repo_id=repo_id, |
| repo_type=repo_type, |
| token=token, |
| operations=operations, |
| revision=revision, |
| commit_message=commit_message, |
| commit_description=commit_description, |
| create_pr=create_pr, |
| parent_commit=parent_commit, |
| ) |
|
|
| @validate_hf_hub_args |
| def delete_folder( |
| self, |
| path_in_repo: str, |
| repo_id: str, |
| *, |
| token: Union[bool, str, None] = None, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| commit_message: Optional[str] = None, |
| commit_description: Optional[str] = None, |
| create_pr: Optional[bool] = None, |
| parent_commit: Optional[str] = None, |
| ) -> CommitInfo: |
| """ |
| Deletes a folder in the given repo. |
| |
| Simple wrapper around [`create_commit`] method. |
| |
| Args: |
| path_in_repo (`str`): |
| Relative folder path in the repo, for example: `"checkpoints/1fec34a"`. |
| repo_id (`str`): |
| The repository from which the folder will be deleted, for example: |
| `"username/custom_transformers"` |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| to the stored token. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if the folder is in a dataset or |
| space, `None` or `"model"` if in a model. Default is `None`. |
| revision (`str`, *optional*): |
| The git revision to commit from. Defaults to the head of the `"main"` branch. |
| commit_message (`str`, *optional*): |
| The summary / title / first line of the generated commit. Defaults to |
| `f"Delete folder {path_in_repo} with huggingface_hub"`. |
| commit_description (`str` *optional*) |
| The description of the generated commit. |
| create_pr (`boolean`, *optional*): |
| Whether or not to create a Pull Request with that commit. Defaults to `False`. |
| If `revision` is not set, PR is opened against the `"main"` branch. If |
| `revision` is set and is a branch, PR is opened against this branch. If |
| `revision` is set and is not a branch name (example: a commit oid), an |
| `RevisionNotFoundError` is returned by the server. |
| parent_commit (`str`, *optional*): |
| The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported. |
| If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`. |
| If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`. |
| Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be |
| especially useful if the repo is updated / committed to concurrently. |
| """ |
| return self.create_commit( |
| repo_id=repo_id, |
| repo_type=repo_type, |
| token=token, |
| operations=[CommitOperationDelete(path_in_repo=path_in_repo, is_folder=True)], |
| revision=revision, |
| commit_message=( |
| commit_message if commit_message is not None else f"Delete folder {path_in_repo} with huggingface_hub" |
| ), |
| commit_description=commit_description, |
| create_pr=create_pr, |
| parent_commit=parent_commit, |
| ) |
|
|
| def upload_large_folder( |
| self, |
| repo_id: str, |
| folder_path: Union[str, Path], |
| *, |
| repo_type: str, |
| revision: Optional[str] = None, |
| private: Optional[bool] = None, |
| allow_patterns: Optional[Union[list[str], str]] = None, |
| ignore_patterns: Optional[Union[list[str], str]] = None, |
| num_workers: Optional[int] = None, |
| print_report: bool = True, |
| print_report_every: int = 60, |
| ) -> None: |
| """Upload a large folder to the Hub in the most resilient way possible. |
| |
| Several workers are started to upload files in an optimized way. Before being committed to a repo, files must be |
| hashed and be pre-uploaded if they are LFS files. Workers will perform these tasks for each file in the folder. |
| At each step, some metadata information about the upload process is saved in the folder under `.cache/.huggingface/` |
| to be able to resume the process if interrupted. The whole process might result in several commits. |
| |
| Args: |
| repo_id (`str`): |
| The repository to which the file will be uploaded. |
| E.g. `"HuggingFaceTB/smollm-corpus"`. |
| folder_path (`str` or `Path`): |
| Path to the folder to upload on the local file system. |
| repo_type (`str`): |
| Type of the repository. Must be one of `"model"`, `"dataset"` or `"space"`. |
| Unlike in all other `HfApi` methods, `repo_type` is explicitly required here. This is to avoid |
| any mistake when uploading a large folder to the Hub, and therefore prevent from having to re-upload |
| everything. |
| revision (`str`, `optional`): |
| The branch to commit to. If not provided, the `main` branch will be used. |
| private (`bool`, `optional`): |
| Whether the repository should be private. |
| If `None` (default), the repo will be public unless the organization's default is private. |
| allow_patterns (`list[str]` or `str`, *optional*): |
| If provided, only files matching at least one pattern are uploaded. |
| ignore_patterns (`list[str]` or `str`, *optional*): |
| If provided, files matching any of the patterns are not uploaded. |
| num_workers (`int`, *optional*): |
| Number of workers to start. Defaults to half of CPU cores (minimum 1). |
| A higher number of workers may speed up the process if your machine allows it. However, on machines with a |
| slower connection, it is recommended to keep the number of workers low to ensure better resumability. |
| Indeed, partially uploaded files will have to be completely re-uploaded if the process is interrupted. |
| print_report (`bool`, *optional*): |
| Whether to print a report of the upload progress. Defaults to True. |
| Report is printed to `sys.stdout` every X seconds (60 by defaults) and overwrites the previous report. |
| print_report_every (`int`, *optional*): |
| Frequency at which the report is printed. Defaults to 60 seconds. |
| |
| > [!TIP] |
| > A few things to keep in mind: |
| > - Repository limits still apply: https://huggingface.co/docs/hub/repositories-recommendations |
| > - Do not start several processes in parallel. |
| > - You can interrupt and resume the process at any time. |
| > - Do not upload the same folder to several repositories. If you need to do so, you must delete the local `.cache/.huggingface/` folder first. |
| |
| > [!WARNING] |
| > While being much more robust to upload large folders, `upload_large_folder` is more limited than [`upload_folder`] feature-wise. In practice: |
| > - you cannot set a custom `path_in_repo`. If you want to upload to a subfolder, you need to set the proper structure locally. |
| > - you cannot set a custom `commit_message` and `commit_description` since multiple commits are created. |
| > - you cannot delete from the repo while uploading. Please make a separate commit first. |
| > - you cannot create a PR directly. Please create a PR first (from the UI or using [`create_pull_request`]) and then commit to it by passing `revision`. |
| |
| **Technical details:** |
| |
| `upload_large_folder` process is as follow: |
| 1. (Check parameters and setup.) |
| 2. Create repo if missing. |
| 3. List local files to upload. |
| 4. Run validation checks and display warnings if repository limits might be exceeded: |
| - Warns if the total number of files exceeds 100k (recommended limit). |
| - Warns if any folder contains more than 10k files (recommended limit). |
| - Warns about files larger than 20GB (recommended) or 50GB (hard limit). |
| 5. Start workers. Workers can perform the following tasks: |
| - Hash a file. |
| - Get upload mode (regular or LFS) for a list of files. |
| - Pre-upload an LFS file. |
| - Commit a bunch of files. |
| Once a worker finishes a task, it will move on to the next task based on the priority list (see below) until |
| all files are uploaded and committed. |
| 6. While workers are up, regularly print a report to sys.stdout. |
| |
| Order of priority: |
| 1. Commit if more than 5 minutes since last commit attempt (and at least 1 file). |
| 2. Commit if at least 150 files are ready to commit. |
| 3. Get upload mode if at least 10 files have been hashed. |
| 4. Pre-upload LFS file if at least 1 file and no worker is pre-uploading. |
| 5. Hash file if at least 1 file and no worker is hashing. |
| 6. Get upload mode if at least 1 file and no worker is getting upload mode. |
| 7. Pre-upload LFS file if at least 1 file. |
| 8. Hash file if at least 1 file to hash. |
| 9. Get upload mode if at least 1 file to get upload mode. |
| 10. Commit if at least 1 file to commit and at least 1 min since last commit attempt. |
| 11. Commit if at least 1 file to commit and all other queues are empty. |
| |
| Special rules: |
| - Only one worker can commit at a time. |
| - If no tasks are available, the worker waits for 10 seconds before checking again. |
| """ |
| return upload_large_folder_internal( |
| self, |
| repo_id=repo_id, |
| folder_path=folder_path, |
| repo_type=repo_type, |
| revision=revision, |
| private=private, |
| allow_patterns=allow_patterns, |
| ignore_patterns=ignore_patterns, |
| num_workers=num_workers, |
| print_report=print_report, |
| print_report_every=print_report_every, |
| ) |
|
|
| @validate_hf_hub_args |
| def get_hf_file_metadata( |
| self, |
| *, |
| url: str, |
| token: Union[bool, str, None] = None, |
| timeout: Optional[float] = constants.HF_HUB_ETAG_TIMEOUT, |
| ) -> HfFileMetadata: |
| """Fetch metadata of a file versioned on the Hub for a given url. |
| |
| Args: |
| url (`str`): |
| File url, for example returned by [`hf_hub_url`]. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| timeout (`float`, *optional*, defaults to 10): |
| How many seconds to wait for the server to send metadata before giving up. |
| |
| Returns: |
| A [`HfFileMetadata`] object containing metadata such as location, etag, size and commit_hash. |
| """ |
| if token is None: |
| |
| token = self.token |
|
|
| return get_hf_file_metadata( |
| url=url, |
| token=token, |
| timeout=timeout, |
| library_name=self.library_name, |
| library_version=self.library_version, |
| user_agent=self.user_agent, |
| endpoint=self.endpoint, |
| ) |
|
|
| @overload |
| def hf_hub_download( |
| self, |
| repo_id: str, |
| filename: str, |
| *, |
| subfolder: Optional[str] = None, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| cache_dir: Union[str, Path, None] = None, |
| local_dir: Union[str, Path, None] = None, |
| force_download: bool = False, |
| etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, |
| token: Union[bool, str, None] = None, |
| local_files_only: bool = False, |
| tqdm_class: Optional[type[base_tqdm]] = None, |
| dry_run: Literal[False] = False, |
| ) -> str: ... |
|
|
| @overload |
| def hf_hub_download( |
| self, |
| repo_id: str, |
| filename: str, |
| *, |
| subfolder: Optional[str] = None, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| cache_dir: Union[str, Path, None] = None, |
| local_dir: Union[str, Path, None] = None, |
| force_download: bool = False, |
| etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, |
| token: Union[bool, str, None] = None, |
| local_files_only: bool = False, |
| tqdm_class: Optional[type[base_tqdm]] = None, |
| dry_run: Literal[True], |
| ) -> DryRunFileInfo: ... |
|
|
| @validate_hf_hub_args |
| def hf_hub_download( |
| self, |
| repo_id: str, |
| filename: str, |
| *, |
| subfolder: Optional[str] = None, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| cache_dir: Union[str, Path, None] = None, |
| local_dir: Union[str, Path, None] = None, |
| force_download: bool = False, |
| etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, |
| token: Union[bool, str, None] = None, |
| local_files_only: bool = False, |
| tqdm_class: Optional[type[base_tqdm]] = None, |
| dry_run: bool = False, |
| ) -> Union[str, DryRunFileInfo]: |
| """Download a given file if it's not already present in the local cache. |
| |
| The new cache file layout looks like this: |
| - The cache directory contains one subfolder per repo_id (namespaced by repo type) |
| - inside each repo folder: |
| - refs is a list of the latest known revision => commit_hash pairs |
| - blobs contains the actual file blobs (identified by their git-sha or sha256, depending on |
| whether they're LFS files or not) |
| - snapshots contains one subfolder per commit, each "commit" contains the subset of the files |
| that have been resolved at that particular commit. Each filename is a symlink to the blob |
| at that particular commit. |
| |
| ``` |
| [ 96] . |
| └── [ 160] models--julien-c--EsperBERTo-small |
| ├── [ 160] blobs |
| │ ├── [321M] 403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd |
| │ ├── [ 398] 7cb18dc9bafbfcf74629a4b760af1b160957a83e |
| │ └── [1.4K] d7edf6bd2a681fb0175f7735299831ee1b22b812 |
| ├── [ 96] refs |
| │ └── [ 40] main |
| └── [ 128] snapshots |
| ├── [ 128] 2439f60ef33a0d46d85da5001d52aeda5b00ce9f |
| │ ├── [ 52] README.md -> ../../blobs/d7edf6bd2a681fb0175f7735299831ee1b22b812 |
| │ └── [ 76] pytorch_model.bin -> ../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd |
| └── [ 128] bbc77c8132af1cc5cf678da3f1ddf2de43606d48 |
| ├── [ 52] README.md -> ../../blobs/7cb18dc9bafbfcf74629a4b760af1b160957a83e |
| └── [ 76] pytorch_model.bin -> ../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd |
| ``` |
| |
| If `local_dir` is provided, the file structure from the repo will be replicated in this location. When using this |
| option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at the root of `local_dir` |
| to store some metadata related to the downloaded files. While this mechanism is not as robust as the main |
| cache-system, it's optimized for regularly pulling the latest version of a repository. |
| |
| Args: |
| repo_id (`str`): |
| A user or an organization name and a repo name separated by a `/`. |
| filename (`str`): |
| The name of the file in the repo. |
| subfolder (`str`, *optional*): |
| An optional value corresponding to a folder inside the repository. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if downloading from a dataset or space, |
| `None` or `"model"` if downloading from a model. Default is `None`. |
| revision (`str`, *optional*): |
| An optional Git revision id which can be a branch name, a tag, or a |
| commit hash. |
| cache_dir (`str`, `Path`, *optional*): |
| Path to the folder where cached files are stored. |
| local_dir (`str` or `Path`, *optional*): |
| If provided, the downloaded file will be placed under this directory. |
| force_download (`bool`, *optional*, defaults to `False`): |
| Whether the file should be downloaded even if it already exists in |
| the local cache. |
| etag_timeout (`float`, *optional*, defaults to `10`): |
| When fetching ETag, how many seconds to wait for the server to send |
| data before giving up which is passed to `httpx.request`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| local_files_only (`bool`, *optional*, defaults to `False`): |
| If `True`, avoid downloading the file and return the path to the |
| local cached file if it exists. |
| tqdm_class (`tqdm`, *optional*): |
| If provided, overwrites the default behavior for the progress bar. Passed |
| argument must inherit from `tqdm.auto.tqdm` or at least mimic its behavior. |
| Defaults to the custom HF progress bar that can be disabled by setting |
| `HF_HUB_DISABLE_PROGRESS_BARS` environment variable. |
| dry_run (`bool`, *optional*, defaults to `False`): |
| If `True`, perform a dry run without actually downloading the file. Returns a |
| [`DryRunFileInfo`] object containing information about what would be downloaded. |
| |
| Returns: |
| `str` or [`DryRunFileInfo`]: |
| - If `dry_run=False`: Local path of file or if networking is off, last version of file cached on disk. |
| - If `dry_run=True`: A [`DryRunFileInfo`] object containing download information. |
| |
| Raises: |
| [`~utils.RepositoryNotFoundError`] |
| If the repository to download from cannot be found. This may be because it doesn't exist, |
| or because it is set to `private` and you do not have access. |
| [`~utils.RevisionNotFoundError`] |
| If the revision to download from cannot be found. |
| [`~utils.RemoteEntryNotFoundError`] |
| If the file to download cannot be found. |
| [`~utils.LocalEntryNotFoundError`] |
| If network is disabled or unavailable and file is not found in cache. |
| [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError) |
| If `token=True` but the token cannot be found. |
| [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) |
| If ETag cannot be determined. |
| [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| If some parameter value is invalid. |
| """ |
| from .file_download import hf_hub_download |
|
|
| if token is None: |
| |
| token = self.token |
|
|
| return hf_hub_download( |
| repo_id=repo_id, |
| filename=filename, |
| subfolder=subfolder, |
| repo_type=repo_type, |
| revision=revision, |
| endpoint=self.endpoint, |
| library_name=self.library_name, |
| library_version=self.library_version, |
| cache_dir=cache_dir, |
| local_dir=local_dir, |
| user_agent=self.user_agent, |
| force_download=force_download, |
| etag_timeout=etag_timeout, |
| token=token, |
| headers=self.headers, |
| local_files_only=local_files_only, |
| tqdm_class=tqdm_class, |
| dry_run=dry_run, |
| ) |
|
|
| @validate_hf_hub_args |
| def snapshot_download( |
| self, |
| repo_id: str, |
| *, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| cache_dir: Union[str, Path, None] = None, |
| local_dir: Union[str, Path, None] = None, |
| etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT, |
| force_download: bool = False, |
| token: Union[bool, str, None] = None, |
| local_files_only: bool = False, |
| allow_patterns: Optional[Union[list[str], str]] = None, |
| ignore_patterns: Optional[Union[list[str], str]] = None, |
| max_workers: int = 8, |
| tqdm_class: Optional[type[base_tqdm]] = None, |
| dry_run: bool = False, |
| ) -> Union[str, list[DryRunFileInfo]]: |
| """Download repo files. |
| |
| Download a whole snapshot of a repo's files at the specified revision. This is useful when you want all files from |
| a repo, because you don't know which ones you will need a priori. All files are nested inside a folder in order |
| to keep their actual filename relative to that folder. You can also filter which files to download using |
| `allow_patterns` and `ignore_patterns`. |
| |
| If `local_dir` is provided, the file structure from the repo will be replicated in this location. When using this |
| option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at the root of `local_dir` |
| to store some metadata related to the downloaded files.While this mechanism is not as robust as the main |
| cache-system, it's optimized for regularly pulling the latest version of a repository. |
| |
| An alternative would be to clone the repo but this requires git and git-lfs to be installed and properly |
| configured. It is also not possible to filter which files to download when cloning a repository using git. |
| |
| Args: |
| repo_id (`str`): |
| A user or an organization name and a repo name separated by a `/`. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if downloading from a dataset or space, |
| `None` or `"model"` if downloading from a model. Default is `None`. |
| revision (`str`, *optional*): |
| An optional Git revision id which can be a branch name, a tag, or a |
| commit hash. |
| cache_dir (`str`, `Path`, *optional*): |
| Path to the folder where cached files are stored. |
| local_dir (`str` or `Path`, *optional*): |
| If provided, the downloaded files will be placed under this directory. |
| etag_timeout (`float`, *optional*, defaults to `10`): |
| When fetching ETag, how many seconds to wait for the server to send |
| data before giving up which is passed to `httpx.request`. |
| force_download (`bool`, *optional*, defaults to `False`): |
| Whether the file should be downloaded even if it already exists in the local cache. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| local_files_only (`bool`, *optional*, defaults to `False`): |
| If `True`, avoid downloading the file and return the path to the |
| local cached file if it exists. |
| allow_patterns (`list[str]` or `str`, *optional*): |
| If provided, only files matching at least one pattern are downloaded. |
| ignore_patterns (`list[str]` or `str`, *optional*): |
| If provided, files matching any of the patterns are not downloaded. |
| max_workers (`int`, *optional*): |
| Number of concurrent threads to download files (1 thread = 1 file download). |
| Defaults to 8. |
| tqdm_class (`tqdm`, *optional*): |
| If provided, overwrites the default behavior for the progress bar. Passed |
| argument must inherit from `tqdm.auto.tqdm` or at least mimic its behavior. |
| Note that the `tqdm_class` is not passed to each individual download. |
| Defaults to the custom HF progress bar that can be disabled by setting |
| `HF_HUB_DISABLE_PROGRESS_BARS` environment variable. |
| dry_run (`bool`, *optional*, defaults to `False`): |
| If `True`, perform a dry run without actually downloading the files. Returns a list of |
| [`DryRunFileInfo`] objects containing information about what would be downloaded. |
| |
| Returns: |
| `str` or list of [`DryRunFileInfo`]: |
| - If `dry_run=False`: Folder path of the repo snapshot. |
| - If `dry_run=True`: A list of [`DryRunFileInfo`] objects containing download information. |
| |
| Raises: |
| [`~utils.RepositoryNotFoundError`] |
| If the repository to download from cannot be found. This may be because it doesn't exist, |
| or because it is set to `private` and you do not have access. |
| [`~utils.RevisionNotFoundError`] |
| If the revision to download from cannot be found. |
| [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError) |
| If `token=True` and the token cannot be found. |
| [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if |
| ETag cannot be determined. |
| [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| if some parameter value is invalid. |
| """ |
| from ._snapshot_download import snapshot_download |
|
|
| if token is None: |
| |
| token = self.token |
|
|
| return snapshot_download( |
| repo_id=repo_id, |
| repo_type=repo_type, |
| revision=revision, |
| endpoint=self.endpoint, |
| cache_dir=cache_dir, |
| local_dir=local_dir, |
| library_name=self.library_name, |
| library_version=self.library_version, |
| user_agent=self.user_agent, |
| etag_timeout=etag_timeout, |
| force_download=force_download, |
| token=token, |
| local_files_only=local_files_only, |
| allow_patterns=allow_patterns, |
| ignore_patterns=ignore_patterns, |
| max_workers=max_workers, |
| tqdm_class=tqdm_class, |
| headers=self.headers, |
| dry_run=dry_run, |
| ) |
|
|
| def get_safetensors_metadata( |
| self, |
| repo_id: str, |
| *, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> SafetensorsRepoMetadata: |
| """ |
| Parse metadata for a safetensors repo on the Hub. |
| |
| We first check if the repo has a single safetensors file or a sharded safetensors repo. If it's a single |
| safetensors file, we parse the metadata from this file. If it's a sharded safetensors repo, we parse the |
| metadata from the index file and then parse the metadata from each shard. |
| |
| To parse metadata from a single safetensors file, use [`parse_safetensors_file_metadata`]. |
| |
| For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format. |
| |
| Args: |
| repo_id (`str`): |
| A user or an organization name and a repo name separated by a `/`. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if the file is in a dataset or space, `None` or `"model"` if in a |
| model. Default is `None`. |
| revision (`str`, *optional*): |
| The git revision to fetch the file from. Can be a branch name, a tag, or a commit hash. Defaults to the |
| head of the `"main"` branch. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`SafetensorsRepoMetadata`]: information related to safetensors repo. |
| |
| Raises: |
| [`NotASafetensorsRepoError`] |
| If the repo is not a safetensors repo i.e. doesn't have either a |
| `model.safetensors` or a `model.safetensors.index.json` file. |
| [`SafetensorsParsingError`] |
| If a safetensors file header couldn't be parsed correctly. |
| |
| Example: |
| ```py |
| # Parse repo with single weights file |
| >>> metadata = get_safetensors_metadata("bigscience/bloomz-560m") |
| >>> metadata |
| SafetensorsRepoMetadata( |
| metadata=None, |
| sharded=False, |
| weight_map={'h.0.input_layernorm.bias': 'model.safetensors', ...}, |
| files_metadata={'model.safetensors': SafetensorsFileMetadata(...)} |
| ) |
| >>> metadata.files_metadata["model.safetensors"].metadata |
| {'format': 'pt'} |
| |
| # Parse repo with sharded model |
| >>> metadata = get_safetensors_metadata("bigscience/bloom") |
| Parse safetensors files: 100%|██████████████████████████████████████████| 72/72 [00:12<00:00, 5.78it/s] |
| >>> metadata |
| SafetensorsRepoMetadata(metadata={'total_size': 352494542848}, sharded=True, weight_map={...}, files_metadata={...}) |
| >>> len(metadata.files_metadata) |
| 72 # All safetensors files have been fetched |
| |
| # Parse repo with sharded model |
| >>> get_safetensors_metadata("runwayml/stable-diffusion-v1-5") |
| NotASafetensorsRepoError: 'runwayml/stable-diffusion-v1-5' is not a safetensors repo. Couldn't find 'model.safetensors.index.json' or 'model.safetensors' files. |
| ``` |
| """ |
| if self.file_exists( |
| repo_id=repo_id, |
| filename=constants.SAFETENSORS_SINGLE_FILE, |
| repo_type=repo_type, |
| revision=revision, |
| token=token, |
| ): |
| file_metadata = self.parse_safetensors_file_metadata( |
| repo_id=repo_id, |
| filename=constants.SAFETENSORS_SINGLE_FILE, |
| repo_type=repo_type, |
| revision=revision, |
| token=token, |
| ) |
| return SafetensorsRepoMetadata( |
| metadata=None, |
| sharded=False, |
| weight_map={ |
| tensor_name: constants.SAFETENSORS_SINGLE_FILE for tensor_name in file_metadata.tensors.keys() |
| }, |
| files_metadata={constants.SAFETENSORS_SINGLE_FILE: file_metadata}, |
| ) |
| elif self.file_exists( |
| repo_id=repo_id, |
| filename=constants.SAFETENSORS_INDEX_FILE, |
| repo_type=repo_type, |
| revision=revision, |
| token=token, |
| ): |
| |
| index_file = self.hf_hub_download( |
| repo_id=repo_id, |
| filename=constants.SAFETENSORS_INDEX_FILE, |
| repo_type=repo_type, |
| revision=revision, |
| token=token, |
| ) |
| with open(index_file) as f: |
| index = json.load(f) |
|
|
| weight_map = index.get("weight_map", {}) |
|
|
| |
| files_metadata = {} |
|
|
| def _parse(filename: str) -> None: |
| files_metadata[filename] = self.parse_safetensors_file_metadata( |
| repo_id=repo_id, filename=filename, repo_type=repo_type, revision=revision, token=token |
| ) |
|
|
| thread_map( |
| _parse, |
| set(weight_map.values()), |
| desc="Parse safetensors files", |
| tqdm_class=hf_tqdm, |
| ) |
|
|
| return SafetensorsRepoMetadata( |
| metadata=index.get("metadata", None), |
| sharded=True, |
| weight_map=weight_map, |
| files_metadata=files_metadata, |
| ) |
| else: |
| |
| raise NotASafetensorsRepoError( |
| f"'{repo_id}' is not a safetensors repo. Couldn't find '{constants.SAFETENSORS_INDEX_FILE}' or '{constants.SAFETENSORS_SINGLE_FILE}' files." |
| ) |
|
|
| def parse_safetensors_file_metadata( |
| self, |
| repo_id: str, |
| filename: str, |
| *, |
| repo_type: Optional[str] = None, |
| revision: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> SafetensorsFileMetadata: |
| """ |
| Parse metadata from a safetensors file on the Hub. |
| |
| To parse metadata from all safetensors files in a repo at once, use [`get_safetensors_metadata`]. |
| |
| For more details regarding the safetensors format, check out https://huggingface.co/docs/safetensors/index#format. |
| |
| Args: |
| repo_id (`str`): |
| A user or an organization name and a repo name separated by a `/`. |
| filename (`str`): |
| The name of the file in the repo. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if the file is in a dataset or space, `None` or `"model"` if in a |
| model. Default is `None`. |
| revision (`str`, *optional*): |
| The git revision to fetch the file from. Can be a branch name, a tag, or a commit hash. Defaults to the |
| head of the `"main"` branch. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`SafetensorsFileMetadata`]: information related to a safetensors file. |
| |
| Raises: |
| [`NotASafetensorsRepoError`]: |
| If the repo is not a safetensors repo i.e. doesn't have either a |
| `model.safetensors` or a `model.safetensors.index.json` file. |
| [`SafetensorsParsingError`]: |
| If a safetensors file header couldn't be parsed correctly. |
| """ |
| url = hf_hub_url( |
| repo_id=repo_id, filename=filename, repo_type=repo_type, revision=revision, endpoint=self.endpoint |
| ) |
| _headers = self._build_hf_headers(token=token) |
|
|
| |
| |
| |
| |
| |
| response = get_session().get(url, headers={**_headers, "range": "bytes=0-100000"}) |
| hf_raise_for_status(response) |
|
|
| |
| metadata_size = struct.unpack("<Q", response.content[:8])[0] |
| if metadata_size > constants.SAFETENSORS_MAX_HEADER_LENGTH: |
| raise SafetensorsParsingError( |
| f"Failed to parse safetensors header for '{filename}' (repo '{repo_id}', revision " |
| f"'{revision or constants.DEFAULT_REVISION}'): safetensors header is too big. Maximum supported size is " |
| f"{constants.SAFETENSORS_MAX_HEADER_LENGTH} bytes (got {metadata_size})." |
| ) |
|
|
| |
| if metadata_size <= 100000: |
| metadata_as_bytes = response.content[8 : 8 + metadata_size] |
| else: |
| response = get_session().get(url, headers={**_headers, "range": f"bytes=8-{metadata_size + 7}"}) |
| hf_raise_for_status(response) |
| metadata_as_bytes = response.content |
|
|
| |
| try: |
| metadata_as_dict = json.loads(metadata_as_bytes.decode(errors="ignore")) |
| except json.JSONDecodeError as e: |
| raise SafetensorsParsingError( |
| f"Failed to parse safetensors header for '{filename}' (repo '{repo_id}', revision " |
| f"'{revision or constants.DEFAULT_REVISION}'): header is not json-encoded string. Please make sure this is a " |
| "correctly formatted safetensors file." |
| ) from e |
|
|
| try: |
| return SafetensorsFileMetadata( |
| metadata=metadata_as_dict.get("__metadata__", {}), |
| tensors={ |
| key: TensorInfo( |
| dtype=tensor["dtype"], |
| shape=tensor["shape"], |
| data_offsets=tuple(tensor["data_offsets"]), |
| ) |
| for key, tensor in metadata_as_dict.items() |
| if key != "__metadata__" |
| }, |
| ) |
| except (KeyError, IndexError) as e: |
| raise SafetensorsParsingError( |
| f"Failed to parse safetensors header for '{filename}' (repo '{repo_id}', revision " |
| f"'{revision or constants.DEFAULT_REVISION}'): header format not recognized. Please make sure this is a correctly" |
| " formatted safetensors file." |
| ) from e |
|
|
| @validate_hf_hub_args |
| def create_branch( |
| self, |
| repo_id: str, |
| *, |
| branch: str, |
| revision: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| repo_type: Optional[str] = None, |
| exist_ok: bool = False, |
| ) -> None: |
| """ |
| Create a new branch for a repo on the Hub, starting from the specified revision (defaults to `main`). |
| To find a revision suiting your needs, you can use [`list_repo_refs`] or [`list_repo_commits`]. |
| |
| Args: |
| repo_id (`str`): |
| The repository in which the branch will be created. |
| Example: `"user/my-cool-model"`. |
| |
| branch (`str`): |
| The name of the branch to create. |
| |
| revision (`str`, *optional*): |
| The git revision to create the branch from. It can be a branch name or |
| the OID/SHA of a commit, as a hexadecimal string. Defaults to the head |
| of the `"main"` branch. |
| |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if creating a branch on a dataset or |
| space, `None` or `"model"` if tagging a model. Default is `None`. |
| |
| exist_ok (`bool`, *optional*, defaults to `False`): |
| If `True`, do not raise an error if branch already exists. |
| |
| Raises: |
| [`~utils.RepositoryNotFoundError`]: |
| If repository is not found (error 404): wrong repo_id/repo_type, private |
| but not authenticated or repo does not exist. |
| [`~utils.BadRequestError`]: |
| If invalid reference for a branch. Ex: `refs/pr/5` or 'refs/foo/bar'. |
| [`~utils.HfHubHTTPError`]: |
| If the branch already exists on the repo (error 409) and `exist_ok` is |
| set to `False`. |
| """ |
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
| branch = quote(branch, safe="") |
|
|
| |
| branch_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/branch/{branch}" |
| headers = self._build_hf_headers(token=token) |
| payload = {} |
| if revision is not None: |
| payload["startingPoint"] = revision |
|
|
| |
| response = get_session().post(url=branch_url, headers=headers, json=payload) |
| try: |
| hf_raise_for_status(response) |
| except HfHubHTTPError as e: |
| if exist_ok and e.response.status_code == 409: |
| return |
| elif exist_ok and e.response.status_code == 403: |
| |
| try: |
| refs = self.list_repo_refs(repo_id=repo_id, repo_type=repo_type, token=token) |
| for branch_ref in refs.branches: |
| if branch_ref.name == branch: |
| return |
| except HfHubHTTPError: |
| pass |
| raise |
|
|
| @validate_hf_hub_args |
| def delete_branch( |
| self, |
| repo_id: str, |
| *, |
| branch: str, |
| token: Union[bool, str, None] = None, |
| repo_type: Optional[str] = None, |
| ) -> None: |
| """ |
| Delete a branch from a repo on the Hub. |
| |
| Args: |
| repo_id (`str`): |
| The repository in which a branch will be deleted. |
| Example: `"user/my-cool-model"`. |
| |
| branch (`str`): |
| The name of the branch to delete. |
| |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if creating a branch on a dataset or |
| space, `None` or `"model"` if tagging a model. Default is `None`. |
| |
| Raises: |
| [`~utils.RepositoryNotFoundError`]: |
| If repository is not found (error 404): wrong repo_id/repo_type, private |
| but not authenticated or repo does not exist. |
| [`~utils.HfHubHTTPError`]: |
| If trying to delete a protected branch. Ex: `main` cannot be deleted. |
| [`~utils.HfHubHTTPError`]: |
| If trying to delete a branch that does not exist. |
| |
| """ |
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
| branch = quote(branch, safe="") |
|
|
| |
| branch_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/branch/{branch}" |
| headers = self._build_hf_headers(token=token) |
|
|
| |
| response = get_session().delete(url=branch_url, headers=headers) |
| hf_raise_for_status(response) |
|
|
| @validate_hf_hub_args |
| def create_tag( |
| self, |
| repo_id: str, |
| *, |
| tag: str, |
| tag_message: Optional[str] = None, |
| revision: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| repo_type: Optional[str] = None, |
| exist_ok: bool = False, |
| ) -> None: |
| """ |
| Tag a given commit of a repo on the Hub. |
| |
| Args: |
| repo_id (`str`): |
| The repository in which a commit will be tagged. |
| Example: `"user/my-cool-model"`. |
| |
| tag (`str`): |
| The name of the tag to create. |
| |
| tag_message (`str`, *optional*): |
| The description of the tag to create. |
| |
| revision (`str`, *optional*): |
| The git revision to tag. It can be a branch name or the OID/SHA of a |
| commit, as a hexadecimal string. Shorthands (7 first characters) are |
| also supported. Defaults to the head of the `"main"` branch. |
| |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if tagging a dataset or |
| space, `None` or `"model"` if tagging a model. Default is |
| `None`. |
| |
| exist_ok (`bool`, *optional*, defaults to `False`): |
| If `True`, do not raise an error if tag already exists. |
| |
| Raises: |
| [`~utils.RepositoryNotFoundError`]: |
| If repository is not found (error 404): wrong repo_id/repo_type, private |
| but not authenticated or repo does not exist. |
| [`~utils.RevisionNotFoundError`]: |
| If revision is not found (error 404) on the repo. |
| [`~utils.HfHubHTTPError`]: |
| If the branch already exists on the repo (error 409) and `exist_ok` is |
| set to `False`. |
| """ |
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
| revision = quote(revision, safe="") if revision is not None else constants.DEFAULT_REVISION |
|
|
| |
| tag_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/tag/{revision}" |
| headers = self._build_hf_headers(token=token) |
| payload = {"tag": tag} |
| if tag_message is not None: |
| payload["message"] = tag_message |
|
|
| |
| response = get_session().post(url=tag_url, headers=headers, json=payload) |
| try: |
| hf_raise_for_status(response) |
| except HfHubHTTPError as e: |
| if not (e.response.status_code == 409 and exist_ok): |
| raise |
|
|
| @validate_hf_hub_args |
| def delete_tag( |
| self, |
| repo_id: str, |
| *, |
| tag: str, |
| token: Union[bool, str, None] = None, |
| repo_type: Optional[str] = None, |
| ) -> None: |
| """ |
| Delete a tag from a repo on the Hub. |
| |
| Args: |
| repo_id (`str`): |
| The repository in which a tag will be deleted. |
| Example: `"user/my-cool-model"`. |
| |
| tag (`str`): |
| The name of the tag to delete. |
| |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if tagging a dataset or space, `None` or |
| `"model"` if tagging a model. Default is `None`. |
| |
| Raises: |
| [`~utils.RepositoryNotFoundError`]: |
| If repository is not found (error 404): wrong repo_id/repo_type, private |
| but not authenticated or repo does not exist. |
| [`~utils.RevisionNotFoundError`]: |
| If tag is not found. |
| """ |
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
| tag = quote(tag, safe="") |
|
|
| |
| tag_url = f"{self.endpoint}/api/{repo_type}s/{repo_id}/tag/{tag}" |
| headers = self._build_hf_headers(token=token) |
|
|
| |
| response = get_session().delete(url=tag_url, headers=headers) |
| hf_raise_for_status(response) |
|
|
| @validate_hf_hub_args |
| def get_full_repo_name( |
| self, |
| model_id: str, |
| *, |
| organization: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ): |
| """ |
| Returns the repository name for a given model ID and optional |
| organization. |
| |
| Args: |
| model_id (`str`): |
| The name of the model. |
| organization (`str`, *optional*): |
| If passed, the repository name will be in the organization |
| namespace instead of the user namespace. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `str`: The repository name in the user's namespace |
| ({username}/{model_id}) if no organization is passed, and under the |
| organization namespace ({organization}/{model_id}) otherwise. |
| """ |
| if organization is None: |
| if "/" in model_id: |
| username = model_id.split("/")[0] |
| else: |
| username = self.whoami(token=token)["name"] |
| return f"{username}/{model_id}" |
| else: |
| return f"{organization}/{model_id}" |
|
|
| @validate_hf_hub_args |
| def get_repo_discussions( |
| self, |
| repo_id: str, |
| *, |
| author: Optional[str] = None, |
| discussion_type: Optional[constants.DiscussionTypeFilter] = None, |
| discussion_status: Optional[constants.DiscussionStatusFilter] = None, |
| repo_type: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> Iterator[Discussion]: |
| """ |
| Fetches Discussions and Pull Requests for the given repo. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| author (`str`, *optional*): |
| Pass a value to filter by discussion author. `None` means no filter. |
| Default is `None`. |
| discussion_type (`str`, *optional*): |
| Set to `"pull_request"` to fetch only pull requests, `"discussion"` |
| to fetch only discussions. Set to `"all"` or `None` to fetch both. |
| Default is `None`. |
| discussion_status (`str`, *optional*): |
| Set to `"open"` (respectively `"closed"`) to fetch only open |
| (respectively closed) discussions. Set to `"all"` or `None` |
| to fetch both. |
| Default is `None`. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if fetching from a dataset or |
| space, `None` or `"model"` if fetching from a model. Default is |
| `None`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `Iterator[Discussion]`: An iterator of [`Discussion`] objects. |
| |
| Example: |
| Collecting all discussions of a repo in a list: |
| |
| ```python |
| >>> from huggingface_hub import get_repo_discussions |
| >>> discussions_list = list(get_repo_discussions(repo_id="bert-base-uncased")) |
| ``` |
| |
| Iterating over discussions of a repo: |
| |
| ```python |
| >>> from huggingface_hub import get_repo_discussions |
| >>> for discussion in get_repo_discussions(repo_id="bert-base-uncased"): |
| ... print(discussion.num, discussion.title) |
| ``` |
| """ |
| if repo_type not in constants.REPO_TYPES: |
| raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") |
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
| if discussion_type is not None and discussion_type not in constants.DISCUSSION_TYPES: |
| raise ValueError(f"Invalid discussion_type, must be one of {constants.DISCUSSION_TYPES}") |
| if discussion_status is not None and discussion_status not in constants.DISCUSSION_STATUS: |
| raise ValueError(f"Invalid discussion_status, must be one of {constants.DISCUSSION_STATUS}") |
|
|
| headers = self._build_hf_headers(token=token) |
| path = f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions" |
|
|
| params: dict[str, Union[str, int]] = {} |
| if discussion_type is not None: |
| params["type"] = discussion_type |
| if discussion_status is not None: |
| params["status"] = discussion_status |
| if author is not None: |
| params["author"] = author |
|
|
| def _fetch_discussion_page(page_index: int): |
| params["p"] = page_index |
| resp = get_session().get(path, headers=headers, params=params) |
| hf_raise_for_status(resp) |
| paginated_discussions = resp.json() |
| total = paginated_discussions["count"] |
| start = paginated_discussions["start"] |
| discussions = paginated_discussions["discussions"] |
| has_next = (start + len(discussions)) < total |
| return discussions, has_next |
|
|
| has_next, page_index = True, 0 |
|
|
| while has_next: |
| discussions, has_next = _fetch_discussion_page(page_index=page_index) |
| for discussion in discussions: |
| yield Discussion( |
| title=discussion["title"], |
| num=discussion["num"], |
| author=discussion.get("author", {}).get("name", "deleted"), |
| created_at=parse_datetime(discussion["createdAt"]), |
| status=discussion["status"], |
| repo_id=discussion["repo"]["name"], |
| repo_type=discussion["repo"]["type"], |
| is_pull_request=discussion["isPullRequest"], |
| endpoint=self.endpoint, |
| ) |
| page_index = page_index + 1 |
|
|
| @validate_hf_hub_args |
| def get_discussion_details( |
| self, |
| repo_id: str, |
| discussion_num: int, |
| *, |
| repo_type: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> DiscussionWithDetails: |
| """Fetches a Discussion's / Pull Request 's details from the Hub. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| discussion_num (`int`): |
| The number of the Discussion or Pull Request . Must be a strictly positive integer. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if uploading to a dataset or |
| space, `None` or `"model"` if uploading to a model. Default is |
| `None`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: [`DiscussionWithDetails`] |
| |
| > [!TIP] |
| > Raises the following errors: |
| > |
| > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) |
| > if the HuggingFace API returned an error |
| > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| > if some parameter value is invalid |
| > - [`~utils.RepositoryNotFoundError`] |
| > If the repository to download from cannot be found. This may be because it doesn't exist, |
| > or because it is set to `private` and you do not have access. |
| """ |
| if not isinstance(discussion_num, int) or discussion_num <= 0: |
| raise ValueError("Invalid discussion_num, must be a positive integer") |
| if repo_type not in constants.REPO_TYPES: |
| raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") |
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
|
|
| path = f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions/{discussion_num}" |
| headers = self._build_hf_headers(token=token) |
| resp = get_session().get(path, params={"diff": "1"}, headers=headers) |
| hf_raise_for_status(resp) |
|
|
| discussion_details = resp.json() |
| is_pull_request = discussion_details["isPullRequest"] |
|
|
| target_branch = discussion_details["changes"]["base"] if is_pull_request else None |
| conflicting_files = discussion_details["filesWithConflicts"] if is_pull_request else None |
| merge_commit_oid = discussion_details["changes"].get("mergeCommitId", None) if is_pull_request else None |
|
|
| return DiscussionWithDetails( |
| title=discussion_details["title"], |
| num=discussion_details["num"], |
| author=discussion_details.get("author", {}).get("name", "deleted"), |
| created_at=parse_datetime(discussion_details["createdAt"]), |
| status=discussion_details["status"], |
| repo_id=discussion_details["repo"]["name"], |
| repo_type=discussion_details["repo"]["type"], |
| is_pull_request=discussion_details["isPullRequest"], |
| events=[deserialize_event(evt) for evt in discussion_details["events"]], |
| conflicting_files=conflicting_files, |
| target_branch=target_branch, |
| merge_commit_oid=merge_commit_oid, |
| diff=discussion_details.get("diff"), |
| endpoint=self.endpoint, |
| ) |
|
|
| @validate_hf_hub_args |
| def create_discussion( |
| self, |
| repo_id: str, |
| title: str, |
| *, |
| token: Union[bool, str, None] = None, |
| description: Optional[str] = None, |
| repo_type: Optional[str] = None, |
| pull_request: bool = False, |
| ) -> DiscussionWithDetails: |
| """Creates a Discussion or Pull Request. |
| |
| Pull Requests created programmatically will be in `"draft"` status. |
| |
| Creating a Pull Request with changes can also be done at once with [`HfApi.create_commit`]. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| title (`str`): |
| The title of the discussion. It can be up to 200 characters long, |
| and must be at least 3 characters long. Leading and trailing whitespaces |
| will be stripped. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| description (`str`, *optional*): |
| An optional description for the Pull Request. |
| Defaults to `"Discussion opened with the huggingface_hub Python library"` |
| pull_request (`bool`, *optional*): |
| Whether to create a Pull Request or discussion. If `True`, creates a Pull Request. |
| If `False`, creates a discussion. Defaults to `False`. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if uploading to a dataset or |
| space, `None` or `"model"` if uploading to a model. Default is |
| `None`. |
| |
| Returns: [`DiscussionWithDetails`] |
| |
| > [!TIP] |
| > Raises the following errors: |
| > |
| > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) |
| > if the HuggingFace API returned an error |
| > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| > if some parameter value is invalid |
| > - [`~utils.RepositoryNotFoundError`] |
| > If the repository to download from cannot be found. This may be because it doesn't exist, |
| > or because it is set to `private` and you do not have access.""" |
| if repo_type not in constants.REPO_TYPES: |
| raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") |
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
|
|
| if description is not None: |
| description = description.strip() |
| description = ( |
| description |
| if description |
| else ( |
| f"{'Pull Request' if pull_request else 'Discussion'} opened with the" |
| " [huggingface_hub Python" |
| " library](https://huggingface.co/docs/huggingface_hub)" |
| ) |
| ) |
|
|
| headers = self._build_hf_headers(token=token) |
| resp = get_session().post( |
| f"{self.endpoint}/api/{repo_type}s/{repo_id}/discussions", |
| json={ |
| "title": title.strip(), |
| "description": description, |
| "pullRequest": pull_request, |
| }, |
| headers=headers, |
| ) |
| hf_raise_for_status(resp) |
| num = resp.json()["num"] |
| return self.get_discussion_details( |
| repo_id=repo_id, |
| repo_type=repo_type, |
| discussion_num=num, |
| token=token, |
| ) |
|
|
| @validate_hf_hub_args |
| def create_pull_request( |
| self, |
| repo_id: str, |
| title: str, |
| *, |
| token: Union[bool, str, None] = None, |
| description: Optional[str] = None, |
| repo_type: Optional[str] = None, |
| ) -> DiscussionWithDetails: |
| """Creates a Pull Request . Pull Requests created programmatically will be in `"draft"` status. |
| |
| Creating a Pull Request with changes can also be done at once with [`HfApi.create_commit`]; |
| |
| This is a wrapper around [`HfApi.create_discussion`]. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| title (`str`): |
| The title of the discussion. It can be up to 200 characters long, |
| and must be at least 3 characters long. Leading and trailing whitespaces |
| will be stripped. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| description (`str`, *optional*): |
| An optional description for the Pull Request. |
| Defaults to `"Discussion opened with the huggingface_hub Python library"` |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if uploading to a dataset or |
| space, `None` or `"model"` if uploading to a model. Default is |
| `None`. |
| |
| Returns: [`DiscussionWithDetails`] |
| |
| > [!TIP] |
| > Raises the following errors: |
| > |
| > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) |
| > if the HuggingFace API returned an error |
| > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| > if some parameter value is invalid |
| > - [`~utils.RepositoryNotFoundError`] |
| > If the repository to download from cannot be found. This may be because it doesn't exist, |
| > or because it is set to `private` and you do not have access.""" |
| return self.create_discussion( |
| repo_id=repo_id, |
| title=title, |
| token=token, |
| description=description, |
| repo_type=repo_type, |
| pull_request=True, |
| ) |
|
|
| def _post_discussion_changes( |
| self, |
| *, |
| repo_id: str, |
| discussion_num: int, |
| resource: str, |
| body: Optional[dict] = None, |
| token: Union[bool, str, None] = None, |
| repo_type: Optional[str] = None, |
| ) -> httpx.Response: |
| """Internal utility to POST changes to a Discussion or Pull Request""" |
| if not isinstance(discussion_num, int) or discussion_num <= 0: |
| raise ValueError("Invalid discussion_num, must be a positive integer") |
| if repo_type not in constants.REPO_TYPES: |
| raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") |
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
| repo_id = f"{repo_type}s/{repo_id}" |
|
|
| path = f"{self.endpoint}/api/{repo_id}/discussions/{discussion_num}/{resource}" |
|
|
| headers = self._build_hf_headers(token=token) |
| resp = get_session().post(path, headers=headers, json=body) |
| hf_raise_for_status(resp) |
| return resp |
|
|
| @validate_hf_hub_args |
| def comment_discussion( |
| self, |
| repo_id: str, |
| discussion_num: int, |
| comment: str, |
| *, |
| token: Union[bool, str, None] = None, |
| repo_type: Optional[str] = None, |
| ) -> DiscussionComment: |
| """Creates a new comment on the given Discussion. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| discussion_num (`int`): |
| The number of the Discussion or Pull Request . Must be a strictly positive integer. |
| comment (`str`): |
| The content of the comment to create. Comments support markdown formatting. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if uploading to a dataset or |
| space, `None` or `"model"` if uploading to a model. Default is |
| `None`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`DiscussionComment`]: the newly created comment |
| |
| |
| Examples: |
| ```python |
| |
| >>> comment = \"\"\" |
| ... Hello @otheruser! |
| ... |
| ... # This is a title |
| ... |
| ... **This is bold**, *this is italic* and ~this is strikethrough~ |
| ... And [this](http://url) is a link |
| ... \"\"\" |
| |
| >>> HfApi().comment_discussion( |
| ... repo_id="username/repo_name", |
| ... discussion_num=34 |
| ... comment=comment |
| ... ) |
| # DiscussionComment(id='deadbeef0000000', type='comment', ...) |
| |
| ``` |
| |
| > [!TIP] |
| > Raises the following errors: |
| > |
| > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) |
| > if the HuggingFace API returned an error |
| > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| > if some parameter value is invalid |
| > - [`~utils.RepositoryNotFoundError`] |
| > If the repository to download from cannot be found. This may be because it doesn't exist, |
| > or because it is set to `private` and you do not have access. |
| """ |
| resp = self._post_discussion_changes( |
| repo_id=repo_id, |
| repo_type=repo_type, |
| discussion_num=discussion_num, |
| token=token, |
| resource="comment", |
| body={"comment": comment}, |
| ) |
| return deserialize_event(resp.json()["newMessage"]) |
|
|
| @validate_hf_hub_args |
| def rename_discussion( |
| self, |
| repo_id: str, |
| discussion_num: int, |
| new_title: str, |
| *, |
| token: Union[bool, str, None] = None, |
| repo_type: Optional[str] = None, |
| ) -> DiscussionTitleChange: |
| """Renames a Discussion. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| discussion_num (`int`): |
| The number of the Discussion or Pull Request . Must be a strictly positive integer. |
| new_title (`str`): |
| The new title for the discussion |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if uploading to a dataset or |
| space, `None` or `"model"` if uploading to a model. Default is |
| `None`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`DiscussionTitleChange`]: the title change event |
| |
| |
| Examples: |
| ```python |
| >>> new_title = "New title, fixing a typo" |
| >>> HfApi().rename_discussion( |
| ... repo_id="username/repo_name", |
| ... discussion_num=34 |
| ... new_title=new_title |
| ... ) |
| # DiscussionTitleChange(id='deadbeef0000000', type='title-change', ...) |
| |
| ``` |
| |
| > [!TIP] |
| > Raises the following errors: |
| > |
| > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) |
| > if the HuggingFace API returned an error |
| > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| > if some parameter value is invalid |
| > - [`~utils.RepositoryNotFoundError`] |
| > If the repository to download from cannot be found. This may be because it doesn't exist, |
| > or because it is set to `private` and you do not have access. |
| """ |
| resp = self._post_discussion_changes( |
| repo_id=repo_id, |
| repo_type=repo_type, |
| discussion_num=discussion_num, |
| token=token, |
| resource="title", |
| body={"title": new_title}, |
| ) |
| return deserialize_event(resp.json()["newTitle"]) |
|
|
| @validate_hf_hub_args |
| def change_discussion_status( |
| self, |
| repo_id: str, |
| discussion_num: int, |
| new_status: Literal["open", "closed"], |
| *, |
| token: Union[bool, str, None] = None, |
| comment: Optional[str] = None, |
| repo_type: Optional[str] = None, |
| ) -> DiscussionStatusChange: |
| """Closes or re-opens a Discussion or Pull Request. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| discussion_num (`int`): |
| The number of the Discussion or Pull Request . Must be a strictly positive integer. |
| new_status (`str`): |
| The new status for the discussion, either `"open"` or `"closed"`. |
| comment (`str`, *optional*): |
| An optional comment to post with the status change. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if uploading to a dataset or |
| space, `None` or `"model"` if uploading to a model. Default is |
| `None`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`DiscussionStatusChange`]: the status change event |
| |
| |
| Examples: |
| ```python |
| >>> new_title = "New title, fixing a typo" |
| >>> HfApi().rename_discussion( |
| ... repo_id="username/repo_name", |
| ... discussion_num=34 |
| ... new_title=new_title |
| ... ) |
| # DiscussionStatusChange(id='deadbeef0000000', type='status-change', ...) |
| |
| ``` |
| |
| > [!TIP] |
| > Raises the following errors: |
| > |
| > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) |
| > if the HuggingFace API returned an error |
| > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| > if some parameter value is invalid |
| > - [`~utils.RepositoryNotFoundError`] |
| > If the repository to download from cannot be found. This may be because it doesn't exist, |
| > or because it is set to `private` and you do not have access. |
| """ |
| if new_status not in ["open", "closed"]: |
| raise ValueError("Invalid status, valid statuses are: 'open' and 'closed'") |
| body: dict[str, str] = {"status": new_status} |
| if comment and comment.strip(): |
| body["comment"] = comment.strip() |
| resp = self._post_discussion_changes( |
| repo_id=repo_id, |
| repo_type=repo_type, |
| discussion_num=discussion_num, |
| token=token, |
| resource="status", |
| body=body, |
| ) |
| return deserialize_event(resp.json()["newStatus"]) |
|
|
| @validate_hf_hub_args |
| def merge_pull_request( |
| self, |
| repo_id: str, |
| discussion_num: int, |
| *, |
| token: Union[bool, str, None] = None, |
| comment: Optional[str] = None, |
| repo_type: Optional[str] = None, |
| ): |
| """Merges a Pull Request. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| discussion_num (`int`): |
| The number of the Discussion or Pull Request . Must be a strictly positive integer. |
| comment (`str`, *optional*): |
| An optional comment to post with the status change. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if uploading to a dataset or |
| space, `None` or `"model"` if uploading to a model. Default is |
| `None`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`DiscussionStatusChange`]: the status change event |
| |
| > [!TIP] |
| > Raises the following errors: |
| > |
| > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) |
| > if the HuggingFace API returned an error |
| > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| > if some parameter value is invalid |
| > - [`~utils.RepositoryNotFoundError`] |
| > If the repository to download from cannot be found. This may be because it doesn't exist, |
| > or because it is set to `private` and you do not have access. |
| """ |
| self._post_discussion_changes( |
| repo_id=repo_id, |
| repo_type=repo_type, |
| discussion_num=discussion_num, |
| token=token, |
| resource="merge", |
| body={"comment": comment.strip()} if comment and comment.strip() else None, |
| ) |
|
|
| @validate_hf_hub_args |
| def edit_discussion_comment( |
| self, |
| repo_id: str, |
| discussion_num: int, |
| comment_id: str, |
| new_content: str, |
| *, |
| token: Union[bool, str, None] = None, |
| repo_type: Optional[str] = None, |
| ) -> DiscussionComment: |
| """Edits a comment on a Discussion / Pull Request. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| discussion_num (`int`): |
| The number of the Discussion or Pull Request . Must be a strictly positive integer. |
| comment_id (`str`): |
| The ID of the comment to edit. |
| new_content (`str`): |
| The new content of the comment. Comments support markdown formatting. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if uploading to a dataset or |
| space, `None` or `"model"` if uploading to a model. Default is |
| `None`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`DiscussionComment`]: the edited comment |
| |
| > [!TIP] |
| > Raises the following errors: |
| > |
| > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) |
| > if the HuggingFace API returned an error |
| > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| > if some parameter value is invalid |
| > - [`~utils.RepositoryNotFoundError`] |
| > If the repository to download from cannot be found. This may be because it doesn't exist, |
| > or because it is set to `private` and you do not have access. |
| """ |
| resp = self._post_discussion_changes( |
| repo_id=repo_id, |
| repo_type=repo_type, |
| discussion_num=discussion_num, |
| token=token, |
| resource=f"comment/{comment_id.lower()}/edit", |
| body={"content": new_content}, |
| ) |
| return deserialize_event(resp.json()["updatedComment"]) |
|
|
| @validate_hf_hub_args |
| def hide_discussion_comment( |
| self, |
| repo_id: str, |
| discussion_num: int, |
| comment_id: str, |
| *, |
| token: Union[bool, str, None] = None, |
| repo_type: Optional[str] = None, |
| ) -> DiscussionComment: |
| """Hides a comment on a Discussion / Pull Request. |
| |
| > [!WARNING] |
| > Hidden comments' content cannot be retrieved anymore. Hiding a comment is irreversible. |
| |
| Args: |
| repo_id (`str`): |
| A namespace (user or an organization) and a repo name separated |
| by a `/`. |
| discussion_num (`int`): |
| The number of the Discussion or Pull Request . Must be a strictly positive integer. |
| comment_id (`str`): |
| The ID of the comment to edit. |
| repo_type (`str`, *optional*): |
| Set to `"dataset"` or `"space"` if uploading to a dataset or |
| space, `None` or `"model"` if uploading to a model. Default is |
| `None`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`DiscussionComment`]: the hidden comment |
| |
| > [!TIP] |
| > Raises the following errors: |
| > |
| > - [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError) |
| > if the HuggingFace API returned an error |
| > - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| > if some parameter value is invalid |
| > - [`~utils.RepositoryNotFoundError`] |
| > If the repository to download from cannot be found. This may be because it doesn't exist, |
| > or because it is set to `private` and you do not have access. |
| """ |
| warnings.warn( |
| "Hidden comments' content cannot be retrieved anymore. Hiding a comment is irreversible.", |
| UserWarning, |
| ) |
| resp = self._post_discussion_changes( |
| repo_id=repo_id, |
| repo_type=repo_type, |
| discussion_num=discussion_num, |
| token=token, |
| resource=f"comment/{comment_id.lower()}/hide", |
| ) |
| return deserialize_event(resp.json()["updatedComment"]) |
|
|
| @validate_hf_hub_args |
| def add_space_secret( |
| self, |
| repo_id: str, |
| key: str, |
| value: str, |
| *, |
| description: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> None: |
| """Adds or updates a secret in a Space. |
| |
| Secrets allow to set secret keys or tokens to a Space without hardcoding them. |
| For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets. |
| |
| Args: |
| repo_id (`str`): |
| ID of the repo to update. Example: `"bigcode/in-the-stack"`. |
| key (`str`): |
| Secret key. Example: `"GITHUB_API_KEY"` |
| value (`str`): |
| Secret value. Example: `"your_github_api_key"`. |
| description (`str`, *optional*): |
| Secret description. Example: `"Github API key to access the Github API"`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| """ |
| payload = {"key": key, "value": value} |
| if description is not None: |
| payload["description"] = description |
| r = get_session().post( |
| f"{self.endpoint}/api/spaces/{repo_id}/secrets", |
| headers=self._build_hf_headers(token=token), |
| json=payload, |
| ) |
| hf_raise_for_status(r) |
|
|
| @validate_hf_hub_args |
| def delete_space_secret(self, repo_id: str, key: str, *, token: Union[bool, str, None] = None) -> None: |
| """Deletes a secret from a Space. |
| |
| Secrets allow to set secret keys or tokens to a Space without hardcoding them. |
| For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets. |
| |
| Args: |
| repo_id (`str`): |
| ID of the repo to update. Example: `"bigcode/in-the-stack"`. |
| key (`str`): |
| Secret key. Example: `"GITHUB_API_KEY"`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| """ |
| r = get_session().request( |
| "DELETE", |
| f"{self.endpoint}/api/spaces/{repo_id}/secrets", |
| headers=self._build_hf_headers(token=token), |
| json={"key": key}, |
| ) |
| hf_raise_for_status(r) |
|
|
| @validate_hf_hub_args |
| def get_space_variables(self, repo_id: str, *, token: Union[bool, str, None] = None) -> dict[str, SpaceVariable]: |
| """Gets all variables from a Space. |
| |
| Variables allow to set environment variables to a Space without hardcoding them. |
| For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables |
| |
| Args: |
| repo_id (`str`): |
| ID of the repo to query. Example: `"bigcode/in-the-stack"`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| """ |
| r = get_session().get( |
| f"{self.endpoint}/api/spaces/{repo_id}/variables", |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(r) |
| return {k: SpaceVariable(k, v) for k, v in r.json().items()} |
|
|
| @validate_hf_hub_args |
| def add_space_variable( |
| self, |
| repo_id: str, |
| key: str, |
| value: str, |
| *, |
| description: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> dict[str, SpaceVariable]: |
| """Adds or updates a variable in a Space. |
| |
| Variables allow to set environment variables to a Space without hardcoding them. |
| For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables |
| |
| Args: |
| repo_id (`str`): |
| ID of the repo to update. Example: `"bigcode/in-the-stack"`. |
| key (`str`): |
| Variable key. Example: `"MODEL_REPO_ID"` |
| value (`str`): |
| Variable value. Example: `"the_model_repo_id"`. |
| description (`str`): |
| Description of the variable. Example: `"Model Repo ID of the implemented model"`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| """ |
| payload = {"key": key, "value": value} |
| if description is not None: |
| payload["description"] = description |
| r = get_session().post( |
| f"{self.endpoint}/api/spaces/{repo_id}/variables", |
| headers=self._build_hf_headers(token=token), |
| json=payload, |
| ) |
| hf_raise_for_status(r) |
| return {k: SpaceVariable(k, v) for k, v in r.json().items()} |
|
|
| @validate_hf_hub_args |
| def delete_space_variable( |
| self, repo_id: str, key: str, *, token: Union[bool, str, None] = None |
| ) -> dict[str, SpaceVariable]: |
| """Deletes a variable from a Space. |
| |
| Variables allow to set environment variables to a Space without hardcoding them. |
| For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables |
| |
| Args: |
| repo_id (`str`): |
| ID of the repo to update. Example: `"bigcode/in-the-stack"`. |
| key (`str`): |
| Variable key. Example: `"MODEL_REPO_ID"` |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| """ |
| r = get_session().request( |
| "DELETE", |
| f"{self.endpoint}/api/spaces/{repo_id}/variables", |
| headers=self._build_hf_headers(token=token), |
| json={"key": key}, |
| ) |
| hf_raise_for_status(r) |
| return {k: SpaceVariable(k, v) for k, v in r.json().items()} |
|
|
| @validate_hf_hub_args |
| def get_space_runtime(self, repo_id: str, *, token: Union[bool, str, None] = None) -> SpaceRuntime: |
| """Gets runtime information about a Space. |
| |
| Args: |
| repo_id (`str`): |
| ID of the repo to update. Example: `"bigcode/in-the-stack"`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| Returns: |
| [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware. |
| """ |
| r = get_session().get( |
| f"{self.endpoint}/api/spaces/{repo_id}/runtime", headers=self._build_hf_headers(token=token) |
| ) |
| hf_raise_for_status(r) |
| return SpaceRuntime(r.json()) |
|
|
| @validate_hf_hub_args |
| def request_space_hardware( |
| self, |
| repo_id: str, |
| hardware: SpaceHardware, |
| *, |
| token: Union[bool, str, None] = None, |
| sleep_time: Optional[int] = None, |
| ) -> SpaceRuntime: |
| """Request new hardware for a Space. |
| |
| Args: |
| repo_id (`str`): |
| ID of the repo to update. Example: `"bigcode/in-the-stack"`. |
| hardware (`str` or [`SpaceHardware`]): |
| Hardware on which to run the Space. Example: `"t4-medium"`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| sleep_time (`int`, *optional*): |
| Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1` if you don't want |
| your Space to sleep (default behavior for upgraded hardware). For free hardware, you can't configure |
| the sleep time (value is fixed to 48 hours of inactivity). |
| See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details. |
| Returns: |
| [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware. |
| |
| > [!TIP] |
| > It is also possible to request hardware directly when creating the Space repo! See [`create_repo`] for details. |
| """ |
| if sleep_time is not None and hardware == SpaceHardware.CPU_BASIC: |
| warnings.warn( |
| "If your Space runs on the default 'cpu-basic' hardware, it will go to sleep if inactive for more" |
| " than 48 hours. This value is not configurable. If you don't want your Space to deactivate or if" |
| " you want to set a custom sleep time, you need to upgrade to a paid Hardware.", |
| UserWarning, |
| ) |
| payload: dict[str, Any] = {"flavor": hardware} |
| if sleep_time is not None: |
| payload["sleepTimeSeconds"] = sleep_time |
| r = get_session().post( |
| f"{self.endpoint}/api/spaces/{repo_id}/hardware", |
| headers=self._build_hf_headers(token=token), |
| json=payload, |
| ) |
| hf_raise_for_status(r) |
| return SpaceRuntime(r.json()) |
|
|
| @validate_hf_hub_args |
| def set_space_sleep_time( |
| self, repo_id: str, sleep_time: int, *, token: Union[bool, str, None] = None |
| ) -> SpaceRuntime: |
| """Set a custom sleep time for a Space running on upgraded hardware.. |
| |
| Your Space will go to sleep after X seconds of inactivity. You are not billed when your Space is in "sleep" |
| mode. If a new visitor lands on your Space, it will "wake it up". Only upgraded hardware can have a |
| configurable sleep time. To know more about the sleep stage, please refer to |
| https://huggingface.co/docs/hub/spaces-gpus#sleep-time. |
| |
| Args: |
| repo_id (`str`): |
| ID of the repo to update. Example: `"bigcode/in-the-stack"`. |
| sleep_time (`int`, *optional*): |
| Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1` if you don't want |
| your Space to pause (default behavior for upgraded hardware). For free hardware, you can't configure |
| the sleep time (value is fixed to 48 hours of inactivity). |
| See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| Returns: |
| [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware. |
| |
| > [!TIP] |
| > It is also possible to set a custom sleep time when requesting hardware with [`request_space_hardware`]. |
| """ |
| r = get_session().post( |
| f"{self.endpoint}/api/spaces/{repo_id}/sleeptime", |
| headers=self._build_hf_headers(token=token), |
| json={"seconds": sleep_time}, |
| ) |
| hf_raise_for_status(r) |
| runtime = SpaceRuntime(r.json()) |
|
|
| hardware = runtime.requested_hardware or runtime.hardware |
| if hardware == SpaceHardware.CPU_BASIC: |
| warnings.warn( |
| "If your Space runs on the default 'cpu-basic' hardware, it will go to sleep if inactive for more" |
| " than 48 hours. This value is not configurable. If you don't want your Space to deactivate or if" |
| " you want to set a custom sleep time, you need to upgrade to a paid Hardware.", |
| UserWarning, |
| ) |
| return runtime |
|
|
| @validate_hf_hub_args |
| def pause_space(self, repo_id: str, *, token: Union[bool, str, None] = None) -> SpaceRuntime: |
| """Pause your Space. |
| |
| A paused Space stops executing until manually restarted by its owner. This is different from the sleeping |
| state in which free Spaces go after 48h of inactivity. Paused time is not billed to your account, no matter the |
| hardware you've selected. To restart your Space, use [`restart_space`] and go to your Space settings page. |
| |
| For more details, please visit [the docs](https://huggingface.co/docs/hub/spaces-gpus#pause). |
| |
| Args: |
| repo_id (`str`): |
| ID of the Space to pause. Example: `"Salesforce/BLIP2"`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`SpaceRuntime`]: Runtime information about your Space including `stage=PAUSED` and requested hardware. |
| |
| Raises: |
| [`~utils.RepositoryNotFoundError`]: |
| If your Space is not found (error 404). Most probably wrong repo_id or your space is private but you |
| are not authenticated. |
| [`~utils.HfHubHTTPError`]: |
| 403 Forbidden: only the owner of a Space can pause it. If you want to manage a Space that you don't |
| own, either ask the owner by opening a Discussion or duplicate the Space. |
| [`~utils.BadRequestError`]: |
| If your Space is a static Space. Static Spaces are always running and never billed. If you want to hide |
| a static Space, you can set it to private. |
| """ |
| r = get_session().post( |
| f"{self.endpoint}/api/spaces/{repo_id}/pause", headers=self._build_hf_headers(token=token) |
| ) |
| hf_raise_for_status(r) |
| return SpaceRuntime(r.json()) |
|
|
| @validate_hf_hub_args |
| def restart_space( |
| self, repo_id: str, *, token: Union[bool, str, None] = None, factory_reboot: bool = False |
| ) -> SpaceRuntime: |
| """Restart your Space. |
| |
| This is the only way to programmatically restart a Space if you've put it on Pause (see [`pause_space`]). You |
| must be the owner of the Space to restart it. If you are using an upgraded hardware, your account will be |
| billed as soon as the Space is restarted. You can trigger a restart no matter the current state of a Space. |
| |
| For more details, please visit [the docs](https://huggingface.co/docs/hub/spaces-gpus#pause). |
| |
| Args: |
| repo_id (`str`): |
| ID of the Space to restart. Example: `"Salesforce/BLIP2"`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| factory_reboot (`bool`, *optional*): |
| If `True`, the Space will be rebuilt from scratch without caching any requirements. |
| |
| Returns: |
| [`SpaceRuntime`]: Runtime information about your Space. |
| |
| Raises: |
| [`~utils.RepositoryNotFoundError`]: |
| If your Space is not found (error 404). Most probably wrong repo_id or your space is private but you |
| are not authenticated. |
| [`~utils.HfHubHTTPError`]: |
| 403 Forbidden: only the owner of a Space can restart it. If you want to restart a Space that you don't |
| own, either ask the owner by opening a Discussion or duplicate the Space. |
| [`~utils.BadRequestError`]: |
| If your Space is a static Space. Static Spaces are always running and never billed. If you want to hide |
| a static Space, you can set it to private. |
| """ |
| params = {} |
| if factory_reboot: |
| params["factory"] = "true" |
| r = get_session().post( |
| f"{self.endpoint}/api/spaces/{repo_id}/restart", headers=self._build_hf_headers(token=token), params=params |
| ) |
| hf_raise_for_status(r) |
| return SpaceRuntime(r.json()) |
|
|
| @validate_hf_hub_args |
| def duplicate_space( |
| self, |
| from_id: str, |
| to_id: Optional[str] = None, |
| *, |
| private: Optional[bool] = None, |
| token: Union[bool, str, None] = None, |
| exist_ok: bool = False, |
| hardware: Optional[SpaceHardware] = None, |
| storage: Optional[SpaceStorage] = None, |
| sleep_time: Optional[int] = None, |
| secrets: Optional[list[dict[str, str]]] = None, |
| variables: Optional[list[dict[str, str]]] = None, |
| ) -> RepoUrl: |
| """Duplicate a Space. |
| |
| Programmatically duplicate a Space. The new Space will be created in your account and will be in the same state |
| as the original Space (running or paused). You can duplicate a Space no matter the current state of a Space. |
| |
| Args: |
| from_id (`str`): |
| ID of the Space to duplicate. Example: `"pharma/CLIP-Interrogator"`. |
| to_id (`str`, *optional*): |
| ID of the new Space. Example: `"dog/CLIP-Interrogator"`. If not provided, the new Space will have the same |
| name as the original Space, but in your account. |
| private (`bool`, *optional*): |
| Whether the new Space should be private or not. Defaults to the same privacy as the original Space. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| exist_ok (`bool`, *optional*, defaults to `False`): |
| If `True`, do not raise an error if repo already exists. |
| hardware (`SpaceHardware` or `str`, *optional*): |
| Choice of Hardware. Example: `"t4-medium"`. See [`SpaceHardware`] for a complete list. |
| storage (`SpaceStorage` or `str`, *optional*): |
| Choice of persistent storage tier. Example: `"small"`. See [`SpaceStorage`] for a complete list. |
| sleep_time (`int`, *optional*): |
| Number of seconds of inactivity to wait before a Space is put to sleep. Set to `-1` if you don't want |
| your Space to sleep (default behavior for upgraded hardware). For free hardware, you can't configure |
| the sleep time (value is fixed to 48 hours of inactivity). |
| See https://huggingface.co/docs/hub/spaces-gpus#sleep-time for more details. |
| secrets (`list[dict[str, str]]`, *optional*): |
| A list of secret keys to set in your Space. Each item is in the form `{"key": ..., "value": ..., "description": ...}` where description is optional. |
| For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets. |
| variables (`list[dict[str, str]]`, *optional*): |
| A list of public environment variables to set in your Space. Each item is in the form `{"key": ..., "value": ..., "description": ...}` where description is optional. |
| For more details, see https://huggingface.co/docs/hub/spaces-overview#managing-secrets-and-environment-variables. |
| |
| Returns: |
| [`RepoUrl`]: URL to the newly created repo. Value is a subclass of `str` containing |
| attributes like `endpoint`, `repo_type` and `repo_id`. |
| |
| Raises: |
| [`~utils.RepositoryNotFoundError`]: |
| If one of `from_id` or `to_id` cannot be found. This may be because it doesn't exist, |
| or because it is set to `private` and you do not have access. |
| [`HfHubHTTPError`]: |
| If the HuggingFace API returned an error |
| |
| Example: |
| ```python |
| >>> from huggingface_hub import duplicate_space |
| |
| # Duplicate a Space to your account |
| >>> duplicate_space("multimodalart/dreambooth-training") |
| RepoUrl('https://huggingface.co/spaces/nateraw/dreambooth-training',...) |
| |
| # Can set custom destination id and visibility flag. |
| >>> duplicate_space("multimodalart/dreambooth-training", to_id="my-dreambooth", private=True) |
| RepoUrl('https://huggingface.co/spaces/nateraw/my-dreambooth',...) |
| ``` |
| """ |
| |
| parsed_to_id = RepoUrl(to_id) if to_id is not None else None |
|
|
| |
| to_namespace = ( |
| parsed_to_id.namespace |
| if parsed_to_id is not None and parsed_to_id.namespace is not None |
| else self.whoami(token)["name"] |
| ) |
| to_repo_name = parsed_to_id.repo_name if to_id is not None else RepoUrl(from_id).repo_name |
|
|
| |
| payload: dict[str, Any] = {"repository": f"{to_namespace}/{to_repo_name}"} |
|
|
| keys = ["private", "hardware", "storageTier", "sleepTimeSeconds", "secrets", "variables"] |
| values = [private, hardware, storage, sleep_time, secrets, variables] |
| payload.update({k: v for k, v in zip(keys, values) if v is not None}) |
|
|
| if sleep_time is not None and hardware == SpaceHardware.CPU_BASIC: |
| warnings.warn( |
| "If your Space runs on the default 'cpu-basic' hardware, it will go to sleep if inactive for more" |
| " than 48 hours. This value is not configurable. If you don't want your Space to deactivate or if" |
| " you want to set a custom sleep time, you need to upgrade to a paid Hardware.", |
| UserWarning, |
| ) |
|
|
| r = get_session().post( |
| f"{self.endpoint}/api/spaces/{from_id}/duplicate", |
| headers=self._build_hf_headers(token=token), |
| json=payload, |
| ) |
|
|
| try: |
| hf_raise_for_status(r) |
| except HfHubHTTPError as err: |
| if exist_ok and err.response.status_code == 409: |
| |
| pass |
| else: |
| raise |
|
|
| return RepoUrl(r.json()["url"], endpoint=self.endpoint) |
|
|
| @validate_hf_hub_args |
| def request_space_storage( |
| self, |
| repo_id: str, |
| storage: SpaceStorage, |
| *, |
| token: Union[bool, str, None] = None, |
| ) -> SpaceRuntime: |
| """Request persistent storage for a Space. |
| |
| Args: |
| repo_id (`str`): |
| ID of the Space to update. Example: `"open-llm-leaderboard/open_llm_leaderboard"`. |
| storage (`str` or [`SpaceStorage`]): |
| Storage tier. Either 'small', 'medium', or 'large'. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| Returns: |
| [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware. |
| |
| > [!TIP] |
| > It is not possible to decrease persistent storage after its granted. To do so, you must delete it |
| > via [`delete_space_storage`]. |
| """ |
| payload: dict[str, SpaceStorage] = {"tier": storage} |
| r = get_session().post( |
| f"{self.endpoint}/api/spaces/{repo_id}/storage", |
| headers=self._build_hf_headers(token=token), |
| json=payload, |
| ) |
| hf_raise_for_status(r) |
| return SpaceRuntime(r.json()) |
|
|
| @validate_hf_hub_args |
| def delete_space_storage( |
| self, |
| repo_id: str, |
| *, |
| token: Union[bool, str, None] = None, |
| ) -> SpaceRuntime: |
| """Delete persistent storage for a Space. |
| |
| Args: |
| repo_id (`str`): |
| ID of the Space to update. Example: `"open-llm-leaderboard/open_llm_leaderboard"`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| Returns: |
| [`SpaceRuntime`]: Runtime information about a Space including Space stage and hardware. |
| Raises: |
| [`BadRequestError`] |
| If space has no persistent storage. |
| |
| """ |
| r = get_session().delete( |
| f"{self.endpoint}/api/spaces/{repo_id}/storage", |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(r) |
| return SpaceRuntime(r.json()) |
|
|
| |
| |
| |
|
|
| def list_inference_endpoints( |
| self, namespace: Optional[str] = None, *, token: Union[bool, str, None] = None |
| ) -> list[InferenceEndpoint]: |
| """Lists all inference endpoints for the given namespace. |
| |
| Args: |
| namespace (`str`, *optional*): |
| The namespace to list endpoints for. Defaults to the current user. Set to `"*"` to list all endpoints |
| from all namespaces (i.e. personal namespace and all orgs the user belongs to). |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| list[`InferenceEndpoint`]: A list of all inference endpoints for the given namespace. |
| |
| Example: |
| ```python |
| >>> from huggingface_hub import HfApi |
| >>> api = HfApi() |
| >>> api.list_inference_endpoints() |
| [InferenceEndpoint(name='my-endpoint', ...), ...] |
| ``` |
| """ |
| |
| if namespace == "*": |
| user = self.whoami(token=token) |
|
|
| |
| endpoints: list[InferenceEndpoint] = list_inference_endpoints(namespace=self._get_namespace(token=token)) |
|
|
| |
| for org in user.get("orgs", []): |
| try: |
| endpoints += list_inference_endpoints(namespace=org["name"], token=token) |
| except HfHubHTTPError as error: |
| if error.response.status_code == 401: |
| logger.debug("Cannot list Inference Endpoints for org '%s': %s", org["name"], error) |
| pass |
|
|
| return endpoints |
|
|
| |
| namespace = namespace or self._get_namespace(token=token) |
|
|
| response = get_session().get( |
| f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}", |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(response) |
|
|
| return [ |
| InferenceEndpoint.from_raw(endpoint, namespace=namespace, token=token) |
| for endpoint in response.json()["items"] |
| ] |
|
|
| def create_inference_endpoint( |
| self, |
| name: str, |
| *, |
| repository: str, |
| framework: str, |
| accelerator: str, |
| instance_size: str, |
| instance_type: str, |
| region: str, |
| vendor: str, |
| account_id: Optional[str] = None, |
| min_replica: int = 1, |
| max_replica: int = 1, |
| scaling_metric: Optional[InferenceEndpointScalingMetric] = None, |
| scaling_threshold: Optional[float] = None, |
| scale_to_zero_timeout: Optional[int] = None, |
| revision: Optional[str] = None, |
| task: Optional[str] = None, |
| custom_image: Optional[dict] = None, |
| env: Optional[dict[str, str]] = None, |
| secrets: Optional[dict[str, str]] = None, |
| type: InferenceEndpointType = InferenceEndpointType.PROTECTED, |
| domain: Optional[str] = None, |
| path: Optional[str] = None, |
| cache_http_responses: Optional[bool] = None, |
| tags: Optional[list[str]] = None, |
| namespace: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> InferenceEndpoint: |
| """Create a new Inference Endpoint. |
| |
| Args: |
| name (`str`): |
| The unique name for the new Inference Endpoint. |
| repository (`str`): |
| The name of the model repository associated with the Inference Endpoint (e.g. `"gpt2"`). |
| framework (`str`): |
| The machine learning framework used for the model (e.g. `"custom"`). |
| accelerator (`str`): |
| The hardware accelerator to be used for inference (e.g. `"cpu"`). |
| instance_size (`str`): |
| The size or type of the instance to be used for hosting the model (e.g. `"x4"`). |
| instance_type (`str`): |
| The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`). |
| region (`str`): |
| The cloud region in which the Inference Endpoint will be created (e.g. `"us-east-1"`). |
| vendor (`str`): |
| The cloud provider or vendor where the Inference Endpoint will be hosted (e.g. `"aws"`). |
| account_id (`str`, *optional*): |
| The account ID used to link a VPC to a private Inference Endpoint (if applicable). |
| min_replica (`int`, *optional*): |
| The minimum number of replicas (instances) to keep running for the Inference Endpoint. To enable |
| scaling to zero, set this value to 0 and adjust `scale_to_zero_timeout` accordingly. Defaults to 1. |
| max_replica (`int`, *optional*): |
| The maximum number of replicas (instances) to scale to for the Inference Endpoint. Defaults to 1. |
| scaling_metric (`str` or [`InferenceEndpointScalingMetric `], *optional*): |
| The metric reference for scaling. Either "pendingRequests" or "hardwareUsage" when provided. Defaults to |
| None (meaning: let the HF Endpoints service specify the metric). |
| scaling_threshold (`float`, *optional*): |
| The scaling metric threshold used to trigger a scale up. Ignored when scaling metric is not provided. |
| Defaults to None (meaning: let the HF Endpoints service specify the threshold). |
| scale_to_zero_timeout (`int`, *optional*): |
| The duration in minutes before an inactive endpoint is scaled to zero, or no scaling to zero if |
| set to None and `min_replica` is not 0. Defaults to None. |
| revision (`str`, *optional*): |
| The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`). |
| task (`str`, *optional*): |
| The task on which to deploy the model (e.g. `"text-classification"`). |
| custom_image (`dict`, *optional*): |
| A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an |
| Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples). |
| env (`dict[str, str]`, *optional*): |
| Non-secret environment variables to inject in the container environment. |
| secrets (`dict[str, str]`, *optional*): |
| Secret values to inject in the container environment. |
| type ([`InferenceEndpointType]`, *optional*): |
| The type of the Inference Endpoint, which can be `"protected"` (default), `"public"` or `"private"`. |
| domain (`str`, *optional*): |
| The custom domain for the Inference Endpoint deployment, if setup the inference endpoint will be available at this domain (e.g. `"my-new-domain.cool-website.woof"`). |
| path (`str`, *optional*): |
| The custom path to the deployed model, should start with a `/` (e.g. `"/models/google-bert/bert-base-uncased"`). |
| cache_http_responses (`bool`, *optional*): |
| Whether to cache HTTP responses from the Inference Endpoint. Defaults to `False`. |
| tags (`list[str]`, *optional*): |
| A list of tags to associate with the Inference Endpoint. |
| namespace (`str`, *optional*): |
| The namespace where the Inference Endpoint will be created. Defaults to the current user's namespace. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`InferenceEndpoint`]: information about the updated Inference Endpoint. |
| |
| Example: |
| ```python |
| >>> from huggingface_hub import HfApi |
| >>> api = HfApi() |
| >>> endpoint = api.create_inference_endpoint( |
| ... "my-endpoint-name", |
| ... repository="gpt2", |
| ... framework="pytorch", |
| ... task="text-generation", |
| ... accelerator="cpu", |
| ... vendor="aws", |
| ... region="us-east-1", |
| ... type="protected", |
| ... instance_size="x2", |
| ... instance_type="intel-icl", |
| ... ) |
| >>> endpoint |
| InferenceEndpoint(name='my-endpoint-name', status="pending",...) |
| |
| # Run inference on the endpoint |
| >>> endpoint.client.text_generation(...) |
| "..." |
| ``` |
| |
| ```python |
| # Start an Inference Endpoint running Zephyr-7b-beta on TGI |
| >>> from huggingface_hub import HfApi |
| >>> api = HfApi() |
| >>> endpoint = api.create_inference_endpoint( |
| ... "aws-zephyr-7b-beta-0486", |
| ... repository="HuggingFaceH4/zephyr-7b-beta", |
| ... framework="pytorch", |
| ... task="text-generation", |
| ... accelerator="gpu", |
| ... vendor="aws", |
| ... region="us-east-1", |
| ... type="protected", |
| ... instance_size="x1", |
| ... instance_type="nvidia-a10g", |
| ... env={ |
| ... "MAX_BATCH_PREFILL_TOKENS": "2048", |
| ... "MAX_INPUT_LENGTH": "1024", |
| ... "MAX_TOTAL_TOKENS": "1512", |
| ... "MODEL_ID": "/repository" |
| ... }, |
| ... custom_image={ |
| ... "health_route": "/health", |
| ... "url": "ghcr.io/huggingface/text-generation-inference:1.1.0", |
| ... }, |
| ... secrets={"MY_SECRET_KEY": "secret_value"}, |
| ... tags=["dev", "text-generation"], |
| ... ) |
| ``` |
| |
| ```python |
| # Start an Inference Endpoint running ProsusAI/finbert while scaling to zero in 15 minutes |
| >>> from huggingface_hub import HfApi |
| >>> api = HfApi() |
| >>> endpoint = api.create_inference_endpoint( |
| ... "finbert-classifier", |
| ... repository="ProsusAI/finbert", |
| ... framework="pytorch", |
| ... task="text-classification", |
| ... min_replica=0, |
| ... scale_to_zero_timeout=15, |
| ... accelerator="cpu", |
| ... vendor="aws", |
| ... region="us-east-1", |
| ... type="protected", |
| ... instance_size="x2", |
| ... instance_type="intel-icl", |
| ... ) |
| >>> endpoint.wait(timeout=300) |
| # Run inference on the endpoint |
| >>> endpoint.client.text_generation(...) |
| TextClassificationOutputElement(label='positive', score=0.8983615040779114) |
| ``` |
| |
| """ |
| namespace = namespace or self._get_namespace(token=token) |
|
|
| if custom_image is not None: |
| image = ( |
| custom_image |
| if next(iter(custom_image)) in constants.INFERENCE_ENDPOINT_IMAGE_KEYS |
| else {"custom": custom_image} |
| ) |
| else: |
| image = {"huggingface": {}} |
|
|
| payload: dict = { |
| "accountId": account_id, |
| "compute": { |
| "accelerator": accelerator, |
| "instanceSize": instance_size, |
| "instanceType": instance_type, |
| "scaling": { |
| "maxReplica": max_replica, |
| "minReplica": min_replica, |
| "scaleToZeroTimeout": scale_to_zero_timeout, |
| }, |
| }, |
| "model": { |
| "framework": framework, |
| "repository": repository, |
| "revision": revision, |
| "task": task, |
| "image": image, |
| }, |
| "name": name, |
| "provider": { |
| "region": region, |
| "vendor": vendor, |
| }, |
| "type": type, |
| } |
| if scaling_metric: |
| payload["compute"]["scaling"]["measure"] = {scaling_metric: scaling_threshold} |
| if env: |
| payload["model"]["env"] = env |
| if secrets: |
| payload["model"]["secrets"] = secrets |
| if domain is not None or path is not None: |
| payload["route"] = {} |
| if domain is not None: |
| payload["route"]["domain"] = domain |
| if path is not None: |
| payload["route"]["path"] = path |
| if cache_http_responses is not None: |
| payload["cacheHttpResponses"] = cache_http_responses |
| if tags is not None: |
| payload["tags"] = tags |
|
|
| response = get_session().post( |
| f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}", |
| headers=self._build_hf_headers(token=token), |
| json=payload, |
| ) |
| hf_raise_for_status(response) |
|
|
| return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token) |
|
|
| @experimental |
| @validate_hf_hub_args |
| def create_inference_endpoint_from_catalog( |
| self, |
| repo_id: str, |
| *, |
| name: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| namespace: Optional[str] = None, |
| ) -> InferenceEndpoint: |
| """Create a new Inference Endpoint from a model in the Hugging Face Inference Catalog. |
| |
| The goal of the Inference Catalog is to provide a curated list of models that are optimized for inference |
| and for which default configurations have been tested. See https://endpoints.huggingface.co/catalog for a list |
| of available models in the catalog. |
| |
| Args: |
| repo_id (`str`): |
| The ID of the model in the catalog to deploy as an Inference Endpoint. |
| name (`str`, *optional*): |
| The unique name for the new Inference Endpoint. If not provided, a random name will be generated. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| namespace (`str`, *optional*): |
| The namespace where the Inference Endpoint will be created. Defaults to the current user's namespace. |
| |
| Returns: |
| [`InferenceEndpoint`]: information about the new Inference Endpoint. |
| |
| > [!WARNING] |
| > `create_inference_endpoint_from_catalog` is experimental. Its API is subject to change in the future. Please provide feedback |
| > if you have any suggestions or requests. |
| """ |
| token = token or self.token or get_token() |
| payload: dict = { |
| "namespace": namespace or self._get_namespace(token=token), |
| "repoId": repo_id, |
| } |
| if name is not None: |
| payload["endpointName"] = name |
|
|
| response = get_session().post( |
| f"{constants.INFERENCE_CATALOG_ENDPOINT}/deploy", |
| headers=self._build_hf_headers(token=token), |
| json=payload, |
| ) |
| hf_raise_for_status(response) |
| data = response.json()["endpoint"] |
| return InferenceEndpoint.from_raw(data, namespace=data["name"], token=token) |
|
|
| @experimental |
| @validate_hf_hub_args |
| def list_inference_catalog(self, *, token: Union[bool, str, None] = None) -> list[str]: |
| """List models available in the Hugging Face Inference Catalog. |
| |
| The goal of the Inference Catalog is to provide a curated list of models that are optimized for inference |
| and for which default configurations have been tested. See https://endpoints.huggingface.co/catalog for a list |
| of available models in the catalog. |
| |
| Use [`create_inference_endpoint_from_catalog`] to deploy a model from the catalog. |
| |
| Args: |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| |
| Returns: |
| List[`str`]: A list of model IDs available in the catalog. |
| > [!WARNING] |
| > `list_inference_catalog` is experimental. Its API is subject to change in the future. Please provide feedback |
| > if you have any suggestions or requests. |
| """ |
| response = get_session().get( |
| f"{constants.INFERENCE_CATALOG_ENDPOINT}/repo-list", |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(response) |
| return response.json()["models"] |
|
|
| def get_inference_endpoint( |
| self, name: str, *, namespace: Optional[str] = None, token: Union[bool, str, None] = None |
| ) -> InferenceEndpoint: |
| """Get information about an Inference Endpoint. |
| |
| Args: |
| name (`str`): |
| The name of the Inference Endpoint to retrieve information about. |
| namespace (`str`, *optional*): |
| The namespace in which the Inference Endpoint is located. Defaults to the current user. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`InferenceEndpoint`]: information about the requested Inference Endpoint. |
| |
| Example: |
| ```python |
| >>> from huggingface_hub import HfApi |
| >>> api = HfApi() |
| >>> endpoint = api.get_inference_endpoint("my-text-to-image") |
| >>> endpoint |
| InferenceEndpoint(name='my-text-to-image', ...) |
| |
| # Get status |
| >>> endpoint.status |
| 'running' |
| >>> endpoint.url |
| 'https://my-text-to-image.region.vendor.endpoints.huggingface.cloud' |
| |
| # Run inference |
| >>> endpoint.client.text_to_image(...) |
| ``` |
| """ |
| namespace = namespace or self._get_namespace(token=token) |
|
|
| response = get_session().get( |
| f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}", |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(response) |
|
|
| return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token) |
|
|
| def update_inference_endpoint( |
| self, |
| name: str, |
| *, |
| |
| accelerator: Optional[str] = None, |
| instance_size: Optional[str] = None, |
| instance_type: Optional[str] = None, |
| min_replica: Optional[int] = None, |
| max_replica: Optional[int] = None, |
| scale_to_zero_timeout: Optional[int] = None, |
| scaling_metric: Optional[InferenceEndpointScalingMetric] = None, |
| scaling_threshold: Optional[float] = None, |
| |
| repository: Optional[str] = None, |
| framework: Optional[str] = None, |
| revision: Optional[str] = None, |
| task: Optional[str] = None, |
| custom_image: Optional[dict] = None, |
| env: Optional[dict[str, str]] = None, |
| secrets: Optional[dict[str, str]] = None, |
| |
| domain: Optional[str] = None, |
| path: Optional[str] = None, |
| |
| cache_http_responses: Optional[bool] = None, |
| tags: Optional[list[str]] = None, |
| namespace: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> InferenceEndpoint: |
| """Update an Inference Endpoint. |
| |
| This method allows the update of either the compute configuration, the deployed model, the route, or any combination. |
| All arguments are optional but at least one must be provided. |
| |
| For convenience, you can also update an Inference Endpoint using [`InferenceEndpoint.update`]. |
| |
| Args: |
| name (`str`): |
| The name of the Inference Endpoint to update. |
| |
| accelerator (`str`, *optional*): |
| The hardware accelerator to be used for inference (e.g. `"cpu"`). |
| instance_size (`str`, *optional*): |
| The size or type of the instance to be used for hosting the model (e.g. `"x4"`). |
| instance_type (`str`, *optional*): |
| The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`). |
| min_replica (`int`, *optional*): |
| The minimum number of replicas (instances) to keep running for the Inference Endpoint. |
| max_replica (`int`, *optional*): |
| The maximum number of replicas (instances) to scale to for the Inference Endpoint. |
| scale_to_zero_timeout (`int`, *optional*): |
| The duration in minutes before an inactive endpoint is scaled to zero. |
| scaling_metric (`str` or [`InferenceEndpointScalingMetric `], *optional*): |
| The metric reference for scaling. Either "pendingRequests" or "hardwareUsage" when provided. |
| Defaults to None. |
| scaling_threshold (`float`, *optional*): |
| The scaling metric threshold used to trigger a scale up. Ignored when scaling metric is not provided. |
| Defaults to None. |
| repository (`str`, *optional*): |
| The name of the model repository associated with the Inference Endpoint (e.g. `"gpt2"`). |
| framework (`str`, *optional*): |
| The machine learning framework used for the model (e.g. `"custom"`). |
| revision (`str`, *optional*): |
| The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`). |
| task (`str`, *optional*): |
| The task on which to deploy the model (e.g. `"text-classification"`). |
| custom_image (`dict`, *optional*): |
| A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an |
| Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples). |
| env (`dict[str, str]`, *optional*): |
| Non-secret environment variables to inject in the container environment |
| secrets (`dict[str, str]`, *optional*): |
| Secret values to inject in the container environment. |
| |
| domain (`str`, *optional*): |
| The custom domain for the Inference Endpoint deployment, if setup the inference endpoint will be available at this domain (e.g. `"my-new-domain.cool-website.woof"`). |
| path (`str`, *optional*): |
| The custom path to the deployed model, should start with a `/` (e.g. `"/models/google-bert/bert-base-uncased"`). |
| |
| cache_http_responses (`bool`, *optional*): |
| Whether to cache HTTP responses from the Inference Endpoint. |
| tags (`list[str]`, *optional*): |
| A list of tags to associate with the Inference Endpoint. |
| |
| namespace (`str`, *optional*): |
| The namespace where the Inference Endpoint will be updated. Defaults to the current user's namespace. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`InferenceEndpoint`]: information about the updated Inference Endpoint. |
| """ |
| namespace = namespace or self._get_namespace(token=token) |
|
|
| |
| payload: dict = defaultdict(lambda: defaultdict(dict)) |
| if accelerator is not None: |
| payload["compute"]["accelerator"] = accelerator |
| if instance_size is not None: |
| payload["compute"]["instanceSize"] = instance_size |
| if instance_type is not None: |
| payload["compute"]["instanceType"] = instance_type |
| if max_replica is not None: |
| payload["compute"]["scaling"]["maxReplica"] = max_replica |
| if min_replica is not None: |
| payload["compute"]["scaling"]["minReplica"] = min_replica |
| if scale_to_zero_timeout is not None: |
| payload["compute"]["scaling"]["scaleToZeroTimeout"] = scale_to_zero_timeout |
| if scaling_metric: |
| payload["compute"]["scaling"]["measure"] = {scaling_metric: scaling_threshold} |
| if repository is not None: |
| payload["model"]["repository"] = repository |
| if framework is not None: |
| payload["model"]["framework"] = framework |
| if revision is not None: |
| payload["model"]["revision"] = revision |
| if task is not None: |
| payload["model"]["task"] = task |
| if custom_image is not None: |
| payload["model"]["image"] = {"custom": custom_image} |
| if env is not None: |
| payload["model"]["env"] = env |
| if secrets is not None: |
| payload["model"]["secrets"] = secrets |
| if domain is not None: |
| payload["route"]["domain"] = domain |
| if path is not None: |
| payload["route"]["path"] = path |
| if cache_http_responses is not None: |
| payload["cacheHttpResponses"] = cache_http_responses |
| if tags is not None: |
| payload["tags"] = tags |
|
|
| response = get_session().put( |
| f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}", |
| headers=self._build_hf_headers(token=token), |
| json=payload, |
| ) |
| hf_raise_for_status(response) |
|
|
| return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token) |
|
|
| def delete_inference_endpoint( |
| self, name: str, *, namespace: Optional[str] = None, token: Union[bool, str, None] = None |
| ) -> None: |
| """Delete an Inference Endpoint. |
| |
| This operation is not reversible. If you don't want to be charged for an Inference Endpoint, it is preferable |
| to pause it with [`pause_inference_endpoint`] or scale it to zero with [`scale_to_zero_inference_endpoint`]. |
| |
| For convenience, you can also delete an Inference Endpoint using [`InferenceEndpoint.delete`]. |
| |
| Args: |
| name (`str`): |
| The name of the Inference Endpoint to delete. |
| namespace (`str`, *optional*): |
| The namespace in which the Inference Endpoint is located. Defaults to the current user. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| """ |
| namespace = namespace or self._get_namespace(token=token) |
| response = get_session().delete( |
| f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}", |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(response) |
|
|
| def pause_inference_endpoint( |
| self, name: str, *, namespace: Optional[str] = None, token: Union[bool, str, None] = None |
| ) -> InferenceEndpoint: |
| """Pause an Inference Endpoint. |
| |
| A paused Inference Endpoint will not be charged. It can be resumed at any time using [`resume_inference_endpoint`]. |
| This is different than scaling the Inference Endpoint to zero with [`scale_to_zero_inference_endpoint`], which |
| would be automatically restarted when a request is made to it. |
| |
| For convenience, you can also pause an Inference Endpoint using [`pause_inference_endpoint`]. |
| |
| Args: |
| name (`str`): |
| The name of the Inference Endpoint to pause. |
| namespace (`str`, *optional*): |
| The namespace in which the Inference Endpoint is located. Defaults to the current user. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`InferenceEndpoint`]: information about the paused Inference Endpoint. |
| """ |
| namespace = namespace or self._get_namespace(token=token) |
|
|
| response = get_session().post( |
| f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}/pause", |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(response) |
|
|
| return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token) |
|
|
| def resume_inference_endpoint( |
| self, |
| name: str, |
| *, |
| namespace: Optional[str] = None, |
| running_ok: bool = True, |
| token: Union[bool, str, None] = None, |
| ) -> InferenceEndpoint: |
| """Resume an Inference Endpoint. |
| |
| For convenience, you can also resume an Inference Endpoint using [`InferenceEndpoint.resume`]. |
| |
| Args: |
| name (`str`): |
| The name of the Inference Endpoint to resume. |
| namespace (`str`, *optional*): |
| The namespace in which the Inference Endpoint is located. Defaults to the current user. |
| running_ok (`bool`, *optional*): |
| If `True`, the method will not raise an error if the Inference Endpoint is already running. Defaults to |
| `True`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`InferenceEndpoint`]: information about the resumed Inference Endpoint. |
| """ |
| namespace = namespace or self._get_namespace(token=token) |
|
|
| response = get_session().post( |
| f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}/resume", |
| headers=self._build_hf_headers(token=token), |
| ) |
| try: |
| hf_raise_for_status(response) |
| except HfHubHTTPError as error: |
| |
| if running_ok and error.response.status_code == 400 and "already running" in error.response.text: |
| return self.get_inference_endpoint(name, namespace=namespace, token=token) |
| |
| raise |
|
|
| return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token) |
|
|
| def scale_to_zero_inference_endpoint( |
| self, name: str, *, namespace: Optional[str] = None, token: Union[bool, str, None] = None |
| ) -> InferenceEndpoint: |
| """Scale Inference Endpoint to zero. |
| |
| An Inference Endpoint scaled to zero will not be charged. It will be resume on the next request to it, with a |
| cold start delay. This is different than pausing the Inference Endpoint with [`pause_inference_endpoint`], which |
| would require a manual resume with [`resume_inference_endpoint`]. |
| |
| For convenience, you can also scale an Inference Endpoint to zero using [`InferenceEndpoint.scale_to_zero`]. |
| |
| Args: |
| name (`str`): |
| The name of the Inference Endpoint to scale to zero. |
| namespace (`str`, *optional*): |
| The namespace in which the Inference Endpoint is located. Defaults to the current user. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`InferenceEndpoint`]: information about the scaled-to-zero Inference Endpoint. |
| """ |
| namespace = namespace or self._get_namespace(token=token) |
|
|
| response = get_session().post( |
| f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}/scale-to-zero", |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(response) |
|
|
| return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token) |
|
|
| def _get_namespace(self, token: Union[bool, str, None] = None) -> str: |
| """Get the default namespace for the current user.""" |
| me = self.whoami(token=token) |
| if me["type"] == "user": |
| return me["name"] |
| else: |
| raise ValueError( |
| "Cannot determine default namespace. You must provide a 'namespace' as input or be logged in as a" |
| " user." |
| ) |
|
|
| |
| |
| |
| @validate_hf_hub_args |
| def list_collections( |
| self, |
| *, |
| owner: Union[list[str], str, None] = None, |
| item: Union[list[str], str, None] = None, |
| sort: Optional[Literal["lastModified", "trending", "upvotes"]] = None, |
| limit: Optional[int] = None, |
| token: Union[bool, str, None] = None, |
| ) -> Iterable[Collection]: |
| """List collections on the Huggingface Hub, given some filters. |
| |
| > [!WARNING] |
| > When listing collections, the item list per collection is truncated to 4 items maximum. To retrieve all items |
| > from a collection, you must use [`get_collection`]. |
| |
| Args: |
| owner (`list[str]` or `str`, *optional*): |
| Filter by owner's username. |
| item (`list[str]` or `str`, *optional*): |
| Filter collections containing a particular items. Example: `"models/teknium/OpenHermes-2.5-Mistral-7B"`, `"datasets/squad"` or `"papers/2311.12983"`. |
| sort (`Literal["lastModified", "trending", "upvotes"]`, *optional*): |
| Sort collections by last modified, trending or upvotes. |
| limit (`int`, *optional*): |
| Maximum number of collections to be returned. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `Iterable[Collection]`: an iterable of [`Collection`] objects. |
| """ |
| |
| path = f"{self.endpoint}/api/collections" |
| headers = self._build_hf_headers(token=token) |
| params: dict = {} |
| if owner is not None: |
| params.update({"owner": owner}) |
| if item is not None: |
| params.update({"item": item}) |
| if sort is not None: |
| params.update({"sort": sort}) |
| if limit is not None: |
| params.update({"limit": limit}) |
|
|
| |
| items = paginate(path, headers=headers, params=params) |
| if limit is not None: |
| items = islice(items, limit) |
|
|
| |
| for position, collection_data in enumerate(items): |
| yield Collection(position=position, **collection_data) |
|
|
| def get_collection(self, collection_slug: str, *, token: Union[bool, str, None] = None) -> Collection: |
| """Gets information about a Collection on the Hub. |
| |
| Args: |
| collection_slug (`str`): |
| Slug of the collection of the Hub. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: [`Collection`] |
| |
| Example: |
| |
| ```py |
| >>> from huggingface_hub import get_collection |
| >>> collection = get_collection("TheBloke/recent-models-64f9a55bb3115b4f513ec026") |
| >>> collection.title |
| 'Recent models' |
| >>> len(collection.items) |
| 37 |
| >>> collection.items[0] |
| CollectionItem( |
| item_object_id='651446103cd773a050bf64c2', |
| item_id='TheBloke/U-Amethyst-20B-AWQ', |
| item_type='model', |
| position=88, |
| note=None |
| ) |
| ``` |
| """ |
| r = get_session().get( |
| f"{self.endpoint}/api/collections/{collection_slug}", headers=self._build_hf_headers(token=token) |
| ) |
| hf_raise_for_status(r) |
| return Collection(**{**r.json(), "endpoint": self.endpoint}) |
|
|
| def create_collection( |
| self, |
| title: str, |
| *, |
| namespace: Optional[str] = None, |
| description: Optional[str] = None, |
| private: bool = False, |
| exists_ok: bool = False, |
| token: Union[bool, str, None] = None, |
| ) -> Collection: |
| """Create a new Collection on the Hub. |
| |
| Args: |
| title (`str`): |
| Title of the collection to create. Example: `"Recent models"`. |
| namespace (`str`, *optional*): |
| Namespace of the collection to create (username or org). Will default to the owner name. |
| description (`str`, *optional*): |
| Description of the collection to create. |
| private (`bool`, *optional*): |
| Whether the collection should be private or not. Defaults to `False` (i.e. public collection). |
| exists_ok (`bool`, *optional*): |
| If `True`, do not raise an error if collection already exists. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: [`Collection`] |
| |
| Example: |
| |
| ```py |
| >>> from huggingface_hub import create_collection |
| >>> collection = create_collection( |
| ... title="ICCV 2023", |
| ... description="Portfolio of models, papers and demos I presented at ICCV 2023", |
| ... ) |
| >>> collection.slug |
| "username/iccv-2023-64f9a55bb3115b4f513ec026" |
| ``` |
| """ |
| if namespace is None: |
| namespace = self.whoami(token)["name"] |
|
|
| payload = { |
| "title": title, |
| "namespace": namespace, |
| "private": private, |
| } |
| if description is not None: |
| payload["description"] = description |
|
|
| r = get_session().post( |
| f"{self.endpoint}/api/collections", headers=self._build_hf_headers(token=token), json=payload |
| ) |
| try: |
| hf_raise_for_status(r) |
| except HfHubHTTPError as err: |
| if exists_ok and err.response.status_code == 409: |
| |
| slug = r.json()["slug"] |
| return self.get_collection(slug, token=token) |
| else: |
| raise |
| return Collection(**{**r.json(), "endpoint": self.endpoint}) |
|
|
| def update_collection_metadata( |
| self, |
| collection_slug: str, |
| *, |
| title: Optional[str] = None, |
| description: Optional[str] = None, |
| position: Optional[int] = None, |
| private: Optional[bool] = None, |
| theme: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> Collection: |
| """Update metadata of a collection on the Hub. |
| |
| All arguments are optional. Only provided metadata will be updated. |
| |
| Args: |
| collection_slug (`str`): |
| Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. |
| title (`str`): |
| Title of the collection to update. |
| description (`str`, *optional*): |
| Description of the collection to update. |
| position (`int`, *optional*): |
| New position of the collection in the list of collections of the user. |
| private (`bool`, *optional*): |
| Whether the collection should be private or not. |
| theme (`str`, *optional*): |
| Theme of the collection on the Hub. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: [`Collection`] |
| |
| Example: |
| |
| ```py |
| >>> from huggingface_hub import update_collection_metadata |
| >>> collection = update_collection_metadata( |
| ... collection_slug="username/iccv-2023-64f9a55bb3115b4f513ec026", |
| ... title="ICCV Oct. 2023" |
| ... description="Portfolio of models, datasets, papers and demos I presented at ICCV Oct. 2023", |
| ... private=False, |
| ... theme="pink", |
| ... ) |
| >>> collection.slug |
| "username/iccv-oct-2023-64f9a55bb3115b4f513ec026" |
| # ^collection slug got updated but not the trailing ID |
| ``` |
| """ |
| payload = { |
| "position": position, |
| "private": private, |
| "theme": theme, |
| "title": title, |
| "description": description, |
| } |
| r = get_session().patch( |
| f"{self.endpoint}/api/collections/{collection_slug}", |
| headers=self._build_hf_headers(token=token), |
| |
| json={key: value for key, value in payload.items() if value is not None}, |
| ) |
| hf_raise_for_status(r) |
| return Collection(**{**r.json()["data"], "endpoint": self.endpoint}) |
|
|
| def delete_collection( |
| self, collection_slug: str, *, missing_ok: bool = False, token: Union[bool, str, None] = None |
| ) -> None: |
| """Delete a collection on the Hub. |
| |
| Args: |
| collection_slug (`str`): |
| Slug of the collection to delete. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. |
| missing_ok (`bool`, *optional*): |
| If `True`, do not raise an error if collection doesn't exists. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Example: |
| |
| ```py |
| >>> from huggingface_hub import delete_collection |
| >>> collection = delete_collection("username/useless-collection-64f9a55bb3115b4f513ec026", missing_ok=True) |
| ``` |
| |
| > [!WARNING] |
| > This is a non-revertible action. A deleted collection cannot be restored. |
| """ |
| r = get_session().delete( |
| f"{self.endpoint}/api/collections/{collection_slug}", headers=self._build_hf_headers(token=token) |
| ) |
| try: |
| hf_raise_for_status(r) |
| except HfHubHTTPError as err: |
| if missing_ok and err.response.status_code == 404: |
| |
| return |
| else: |
| raise |
|
|
| def add_collection_item( |
| self, |
| collection_slug: str, |
| item_id: str, |
| item_type: CollectionItemType_T, |
| *, |
| note: Optional[str] = None, |
| exists_ok: bool = False, |
| token: Union[bool, str, None] = None, |
| ) -> Collection: |
| """Add an item to a collection on the Hub. |
| |
| Args: |
| collection_slug (`str`): |
| Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. |
| item_id (`str`): |
| Id of the item to add to the collection. Use the repo_id for repos/spaces/datasets, |
| the paper id for papers, or the slug of another collection (e.g. `"moonshotai/kimi-k2"`). |
| item_type (`str`): |
| Type of the item to add. Can be one of `"model"`, `"dataset"`, `"space"`, `"paper"` or `"collection"`. |
| note (`str`, *optional*): |
| A note to attach to the item in the collection. The maximum size for a note is 500 characters. |
| exists_ok (`bool`, *optional*): |
| If `True`, do not raise an error if item already exists. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: [`Collection`] |
| |
| Raises: |
| [`HfHubHTTPError`]: |
| HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` |
| or `admin` role in the organization the repo belongs to or if you passed a `read` token. |
| [`HfHubHTTPError`]: |
| HTTP 404 if the item you try to add to the collection does not exist on the Hub. |
| [`HfHubHTTPError`]: |
| HTTP 409 if the item you try to add to the collection is already in the collection (and exists_ok=False) |
| |
| Example: |
| |
| ```py |
| >>> from huggingface_hub import add_collection_item |
| >>> collection = add_collection_item( |
| ... collection_slug="davanstrien/climate-64f99dc2a5067f6b65531bab", |
| ... item_id="pierre-loic/climate-news-articles", |
| ... item_type="dataset" |
| ... ) |
| >>> collection.items[-1].item_id |
| "pierre-loic/climate-news-articles" |
| # ^item got added to the collection on last position |
| |
| # Add item with a note |
| >>> add_collection_item( |
| ... collection_slug="davanstrien/climate-64f99dc2a5067f6b65531bab", |
| ... item_id="datasets/climate_fever", |
| ... item_type="dataset" |
| ... note="This dataset adopts the FEVER methodology that consists of 1,535 real-world claims regarding climate-change collected on the internet." |
| ... ) |
| (...) |
| ``` |
| """ |
| payload: dict[str, Any] = {"item": {"id": item_id, "type": item_type}} |
| if note is not None: |
| payload["note"] = note |
| r = get_session().post( |
| f"{self.endpoint}/api/collections/{collection_slug}/items", |
| headers=self._build_hf_headers(token=token), |
| json=payload, |
| ) |
| try: |
| hf_raise_for_status(r) |
| except HfHubHTTPError as err: |
| if exists_ok and err.response.status_code == 409: |
| |
| return self.get_collection(collection_slug, token=token) |
| else: |
| raise |
| return Collection(**{**r.json(), "endpoint": self.endpoint}) |
|
|
| def update_collection_item( |
| self, |
| collection_slug: str, |
| item_object_id: str, |
| *, |
| note: Optional[str] = None, |
| position: Optional[int] = None, |
| token: Union[bool, str, None] = None, |
| ) -> None: |
| """Update an item in a collection. |
| |
| Args: |
| collection_slug (`str`): |
| Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. |
| item_object_id (`str`): |
| ID of the item in the collection. This is not the id of the item on the Hub (repo_id or paper id). |
| It must be retrieved from a [`CollectionItem`] object. Example: `collection.items[0].item_object_id`. |
| note (`str`, *optional*): |
| A note to attach to the item in the collection. The maximum size for a note is 500 characters. |
| position (`int`, *optional*): |
| New position of the item in the collection. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Example: |
| |
| ```py |
| >>> from huggingface_hub import get_collection, update_collection_item |
| |
| # Get collection first |
| >>> collection = get_collection("TheBloke/recent-models-64f9a55bb3115b4f513ec026") |
| |
| # Update item based on its ID (add note + update position) |
| >>> update_collection_item( |
| ... collection_slug="TheBloke/recent-models-64f9a55bb3115b4f513ec026", |
| ... item_object_id=collection.items[-1].item_object_id, |
| ... note="Newly updated model!" |
| ... position=0, |
| ... ) |
| ``` |
| """ |
| payload = {"position": position, "note": note} |
| r = get_session().patch( |
| f"{self.endpoint}/api/collections/{collection_slug}/items/{item_object_id}", |
| headers=self._build_hf_headers(token=token), |
| |
| json={key: value for key, value in payload.items() if value is not None}, |
| ) |
| hf_raise_for_status(r) |
|
|
| def delete_collection_item( |
| self, |
| collection_slug: str, |
| item_object_id: str, |
| *, |
| missing_ok: bool = False, |
| token: Union[bool, str, None] = None, |
| ) -> None: |
| """Delete an item from a collection. |
| |
| Args: |
| collection_slug (`str`): |
| Slug of the collection to update. Example: `"TheBloke/recent-models-64f9a55bb3115b4f513ec026"`. |
| item_object_id (`str`): |
| ID of the item in the collection. This is not the id of the item on the Hub (repo_id or paper id). |
| It must be retrieved from a [`CollectionItem`] object. Example: `collection.items[0].item_object_id`. |
| missing_ok (`bool`, *optional*): |
| If `True`, do not raise an error if item doesn't exists. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Example: |
| |
| ```py |
| >>> from huggingface_hub import get_collection, delete_collection_item |
| |
| # Get collection first |
| >>> collection = get_collection("TheBloke/recent-models-64f9a55bb3115b4f513ec026") |
| |
| # Delete item based on its ID |
| >>> delete_collection_item( |
| ... collection_slug="TheBloke/recent-models-64f9a55bb3115b4f513ec026", |
| ... item_object_id=collection.items[-1].item_object_id, |
| ... ) |
| ``` |
| """ |
| r = get_session().delete( |
| f"{self.endpoint}/api/collections/{collection_slug}/items/{item_object_id}", |
| headers=self._build_hf_headers(token=token), |
| ) |
| try: |
| hf_raise_for_status(r) |
| except HfHubHTTPError as err: |
| if missing_ok and err.response.status_code == 404: |
| |
| return |
| else: |
| raise |
|
|
| |
| |
| |
|
|
| @validate_hf_hub_args |
| def list_pending_access_requests( |
| self, repo_id: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None |
| ) -> Iterable[AccessRequest]: |
| """ |
| Get pending access requests for a given gated repo. |
| |
| A pending request means the user has requested access to the repo but the request has not been processed yet. |
| If the approval mode is automatic, this list should be empty. Pending requests can be accepted or rejected |
| using [`accept_access_request`] and [`reject_access_request`]. |
| |
| For more info about gated repos, see https://huggingface.co/docs/hub/models-gated. |
| |
| Args: |
| repo_id (`str`): |
| The id of the repo to get access requests for. |
| repo_type (`str`, *optional*): |
| The type of the repo to get access requests for. Must be one of `model`, `dataset` or `space`. |
| Defaults to `model`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `Iterable[AccessRequest]`: An iterable of [`AccessRequest`] objects. Each time contains a `username`, `email`, |
| `status` and `timestamp` attribute. If the gated repo has a custom form, the `fields` attribute will |
| be populated with user's answers. |
| |
| Raises: |
| [`HfHubHTTPError`]: |
| HTTP 400 if the repo is not gated. |
| [`HfHubHTTPError`]: |
| HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` |
| or `admin` role in the organization the repo belongs to or if you passed a `read` token. |
| |
| Example: |
| ```py |
| >>> from huggingface_hub import list_pending_access_requests, accept_access_request |
| |
| # List pending requests |
| >>> requests = list(list_pending_access_requests("meta-llama/Llama-2-7b")) |
| >>> len(requests) |
| 411 |
| >>> requests[0] |
| [ |
| AccessRequest( |
| username='clem', |
| fullname='Clem 🤗', |
| email='***', |
| timestamp=datetime.datetime(2023, 11, 23, 18, 4, 53, 828000, tzinfo=datetime.timezone.utc), |
| status='pending', |
| fields=None, |
| ), |
| ... |
| ] |
| |
| # Accept Clem's request |
| >>> accept_access_request("meta-llama/Llama-2-7b", "clem") |
| ``` |
| """ |
| yield from self._list_access_requests(repo_id, "pending", repo_type=repo_type, token=token) |
|
|
| @validate_hf_hub_args |
| def list_accepted_access_requests( |
| self, repo_id: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None |
| ) -> Iterable[AccessRequest]: |
| """ |
| Get accepted access requests for a given gated repo. |
| |
| An accepted request means the user has requested access to the repo and the request has been accepted. The user |
| can download any file of the repo. If the approval mode is automatic, this list should contains by default all |
| requests. Accepted requests can be cancelled or rejected at any time using [`cancel_access_request`] and |
| [`reject_access_request`]. A cancelled request will go back to the pending list while a rejected request will |
| go to the rejected list. In both cases, the user will lose access to the repo. |
| |
| For more info about gated repos, see https://huggingface.co/docs/hub/models-gated. |
| |
| Args: |
| repo_id (`str`): |
| The id of the repo to get access requests for. |
| repo_type (`str`, *optional*): |
| The type of the repo to get access requests for. Must be one of `model`, `dataset` or `space`. |
| Defaults to `model`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `Iterable[AccessRequest]`: An iterable of [`AccessRequest`] objects. Each time contains a `username`, `email`, |
| `status` and `timestamp` attribute. If the gated repo has a custom form, the `fields` attribute will |
| be populated with user's answers. |
| |
| Raises: |
| [`HfHubHTTPError`]: |
| HTTP 400 if the repo is not gated. |
| [`HfHubHTTPError`]: |
| HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` |
| or `admin` role in the organization the repo belongs to or if you passed a `read` token. |
| |
| Example: |
| ```py |
| >>> from huggingface_hub import list_accepted_access_requests |
| |
| >>> requests = list(list_accepted_access_requests("meta-llama/Llama-2-7b")) |
| >>> len(requests) |
| 411 |
| >>> requests[0] |
| [ |
| AccessRequest( |
| username='clem', |
| fullname='Clem 🤗', |
| email='***', |
| timestamp=datetime.datetime(2023, 11, 23, 18, 4, 53, 828000, tzinfo=datetime.timezone.utc), |
| status='accepted', |
| fields=None, |
| ), |
| ... |
| ] |
| ``` |
| """ |
| yield from self._list_access_requests(repo_id, "accepted", repo_type=repo_type, token=token) |
|
|
| @validate_hf_hub_args |
| def list_rejected_access_requests( |
| self, repo_id: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None |
| ) -> Iterable[AccessRequest]: |
| """ |
| Get rejected access requests for a given gated repo. |
| |
| A rejected request means the user has requested access to the repo and the request has been explicitly rejected |
| by a repo owner (either you or another user from your organization). The user cannot download any file of the |
| repo. Rejected requests can be accepted or cancelled at any time using [`accept_access_request`] and |
| [`cancel_access_request`]. A cancelled request will go back to the pending list while an accepted request will |
| go to the accepted list. |
| |
| For more info about gated repos, see https://huggingface.co/docs/hub/models-gated. |
| |
| Args: |
| repo_id (`str`): |
| The id of the repo to get access requests for. |
| repo_type (`str`, *optional*): |
| The type of the repo to get access requests for. Must be one of `model`, `dataset` or `space`. |
| Defaults to `model`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `Iterable[AccessRequest]`: An iterable of [`AccessRequest`] objects. Each time contains a `username`, `email`, |
| `status` and `timestamp` attribute. If the gated repo has a custom form, the `fields` attribute will |
| be populated with user's answers. |
| |
| Raises: |
| [`HfHubHTTPError`]: |
| HTTP 400 if the repo is not gated. |
| [`HfHubHTTPError`]: |
| HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` |
| or `admin` role in the organization the repo belongs to or if you passed a `read` token. |
| |
| Example: |
| ```py |
| >>> from huggingface_hub import list_rejected_access_requests |
| |
| >>> requests = list(list_rejected_access_requests("meta-llama/Llama-2-7b")) |
| >>> len(requests) |
| 411 |
| >>> requests[0] |
| [ |
| AccessRequest( |
| username='clem', |
| fullname='Clem 🤗', |
| email='***', |
| timestamp=datetime.datetime(2023, 11, 23, 18, 4, 53, 828000, tzinfo=datetime.timezone.utc), |
| status='rejected', |
| fields=None, |
| ), |
| ... |
| ] |
| ``` |
| """ |
| yield from self._list_access_requests(repo_id, "rejected", repo_type=repo_type, token=token) |
|
|
| def _list_access_requests( |
| self, |
| repo_id: str, |
| status: Literal["accepted", "rejected", "pending"], |
| repo_type: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> Iterable[AccessRequest]: |
| if repo_type not in constants.REPO_TYPES: |
| raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") |
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
|
|
| for request in paginate( |
| f"{constants.ENDPOINT}/api/{repo_type}s/{repo_id}/user-access-request/{status}", |
| params={}, |
| headers=self._build_hf_headers(token=token), |
| ): |
| yield AccessRequest( |
| username=request["user"]["user"], |
| fullname=request["user"]["fullname"], |
| email=request["user"].get("email"), |
| status=request["status"], |
| timestamp=parse_datetime(request["timestamp"]), |
| fields=request.get("fields"), |
| ) |
|
|
| @validate_hf_hub_args |
| def cancel_access_request( |
| self, repo_id: str, user: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None |
| ) -> None: |
| """ |
| Cancel an access request from a user for a given gated repo. |
| |
| A cancelled request will go back to the pending list and the user will lose access to the repo. |
| |
| For more info about gated repos, see https://huggingface.co/docs/hub/models-gated. |
| |
| Args: |
| repo_id (`str`): |
| The id of the repo to cancel access request for. |
| user (`str`): |
| The username of the user which access request should be cancelled. |
| repo_type (`str`, *optional*): |
| The type of the repo to cancel access request for. Must be one of `model`, `dataset` or `space`. |
| Defaults to `model`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Raises: |
| [`HfHubHTTPError`]: |
| HTTP 400 if the repo is not gated. |
| [`HfHubHTTPError`]: |
| HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` |
| or `admin` role in the organization the repo belongs to or if you passed a `read` token. |
| [`HfHubHTTPError`]: |
| HTTP 404 if the user does not exist on the Hub. |
| [`HfHubHTTPError`]: |
| HTTP 404 if the user access request cannot be found. |
| [`HfHubHTTPError`]: |
| HTTP 404 if the user access request is already in the pending list. |
| """ |
| self._handle_access_request(repo_id, user, "pending", repo_type=repo_type, token=token) |
|
|
| @validate_hf_hub_args |
| def accept_access_request( |
| self, repo_id: str, user: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None |
| ) -> None: |
| """ |
| Accept an access request from a user for a given gated repo. |
| |
| Once the request is accepted, the user will be able to download any file of the repo and access the community |
| tab. If the approval mode is automatic, you don't have to accept requests manually. An accepted request can be |
| cancelled or rejected at any time using [`cancel_access_request`] and [`reject_access_request`]. |
| |
| For more info about gated repos, see https://huggingface.co/docs/hub/models-gated. |
| |
| Args: |
| repo_id (`str`): |
| The id of the repo to accept access request for. |
| user (`str`): |
| The username of the user which access request should be accepted. |
| repo_type (`str`, *optional*): |
| The type of the repo to accept access request for. Must be one of `model`, `dataset` or `space`. |
| Defaults to `model`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Raises: |
| [`HfHubHTTPError`]: |
| HTTP 400 if the repo is not gated. |
| [`HfHubHTTPError`]: |
| HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` |
| or `admin` role in the organization the repo belongs to or if you passed a `read` token. |
| [`HfHubHTTPError`]: |
| HTTP 404 if the user does not exist on the Hub. |
| [`HfHubHTTPError`]: |
| HTTP 404 if the user access request cannot be found. |
| [`HfHubHTTPError`]: |
| HTTP 404 if the user access request is already in the accepted list. |
| """ |
| self._handle_access_request(repo_id, user, "accepted", repo_type=repo_type, token=token) |
|
|
| @validate_hf_hub_args |
| def reject_access_request( |
| self, |
| repo_id: str, |
| user: str, |
| *, |
| repo_type: Optional[str] = None, |
| rejection_reason: Optional[str], |
| token: Union[bool, str, None] = None, |
| ) -> None: |
| """ |
| Reject an access request from a user for a given gated repo. |
| |
| A rejected request will go to the rejected list. The user cannot download any file of the repo. Rejected |
| requests can be accepted or cancelled at any time using [`accept_access_request`] and [`cancel_access_request`]. |
| A cancelled request will go back to the pending list while an accepted request will go to the accepted list. |
| |
| For more info about gated repos, see https://huggingface.co/docs/hub/models-gated. |
| |
| Args: |
| repo_id (`str`): |
| The id of the repo to reject access request for. |
| user (`str`): |
| The username of the user which access request should be rejected. |
| repo_type (`str`, *optional*): |
| The type of the repo to reject access request for. Must be one of `model`, `dataset` or `space`. |
| Defaults to `model`. |
| rejection_reason (`str`, *optional*): |
| Optional rejection reason that will be visible to the user (max 200 characters). |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Raises: |
| [`HfHubHTTPError`]: |
| HTTP 400 if the repo is not gated. |
| [`HfHubHTTPError`]: |
| HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` |
| or `admin` role in the organization the repo belongs to or if you passed a `read` token. |
| [`HfHubHTTPError`]: |
| HTTP 404 if the user does not exist on the Hub. |
| [`HfHubHTTPError`]: |
| HTTP 404 if the user access request cannot be found. |
| [`HfHubHTTPError`]: |
| HTTP 404 if the user access request is already in the rejected list. |
| """ |
| self._handle_access_request( |
| repo_id, user, "rejected", repo_type=repo_type, rejection_reason=rejection_reason, token=token |
| ) |
|
|
| @validate_hf_hub_args |
| def _handle_access_request( |
| self, |
| repo_id: str, |
| user: str, |
| status: Literal["accepted", "rejected", "pending"], |
| repo_type: Optional[str] = None, |
| rejection_reason: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> None: |
| if repo_type not in constants.REPO_TYPES: |
| raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") |
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
|
|
| payload = {"user": user, "status": status} |
|
|
| if rejection_reason is not None: |
| if status != "rejected": |
| raise ValueError("`rejection_reason` can only be passed when rejecting an access request.") |
| payload["rejectionReason"] = rejection_reason |
|
|
| response = get_session().post( |
| f"{constants.ENDPOINT}/api/{repo_type}s/{repo_id}/user-access-request/handle", |
| headers=self._build_hf_headers(token=token), |
| json=payload, |
| ) |
| hf_raise_for_status(response) |
|
|
| @validate_hf_hub_args |
| def grant_access( |
| self, repo_id: str, user: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None |
| ) -> None: |
| """ |
| Grant access to a user for a given gated repo. |
| |
| Granting access don't require for the user to send an access request by themselves. The user is automatically |
| added to the accepted list meaning they can download the files You can revoke the granted access at any time |
| using [`cancel_access_request`] or [`reject_access_request`]. |
| |
| For more info about gated repos, see https://huggingface.co/docs/hub/models-gated. |
| |
| Args: |
| repo_id (`str`): |
| The id of the repo to grant access to. |
| user (`str`): |
| The username of the user to grant access. |
| repo_type (`str`, *optional*): |
| The type of the repo to grant access to. Must be one of `model`, `dataset` or `space`. |
| Defaults to `model`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Raises: |
| [`HfHubHTTPError`]: |
| HTTP 400 if the repo is not gated. |
| [`HfHubHTTPError`]: |
| HTTP 400 if the user already has access to the repo. |
| [`HfHubHTTPError`]: |
| HTTP 403 if you only have read-only access to the repo. This can be the case if you don't have `write` |
| or `admin` role in the organization the repo belongs to or if you passed a `read` token. |
| [`HfHubHTTPError`]: |
| HTTP 404 if the user does not exist on the Hub. |
| """ |
| if repo_type not in constants.REPO_TYPES: |
| raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") |
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
|
|
| response = get_session().post( |
| f"{constants.ENDPOINT}/api/{repo_type}s/{repo_id}/user-access-request/grant", |
| headers=self._build_hf_headers(token=token), |
| json={"user": user}, |
| ) |
| hf_raise_for_status(response) |
| return response.json() |
|
|
| |
| |
| |
|
|
| @validate_hf_hub_args |
| def get_webhook(self, webhook_id: str, *, token: Union[bool, str, None] = None) -> WebhookInfo: |
| """Get a webhook by its id. |
| |
| Args: |
| webhook_id (`str`): |
| The unique identifier of the webhook to get. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved token, which is the recommended |
| method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`WebhookInfo`]: |
| Info about the webhook. |
| |
| Example: |
| ```python |
| >>> from huggingface_hub import get_webhook |
| >>> webhook = get_webhook("654bbbc16f2ec14d77f109cc") |
| >>> print(webhook) |
| WebhookInfo( |
| id="654bbbc16f2ec14d77f109cc", |
| job=None, |
| watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")], |
| url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", |
| secret="my-secret", |
| domains=["repo", "discussion"], |
| disabled=False, |
| ) |
| ``` |
| """ |
| response = get_session().get( |
| f"{constants.ENDPOINT}/api/settings/webhooks/{webhook_id}", |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(response) |
| webhook_data = response.json()["webhook"] |
|
|
| watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]] |
|
|
| webhook = WebhookInfo( |
| id=webhook_data["id"], |
| url=webhook_data.get("url"), |
| job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None, |
| watched=watched_items, |
| domains=webhook_data["domains"], |
| secret=webhook_data.get("secret"), |
| disabled=webhook_data["disabled"], |
| ) |
|
|
| return webhook |
|
|
| @validate_hf_hub_args |
| def list_webhooks(self, *, token: Union[bool, str, None] = None) -> list[WebhookInfo]: |
| """List all configured webhooks. |
| |
| Args: |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved token, which is the recommended |
| method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `list[WebhookInfo]`: |
| List of webhook info objects. |
| |
| Example: |
| ```python |
| >>> from huggingface_hub import list_webhooks |
| >>> webhooks = list_webhooks() |
| >>> len(webhooks) |
| 2 |
| >>> webhooks[0] |
| WebhookInfo( |
| id="654bbbc16f2ec14d77f109cc", |
| watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")], |
| url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", |
| secret="my-secret", |
| domains=["repo", "discussion"], |
| disabled=False, |
| ) |
| ``` |
| """ |
| response = get_session().get( |
| f"{constants.ENDPOINT}/api/settings/webhooks", |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(response) |
| webhooks_data = response.json() |
|
|
| return [ |
| WebhookInfo( |
| id=webhook["id"], |
| url=webhook.get("url"), |
| job=JobSpec(**webhook["job"]) if webhook.get("job") else None, |
| watched=[WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook["watched"]], |
| domains=webhook["domains"], |
| secret=webhook.get("secret"), |
| disabled=webhook["disabled"], |
| ) |
| for webhook in webhooks_data |
| ] |
|
|
| @validate_hf_hub_args |
| def create_webhook( |
| self, |
| *, |
| url: Optional[str] = None, |
| job_id: Optional[str] = None, |
| watched: list[Union[dict, WebhookWatchedItem]], |
| domains: Optional[list[constants.WEBHOOK_DOMAIN_T]] = None, |
| secret: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> WebhookInfo: |
| """Create a new webhook. |
| |
| The webhook can either send a payload to a URL, or trigger a Job to run on Hugging Face infrastructure. |
| This function should be called with one of `url` or `job_id`, but not both. |
| |
| Args: |
| url (`str`): |
| URL to send the payload to. |
| job_id (`str`): |
| ID of the source Job to trigger with the webhook payload in the environment variable WEBHOOK_PAYLOAD. |
| Additional environment variables are available for convenience: WEBHOOK_REPO_ID, WEBHOOK_REPO_TYPE and WEBHOOK_SECRET. |
| watched (`list[WebhookWatchedItem]`): |
| List of [`WebhookWatchedItem`] to be watched by the webhook. It can be users, orgs, models, datasets or spaces. |
| Watched items can also be provided as plain dictionaries. |
| domains (`list[Literal["repo", "discussion"]]`, optional): |
| List of domains to watch. It can be "repo", "discussion" or both. |
| secret (`str`, optional): |
| A secret to sign the payload with. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved token, which is the recommended |
| method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`WebhookInfo`]: |
| Info about the newly created webhook. |
| |
| Example: |
| |
| Create a webhook that sends a payload to a URL |
| ```python |
| >>> from huggingface_hub import create_webhook |
| >>> payload = create_webhook( |
| ... watched=[{"type": "user", "name": "julien-c"}, {"type": "org", "name": "HuggingFaceH4"}], |
| ... url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", |
| ... domains=["repo", "discussion"], |
| ... secret="my-secret", |
| ... ) |
| >>> print(payload) |
| WebhookInfo( |
| id="654bbbc16f2ec14d77f109cc", |
| url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", |
| job=None, |
| watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")], |
| domains=["repo", "discussion"], |
| secret="my-secret", |
| disabled=False, |
| ) |
| ``` |
| |
| Run a Job and then create a webhook that triggers this Job |
| ```python |
| >>> from huggingface_hub import create_webhook, run_job |
| >>> job = run_job( |
| ... image="ubuntu", |
| ... command=["bash", "-c", r"echo An event occured in $WEBHOOK_REPO_ID: $WEBHOOK_PAYLOAD"], |
| ... ) |
| >>> payload = create_webhook( |
| ... watched=[{"type": "user", "name": "julien-c"}, {"type": "org", "name": "HuggingFaceH4"}], |
| ... job_id=job.id, |
| ... domains=["repo", "discussion"], |
| ... secret="my-secret", |
| ... ) |
| >>> print(payload) |
| WebhookInfo( |
| id="654bbbc16f2ec14d77f109cc", |
| url=None, |
| job=JobSpec( |
| docker_image='ubuntu', |
| space_id=None, |
| command=['bash', '-c', 'echo An event occured in $WEBHOOK_REPO_ID: $WEBHOOK_PAYLOAD'], |
| arguments=[], |
| environment={}, |
| secrets=[], |
| flavor='cpu-basic', |
| timeout=None, |
| tags=None, |
| arch=None |
| ), |
| watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")], |
| domains=["repo", "discussion"], |
| secret="my-secret", |
| disabled=False, |
| ) |
| ``` |
| """ |
| watched_dicts = [asdict(item) if isinstance(item, WebhookWatchedItem) else item for item in watched] |
|
|
| post_webhooks_json = {"watched": watched_dicts, "domains": domains, "secret": secret} |
| if url is not None and job_id is not None: |
| raise ValueError("Set `url` or `job_id` but not both.") |
| elif url is not None: |
| post_webhooks_json["url"] = url |
| elif job_id is not None: |
| post_webhooks_json["jobSourceId"] = job_id |
| else: |
| raise ValueError("Missing argument for webhook: `url` or `job_id`.") |
|
|
| response = get_session().post( |
| f"{constants.ENDPOINT}/api/settings/webhooks", |
| json=post_webhooks_json, |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(response) |
| webhook_data = response.json()["webhook"] |
| watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]] |
|
|
| webhook = WebhookInfo( |
| id=webhook_data["id"], |
| url=webhook_data.get("url"), |
| job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None, |
| watched=watched_items, |
| domains=webhook_data["domains"], |
| secret=webhook_data.get("secret"), |
| disabled=webhook_data["disabled"], |
| ) |
|
|
| return webhook |
|
|
| @validate_hf_hub_args |
| def update_webhook( |
| self, |
| webhook_id: str, |
| *, |
| url: Optional[str] = None, |
| watched: Optional[list[Union[dict, WebhookWatchedItem]]] = None, |
| domains: Optional[list[constants.WEBHOOK_DOMAIN_T]] = None, |
| secret: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> WebhookInfo: |
| """Update an existing webhook. |
| |
| Args: |
| webhook_id (`str`): |
| The unique identifier of the webhook to be updated. |
| url (`str`, optional): |
| The URL to which the payload will be sent. |
| watched (`list[WebhookWatchedItem]`, optional): |
| List of items to watch. It can be users, orgs, models, datasets, or spaces. |
| Refer to [`WebhookWatchedItem`] for more details. Watched items can also be provided as plain dictionaries. |
| domains (`list[Literal["repo", "discussion"]]`, optional): |
| The domains to watch. This can include "repo", "discussion", or both. |
| secret (`str`, optional): |
| A secret to sign the payload with, providing an additional layer of security. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved token, which is the recommended |
| method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`WebhookInfo`]: |
| Info about the updated webhook. |
| |
| Example: |
| ```python |
| >>> from huggingface_hub import update_webhook |
| >>> updated_payload = update_webhook( |
| ... webhook_id="654bbbc16f2ec14d77f109cc", |
| ... url="https://new.webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", |
| ... watched=[{"type": "user", "name": "julien-c"}, {"type": "org", "name": "HuggingFaceH4"}], |
| ... domains=["repo"], |
| ... secret="my-secret", |
| ... ) |
| >>> print(updated_payload) |
| WebhookInfo( |
| id="654bbbc16f2ec14d77f109cc", |
| job=None, |
| url="https://new.webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", |
| watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")], |
| domains=["repo"], |
| secret="my-secret", |
| disabled=False, |
| ``` |
| """ |
| if watched is None: |
| watched = [] |
| watched_dicts = [asdict(item) if isinstance(item, WebhookWatchedItem) else item for item in watched] |
|
|
| response = get_session().post( |
| f"{constants.ENDPOINT}/api/settings/webhooks/{webhook_id}", |
| json={"watched": watched_dicts, "url": url, "domains": domains, "secret": secret}, |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(response) |
| webhook_data = response.json()["webhook"] |
|
|
| watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]] |
|
|
| webhook = WebhookInfo( |
| id=webhook_data["id"], |
| url=webhook_data.get("url"), |
| job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None, |
| watched=watched_items, |
| domains=webhook_data["domains"], |
| secret=webhook_data.get("secret"), |
| disabled=webhook_data["disabled"], |
| ) |
|
|
| return webhook |
|
|
| @validate_hf_hub_args |
| def enable_webhook(self, webhook_id: str, *, token: Union[bool, str, None] = None) -> WebhookInfo: |
| """Enable a webhook (makes it "active"). |
| |
| Args: |
| webhook_id (`str`): |
| The unique identifier of the webhook to enable. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved token, which is the recommended |
| method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`WebhookInfo`]: |
| Info about the enabled webhook. |
| |
| Example: |
| ```python |
| >>> from huggingface_hub import enable_webhook |
| >>> enabled_webhook = enable_webhook("654bbbc16f2ec14d77f109cc") |
| >>> enabled_webhook |
| WebhookInfo( |
| id="654bbbc16f2ec14d77f109cc", |
| job=None, |
| url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", |
| watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")], |
| domains=["repo", "discussion"], |
| secret="my-secret", |
| disabled=False, |
| ) |
| ``` |
| """ |
| response = get_session().post( |
| f"{constants.ENDPOINT}/api/settings/webhooks/{webhook_id}/enable", |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(response) |
| webhook_data = response.json()["webhook"] |
|
|
| watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]] |
|
|
| webhook = WebhookInfo( |
| id=webhook_data["id"], |
| url=webhook_data.get("url"), |
| job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None, |
| watched=watched_items, |
| domains=webhook_data["domains"], |
| secret=webhook_data.get("secret"), |
| disabled=webhook_data["disabled"], |
| ) |
|
|
| return webhook |
|
|
| @validate_hf_hub_args |
| def disable_webhook(self, webhook_id: str, *, token: Union[bool, str, None] = None) -> WebhookInfo: |
| """Disable a webhook (makes it "disabled"). |
| |
| Args: |
| webhook_id (`str`): |
| The unique identifier of the webhook to disable. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved token, which is the recommended |
| method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| [`WebhookInfo`]: |
| Info about the disabled webhook. |
| |
| Example: |
| ```python |
| >>> from huggingface_hub import disable_webhook |
| >>> disabled_webhook = disable_webhook("654bbbc16f2ec14d77f109cc") |
| >>> disabled_webhook |
| WebhookInfo( |
| id="654bbbc16f2ec14d77f109cc", |
| url="https://webhook.site/a2176e82-5720-43ee-9e06-f91cb4c91548", |
| jon=None, |
| watched=[WebhookWatchedItem(type="user", name="julien-c"), WebhookWatchedItem(type="org", name="HuggingFaceH4")], |
| domains=["repo", "discussion"], |
| secret="my-secret", |
| disabled=True, |
| ) |
| ``` |
| """ |
| response = get_session().post( |
| f"{constants.ENDPOINT}/api/settings/webhooks/{webhook_id}/disable", |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(response) |
| webhook_data = response.json()["webhook"] |
|
|
| watched_items = [WebhookWatchedItem(type=item["type"], name=item["name"]) for item in webhook_data["watched"]] |
|
|
| webhook = WebhookInfo( |
| id=webhook_data["id"], |
| url=webhook_data.get("url"), |
| job=JobSpec(**webhook_data["job"]) if webhook_data.get("job") else None, |
| watched=watched_items, |
| domains=webhook_data["domains"], |
| secret=webhook_data.get("secret"), |
| disabled=webhook_data["disabled"], |
| ) |
|
|
| return webhook |
|
|
| @validate_hf_hub_args |
| def delete_webhook(self, webhook_id: str, *, token: Union[bool, str, None] = None) -> None: |
| """Delete a webhook. |
| |
| Args: |
| webhook_id (`str`): |
| The unique identifier of the webhook to delete. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved token, which is the recommended |
| method for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `None` |
| |
| Example: |
| ```python |
| >>> from huggingface_hub import delete_webhook |
| >>> delete_webhook("654bbbc16f2ec14d77f109cc") |
| ``` |
| """ |
| response = get_session().delete( |
| f"{constants.ENDPOINT}/api/settings/webhooks/{webhook_id}", |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(response) |
|
|
| |
| |
| |
|
|
| def _build_hf_headers( |
| self, |
| token: Union[bool, str, None] = None, |
| library_name: Optional[str] = None, |
| library_version: Optional[str] = None, |
| user_agent: Union[dict, str, None] = None, |
| ) -> dict[str, str]: |
| """ |
| Alias for [`build_hf_headers`] that uses the token from [`HfApi`] client |
| when `token` is not provided. |
| """ |
| if token is None: |
| |
| token = self.token |
| return build_hf_headers( |
| token=token, |
| library_name=library_name or self.library_name, |
| library_version=library_version or self.library_version, |
| user_agent=user_agent or self.user_agent, |
| headers=self.headers, |
| ) |
|
|
| def _prepare_folder_deletions( |
| self, |
| repo_id: str, |
| repo_type: Optional[str], |
| revision: Optional[str], |
| path_in_repo: str, |
| delete_patterns: Optional[Union[list[str], str]], |
| token: Union[bool, str, None] = None, |
| ) -> list[CommitOperationDelete]: |
| """Generate the list of Delete operations for a commit to delete files from a repo. |
| |
| List remote files and match them against the `delete_patterns` constraints. Returns a list of [`CommitOperationDelete`] |
| with the matching items. |
| |
| Note: `.gitattributes` file is essential to make a repo work properly on the Hub. This file will always be |
| kept even if it matches the `delete_patterns` constraints. |
| """ |
| if delete_patterns is None: |
| |
| return [] |
|
|
| |
| filenames = self.list_repo_files(repo_id=repo_id, revision=revision, repo_type=repo_type, token=token) |
|
|
| |
| if path_in_repo and path_in_repo not in (".", "./"): |
| path_in_repo = path_in_repo.strip("/") + "/" |
| relpath_to_abspath = { |
| file[len(path_in_repo) :]: file for file in filenames if file.startswith(path_in_repo) |
| } |
| else: |
| relpath_to_abspath = {file: file for file in filenames} |
|
|
| |
| return [ |
| CommitOperationDelete(path_in_repo=relpath_to_abspath[relpath], is_folder=False) |
| for relpath in filter_repo_objects(relpath_to_abspath.keys(), allow_patterns=delete_patterns) |
| if relpath_to_abspath[relpath] != ".gitattributes" |
| ] |
|
|
| def _prepare_upload_folder_additions( |
| self, |
| folder_path: Union[str, Path], |
| path_in_repo: str, |
| allow_patterns: Optional[Union[list[str], str]] = None, |
| ignore_patterns: Optional[Union[list[str], str]] = None, |
| repo_type: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> list[CommitOperationAdd]: |
| """Generate the list of Add operations for a commit to upload a folder. |
| |
| Files not matching the `allow_patterns` (allowlist) and `ignore_patterns` (denylist) |
| constraints are discarded. |
| """ |
|
|
| folder_path = Path(folder_path).expanduser().resolve() |
| if not folder_path.is_dir(): |
| raise ValueError(f"Provided path: '{folder_path}' is not a directory") |
|
|
| |
| relpath_to_abspath = { |
| path.relative_to(folder_path).as_posix(): path |
| for path in sorted(folder_path.glob("**/*")) |
| if path.is_file() |
| } |
|
|
| |
| |
| filtered_repo_objects = list( |
| filter_repo_objects( |
| relpath_to_abspath.keys(), allow_patterns=allow_patterns, ignore_patterns=ignore_patterns |
| ) |
| ) |
|
|
| prefix = f"{path_in_repo.strip('/')}/" if path_in_repo else "" |
|
|
| |
| |
| if "README.md" in filtered_repo_objects: |
| self._validate_yaml( |
| content=relpath_to_abspath["README.md"].read_text(encoding="utf8"), |
| repo_type=repo_type, |
| token=token, |
| ) |
| if len(filtered_repo_objects) > 30: |
| log = logger.warning if len(filtered_repo_objects) > 200 else logger.info |
| log( |
| "It seems you are trying to upload a large folder at once. This might take some time and then fail if " |
| "the folder is too large. For such cases, it is recommended to upload in smaller batches or to use " |
| "`HfApi().upload_large_folder(...)`/`hf upload-large-folder` instead. For more details, " |
| "check out https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#upload-a-large-folder." |
| ) |
|
|
| logger.info(f"Start hashing {len(filtered_repo_objects)} files.") |
| operations = [ |
| CommitOperationAdd( |
| path_or_fileobj=relpath_to_abspath[relpath], |
| path_in_repo=prefix + relpath, |
| ) |
| for relpath in filtered_repo_objects |
| ] |
| logger.info(f"Finished hashing {len(filtered_repo_objects)} files.") |
| return operations |
|
|
| def _validate_yaml(self, content: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None): |
| """ |
| Validate YAML from `README.md`, used before file hashing and upload. |
| |
| Args: |
| content (`str`): |
| Content of `README.md` to validate. |
| repo_type (`str`, *optional*): |
| The type of the repo to grant access to. Must be one of `model`, `dataset` or `space`. |
| Defaults to `model`. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Raises: |
| - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError) |
| if YAML is invalid |
| """ |
| repo_type = repo_type if repo_type is not None else constants.REPO_TYPE_MODEL |
| headers = self._build_hf_headers(token=token) |
|
|
| response = get_session().post( |
| f"{self.endpoint}/api/validate-yaml", |
| json={"content": content, "repoType": repo_type}, |
| headers=headers, |
| ) |
| |
| response_content = response.json() |
| message = "\n".join([f"- {warning.get('message')}" for warning in response_content.get("warnings", [])]) |
| if message: |
| warnings.warn(f"Warnings while validating metadata in README.md:\n{message}") |
|
|
| |
| try: |
| hf_raise_for_status(response) |
| except BadRequestError as e: |
| errors = response_content.get("errors", []) |
| message = "\n".join([f"- {error.get('message')}" for error in errors]) |
| raise ValueError(f"Invalid metadata in README.md.\n{message}") from e |
|
|
| def get_user_overview(self, username: str, token: Union[bool, str, None] = None) -> User: |
| """ |
| Get an overview of a user on the Hub. |
| |
| Args: |
| username (`str`): |
| Username of the user to get an overview of. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `User`: A [`User`] object with the user's overview. |
| |
| Raises: |
| [`HfHubHTTPError`]: |
| HTTP 404 If the user does not exist on the Hub. |
| """ |
| r = get_session().get( |
| f"{constants.ENDPOINT}/api/users/{username}/overview", headers=self._build_hf_headers(token=token) |
| ) |
| hf_raise_for_status(r) |
| return User(**r.json()) |
|
|
| @validate_hf_hub_args |
| def get_organization_overview(self, organization: str, token: Union[bool, str, None] = None) -> Organization: |
| """ |
| Get an overview of an organization on the Hub. |
| |
| Args: |
| organization (`str`): |
| Name of the organization to get an overview of. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved token, which is the recommended method |
| for authentication (see https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `Organization`: An [`Organization`] object with the organization's overview. |
| |
| Raises: |
| [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError): |
| HTTP 404 If the organization does not exist on the Hub. |
| """ |
| r = get_session().get( |
| f"{constants.ENDPOINT}/api/organizations/{organization}/overview", |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(r) |
| return Organization(**r.json()) |
|
|
| @validate_hf_hub_args |
| def list_organization_followers(self, organization: str, token: Union[bool, str, None] = None) -> Iterable[User]: |
| """ |
| List followers of an organization on the Hub. |
| |
| Args: |
| organization (`str`): |
| Name of the organization to get the followers of. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `Iterable[User]`: A list of [`User`] objects with the followers of the organization. |
| |
| Raises: |
| [`HfHubHTTPError`]: |
| HTTP 404 If the organization does not exist on the Hub. |
| |
| """ |
| for follower in paginate( |
| path=f"{constants.ENDPOINT}/api/organizations/{organization}/followers", |
| params={}, |
| headers=self._build_hf_headers(token=token), |
| ): |
| yield User(**follower) |
|
|
| def list_organization_members(self, organization: str, token: Union[bool, str, None] = None) -> Iterable[User]: |
| """ |
| List of members of an organization on the Hub. |
| |
| Args: |
| organization (`str`): |
| Name of the organization to get the members of. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `Iterable[User]`: A list of [`User`] objects with the members of the organization. |
| |
| Raises: |
| [`HfHubHTTPError`]: |
| HTTP 404 If the organization does not exist on the Hub. |
| |
| """ |
| for member in paginate( |
| path=f"{constants.ENDPOINT}/api/organizations/{organization}/members", |
| params={}, |
| headers=self._build_hf_headers(token=token), |
| ): |
| yield User(**member) |
|
|
| def list_user_followers(self, username: str, token: Union[bool, str, None] = None) -> Iterable[User]: |
| """ |
| Get the list of followers of a user on the Hub. |
| |
| Args: |
| username (`str`): |
| Username of the user to get the followers of. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `Iterable[User]`: A list of [`User`] objects with the followers of the user. |
| |
| Raises: |
| [`HfHubHTTPError`]: |
| HTTP 404 If the user does not exist on the Hub. |
| |
| """ |
| for follower in paginate( |
| path=f"{constants.ENDPOINT}/api/users/{username}/followers", |
| params={}, |
| headers=self._build_hf_headers(token=token), |
| ): |
| yield User(**follower) |
|
|
| def list_user_following(self, username: str, token: Union[bool, str, None] = None) -> Iterable[User]: |
| """ |
| Get the list of users followed by a user on the Hub. |
| |
| Args: |
| username (`str`): |
| Username of the user to get the users followed by. |
| token (`bool` or `str`, *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `Iterable[User]`: A list of [`User`] objects with the users followed by the user. |
| |
| Raises: |
| [`HfHubHTTPError`]: |
| HTTP 404 If the user does not exist on the Hub. |
| |
| """ |
| for followed_user in paginate( |
| path=f"{constants.ENDPOINT}/api/users/{username}/following", |
| params={}, |
| headers=self._build_hf_headers(token=token), |
| ): |
| yield User(**followed_user) |
|
|
| def list_papers( |
| self, |
| *, |
| query: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> Iterable[PaperInfo]: |
| """ |
| List daily papers on the Hugging Face Hub given a search query. |
| |
| Args: |
| query (`str`, *optional*): |
| A search query string to find papers. |
| If provided, returns papers that match the query. |
| token (Union[bool, str, None], *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token, which is the recommended method for authentication (see |
| https://huggingface.co/docs/huggingface_hub/quick-start#authentication). |
| To disable authentication, pass `False`. |
| |
| Returns: |
| `Iterable[PaperInfo]`: an iterable of [`huggingface_hub.hf_api.PaperInfo`] objects. |
| |
| Example: |
| |
| ```python |
| >>> from huggingface_hub import HfApi |
| |
| >>> api = HfApi() |
| |
| # List all papers with "attention" in their title |
| >>> api.list_papers(query="attention") |
| ``` |
| """ |
| path = f"{self.endpoint}/api/papers/search" |
| params = {} |
| if query: |
| params["q"] = query |
| r = get_session().get( |
| path, |
| params=params, |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(r) |
| for paper in r.json(): |
| yield PaperInfo(**paper) |
|
|
| def paper_info(self, id: str) -> PaperInfo: |
| """ |
| Get information for a paper on the Hub. |
| |
| Args: |
| id (`str`, **optional**): |
| ArXiv id of the paper. |
| |
| Returns: |
| `PaperInfo`: A `PaperInfo` object. |
| |
| Raises: |
| [`HfHubHTTPError`]: |
| HTTP 404 If the paper does not exist on the Hub. |
| """ |
| path = f"{self.endpoint}/api/papers/{id}" |
| r = get_session().get(path) |
| hf_raise_for_status(r) |
| return PaperInfo(**r.json()) |
|
|
| def list_daily_papers( |
| self, |
| *, |
| date: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| week: Optional[str] = None, |
| month: Optional[str] = None, |
| submitter: Optional[str] = None, |
| sort: Optional[Literal["publishedAt", "trending"]] = None, |
| p: Optional[int] = None, |
| limit: Optional[int] = None, |
| ) -> Iterable[PaperInfo]: |
| """ |
| List the daily papers published on a given date on the Hugging Face Hub. |
| |
| Args: |
| date (`str`, *optional*): |
| Date in ISO format (YYYY-MM-DD) for which to fetch daily papers. |
| Defaults to most recent ones. |
| token (Union[bool, str, None], *optional*): |
| A valid user access token (string). Defaults to the locally saved |
| token. To disable authentication, pass `False`. |
| week (`str`, *optional*): |
| Week in ISO format (YYYY-Www) for which to fetch daily papers. Example, `2025-W09`. |
| month (`str`, *optional*): |
| Month in ISO format (YYYY-MM) for which to fetch daily papers. Example, `2025-02`. |
| submitter (`str`, *optional*): |
| Username of the submitter to filter daily papers. |
| sort (`Literal["publishedAt", "trending"]`, *optional*): |
| Sort order for the daily papers. Can be either by `publishedAt` or by `trending`. |
| Defaults to `"publishedAt"` |
| p (`int`, *optional*): |
| Page number for pagination. Defaults to 0. |
| limit (`int`, *optional*): |
| Limit of papers to fetch. Defaults to 50. |
| |
| Returns: |
| `Iterable[PaperInfo]`: an iterable of [`huggingface_hub.hf_api.PaperInfo`] objects. |
| |
| Example: |
| |
| ```python |
| >>> from huggingface_hub import HfApi |
| |
| >>> api = HfApi() |
| >>> list(api.list_daily_papers(date="2025-10-29")) |
| ``` |
| """ |
| path = f"{self.endpoint}/api/daily_papers" |
|
|
| params = { |
| k: v |
| for k, v in { |
| "p": p, |
| "limit": limit, |
| "sort": sort, |
| "date": date, |
| "week": week, |
| "month": month, |
| "submitter": submitter, |
| }.items() |
| if v is not None |
| } |
|
|
| r = get_session().get(path, params=params, headers=self._build_hf_headers(token=token)) |
| hf_raise_for_status(r) |
| for paper in r.json(): |
| yield PaperInfo(**paper) |
|
|
| def auth_check( |
| self, repo_id: str, *, repo_type: Optional[str] = None, token: Union[bool, str, None] = None |
| ) -> None: |
| """ |
| Check if the provided user token has access to a specific repository on the Hugging Face Hub. |
| |
| This method verifies whether the user, authenticated via the provided token, has access to the specified |
| repository. If the repository is not found or if the user lacks the required permissions to access it, |
| the method raises an appropriate exception. |
| |
| Args: |
| repo_id (`str`): |
| The repository to check for access. Format should be `"user/repo_name"`. |
| Example: `"user/my-cool-model"`. |
| |
| repo_type (`str`, *optional*): |
| The type of the repository. Should be one of `"model"`, `"dataset"`, or `"space"`. |
| If not specified, the default is `"model"`. |
| |
| token `(Union[bool, str, None]`, *optional*): |
| A valid user access token. If not provided, the locally saved token will be used, which is the |
| recommended authentication method. Set to `False` to disable authentication. |
| Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. |
| |
| Raises: |
| [`~utils.RepositoryNotFoundError`]: |
| Raised if the repository does not exist, is private, or the user does not have access. This can |
| occur if the `repo_id` or `repo_type` is incorrect or if the repository is private but the user |
| is not authenticated. |
| |
| [`~utils.GatedRepoError`]: |
| Raised if the repository exists but is gated and the user is not authorized to access it. |
| |
| Example: |
| Check if the user has access to a repository: |
| |
| ```python |
| >>> from huggingface_hub import auth_check |
| >>> from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError |
| |
| try: |
| auth_check("user/my-cool-model") |
| except GatedRepoError: |
| # Handle gated repository error |
| print("You do not have permission to access this gated repository.") |
| except RepositoryNotFoundError: |
| # Handle repository not found error |
| print("The repository was not found or you do not have access.") |
| ``` |
| |
| In this example: |
| - If the user has access, the method completes successfully. |
| - If the repository is gated or does not exist, appropriate exceptions are raised, allowing the user |
| to handle them accordingly. |
| """ |
| headers = self._build_hf_headers(token=token) |
| if repo_type is None: |
| repo_type = constants.REPO_TYPE_MODEL |
| if repo_type not in constants.REPO_TYPES: |
| raise ValueError(f"Invalid repo type, must be one of {constants.REPO_TYPES}") |
| path = f"{self.endpoint}/api/{repo_type}s/{repo_id}/auth-check" |
| r = get_session().get(path, headers=headers) |
| hf_raise_for_status(r) |
|
|
| def run_job( |
| self, |
| *, |
| image: str, |
| command: list[str], |
| env: Optional[dict[str, Any]] = None, |
| secrets: Optional[dict[str, Any]] = None, |
| flavor: Optional[SpaceHardware] = None, |
| timeout: Optional[Union[int, float, str]] = None, |
| namespace: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> JobInfo: |
| """ |
| Run compute Jobs on Hugging Face infrastructure. |
| |
| Args: |
| image (`str`): |
| The Docker image to use. |
| Examples: `"ubuntu"`, `"python:3.12"`, `"pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"`. |
| Example with an image from a Space: `"hf.co/spaces/lhoestq/duckdb"`. |
| |
| command (`list[str]`): |
| The command to run. Example: `["echo", "hello"]`. |
| |
| env (`dict[str, Any]`, *optional*): |
| Defines the environment variables for the Job. |
| |
| secrets (`dict[str, Any]`, *optional*): |
| Defines the secret environment variables for the Job. |
| |
| flavor (`str`, *optional*): |
| Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values. |
| Defaults to `"cpu-basic"`. |
| |
| timeout (`Union[int, float, str]`, *optional*): |
| Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days). |
| Example: `300` or `"5m"` for 5 minutes. |
| |
| namespace (`str`, *optional*): |
| The namespace where the Job will be created. Defaults to the current user's namespace. |
| |
| token `(Union[bool, str, None]`, *optional*): |
| A valid user access token. If not provided, the locally saved token will be used, which is the |
| recommended authentication method. Set to `False` to disable authentication. |
| Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. |
| |
| Example: |
| Run your first Job: |
| |
| ```python |
| >>> from huggingface_hub import run_job |
| >>> run_job(image="python:3.12", command=["python", "-c" ,"print('Hello from HF compute!')"]) |
| ``` |
| |
| Run a GPU Job: |
| |
| ```python |
| >>> from huggingface_hub import run_job |
| >>> image = "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel" |
| >>> command = ["python", "-c", "import torch; print(f"This code ran with the following GPU: {torch.cuda.get_device_name()}")"] |
| >>> run_job(image=image, command=command, flavor="a10g-small") |
| ``` |
| |
| """ |
| if namespace is None: |
| namespace = self.whoami(token=token)["name"] |
| job_spec = _create_job_spec( |
| image=image, |
| command=command, |
| env=env, |
| secrets=secrets, |
| flavor=flavor, |
| timeout=timeout, |
| ) |
| response = get_session().post( |
| f"https://huggingface.co/api/jobs/{namespace}", |
| json=job_spec, |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(response) |
| job_info = response.json() |
| return JobInfo(**job_info, endpoint=self.endpoint) |
|
|
| def fetch_job_logs( |
| self, |
| *, |
| job_id: str, |
| namespace: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> Iterable[str]: |
| """ |
| Fetch all the logs from a compute Job on Hugging Face infrastructure. |
| |
| Args: |
| job_id (`str`): |
| ID of the Job. |
| |
| namespace (`str`, *optional*): |
| The namespace where the Job is running. Defaults to the current user's namespace. |
| |
| token `(Union[bool, str, None]`, *optional*): |
| A valid user access token. If not provided, the locally saved token will be used, which is the |
| recommended authentication method. Set to `False` to disable authentication. |
| Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. |
| |
| Example: |
| |
| ```python |
| >>> from huggingface_hub import fetch_job_logs, run_job |
| >>> job = run_job(image="python:3.12", command=["python", "-c" ,"print('Hello from HF compute!')"]) |
| >>> for log in fetch_job_logs(job.id): |
| ... print(log) |
| Hello from HF compute! |
| ``` |
| """ |
| if namespace is None: |
| namespace = self.whoami(token=token)["name"] |
| logging_finished = logging_started = False |
| job_finished = False |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| max_retries = 5 |
| min_wait_time = 1 |
| max_wait_time = 10 |
| sleep_time = 0 |
| for _ in range(max_retries): |
| time.sleep(sleep_time) |
| sleep_time = min(max_wait_time, max(min_wait_time, sleep_time * 2)) |
| try: |
| with get_session().stream( |
| "GET", |
| f"https://huggingface.co/api/jobs/{namespace}/{job_id}/logs", |
| headers=self._build_hf_headers(token=token), |
| timeout=120, |
| ) as response: |
| log = None |
| for line in response.iter_lines(): |
| if line and line.startswith("data: {"): |
| data = json.loads(line[len("data: ") :]) |
| |
| if not data["data"].startswith("===== Job started"): |
| logging_started = True |
| log = data["data"] |
| yield log |
| logging_finished = logging_started |
| except httpx.DecodingError: |
| |
| break |
| except KeyboardInterrupt: |
| break |
| except httpx.NetworkError as err: |
| is_timeout = err.__context__ and isinstance(getattr(err.__context__, "__cause__", None), TimeoutError) |
| if logging_started or not is_timeout: |
| raise |
| if logging_finished or job_finished: |
| break |
| job_status = ( |
| get_session() |
| .get( |
| f"https://huggingface.co/api/jobs/{namespace}/{job_id}", |
| headers=self._build_hf_headers(token=token), |
| ) |
| .json() |
| ) |
| if "status" in job_status and job_status["status"]["stage"] not in ("RUNNING", "UPDATING"): |
| job_finished = True |
|
|
| def list_jobs( |
| self, |
| *, |
| timeout: Optional[int] = None, |
| namespace: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> list[JobInfo]: |
| """ |
| List compute Jobs on Hugging Face infrastructure. |
| |
| Args: |
| timeout (`float`, *optional*): |
| Whether to set a timeout for the request to the Hub. |
| |
| namespace (`str`, *optional*): |
| The namespace from where it lists the jobs. Defaults to the current user's namespace. |
| |
| token `(Union[bool, str, None]`, *optional*): |
| A valid user access token. If not provided, the locally saved token will be used, which is the |
| recommended authentication method. Set to `False` to disable authentication. |
| Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. |
| """ |
| if namespace is None: |
| namespace = whoami(token=token)["name"] |
| response = get_session().get( |
| f"{self.endpoint}/api/jobs/{namespace}", |
| headers=self._build_hf_headers(token=token), |
| timeout=timeout, |
| ) |
| response.raise_for_status() |
| return [JobInfo(**job_info, endpoint=self.endpoint) for job_info in response.json()] |
|
|
| def inspect_job( |
| self, |
| *, |
| job_id: str, |
| namespace: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> JobInfo: |
| """ |
| Inspect a compute Job on Hugging Face infrastructure. |
| |
| Args: |
| job_id (`str`): |
| ID of the Job. |
| |
| namespace (`str`, *optional*): |
| The namespace where the Job is running. Defaults to the current user's namespace. |
| |
| token `(Union[bool, str, None]`, *optional*): |
| A valid user access token. If not provided, the locally saved token will be used, which is the |
| recommended authentication method. Set to `False` to disable authentication. |
| Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. |
| |
| Example: |
| |
| ```python |
| >>> from huggingface_hub import inspect_job, run_job |
| >>> job = run_job(image="python:3.12", command=["python", "-c" ,"print('Hello from HF compute!')"]) |
| >>> inspect_job(job.id) |
| JobInfo( |
| id='68780d00bbe36d38803f645f', |
| created_at=datetime.datetime(2025, 7, 16, 20, 35, 12, 808000, tzinfo=datetime.timezone.utc), |
| docker_image='python:3.12', |
| space_id=None, |
| command=['python', '-c', "print('Hello from HF compute!')"], |
| arguments=[], |
| environment={}, |
| secrets={}, |
| flavor='cpu-basic', |
| status=JobStatus(stage='RUNNING', message=None) |
| ) |
| ``` |
| """ |
| if namespace is None: |
| namespace = self.whoami(token=token)["name"] |
| response = get_session().get( |
| f"{self.endpoint}/api/jobs/{namespace}/{job_id}", |
| headers=self._build_hf_headers(token=token), |
| ) |
| response.raise_for_status() |
| return JobInfo(**response.json(), endpoint=self.endpoint) |
|
|
| def cancel_job( |
| self, |
| *, |
| job_id: str, |
| namespace: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> None: |
| """ |
| Cancel a compute Job on Hugging Face infrastructure. |
| |
| Args: |
| job_id (`str`): |
| ID of the Job. |
| |
| namespace (`str`, *optional*): |
| The namespace where the Job is running. Defaults to the current user's namespace. |
| |
| token `(Union[bool, str, None]`, *optional*): |
| A valid user access token. If not provided, the locally saved token will be used, which is the |
| recommended authentication method. Set to `False` to disable authentication. |
| Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. |
| """ |
| if namespace is None: |
| namespace = self.whoami(token=token)["name"] |
| get_session().post( |
| f"{self.endpoint}/api/jobs/{namespace}/{job_id}/cancel", |
| headers=self._build_hf_headers(token=token), |
| ).raise_for_status() |
|
|
| @experimental |
| def run_uv_job( |
| self, |
| script: str, |
| *, |
| script_args: Optional[list[str]] = None, |
| dependencies: Optional[list[str]] = None, |
| python: Optional[str] = None, |
| image: Optional[str] = None, |
| env: Optional[dict[str, Any]] = None, |
| secrets: Optional[dict[str, Any]] = None, |
| flavor: Optional[SpaceHardware] = None, |
| timeout: Optional[Union[int, float, str]] = None, |
| namespace: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| _repo: Optional[str] = None, |
| ) -> JobInfo: |
| """ |
| Run a UV script Job on Hugging Face infrastructure. |
| |
| Args: |
| script (`str`): |
| Path or URL of the UV script, or a command. |
| |
| script_args (`list[str]`, *optional*) |
| Arguments to pass to the script or command. |
| |
| dependencies (`list[str]`, *optional*) |
| Dependencies to use to run the UV script. |
| |
| python (`str`, *optional*) |
| Use a specific Python version. Default is 3.12. |
| |
| image (`str`, *optional*, defaults to "ghcr.io/astral-sh/uv:python3.12-bookworm"): |
| Use a custom Docker image with `uv` installed. |
| |
| env (`dict[str, Any]`, *optional*): |
| Defines the environment variables for the Job. |
| |
| secrets (`dict[str, Any]`, *optional*): |
| Defines the secret environment variables for the Job. |
| |
| flavor (`str`, *optional*): |
| Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values. |
| Defaults to `"cpu-basic"`. |
| |
| timeout (`Union[int, float, str]`, *optional*): |
| Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days). |
| Example: `300` or `"5m"` for 5 minutes. |
| |
| namespace (`str`, *optional*): |
| The namespace where the Job will be created. Defaults to the current user's namespace. |
| |
| token `(Union[bool, str, None]`, *optional*): |
| A valid user access token. If not provided, the locally saved token will be used, which is the |
| recommended authentication method. Set to `False` to disable authentication. |
| Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. |
| |
| Example: |
| |
| Run a script from a URL: |
| |
| ```python |
| >>> from huggingface_hub import run_uv_job |
| >>> script = "https://raw.githubusercontent.com/huggingface/trl/refs/heads/main/trl/scripts/sft.py" |
| >>> script_args = ["--model_name_or_path", "Qwen/Qwen2-0.5B", "--dataset_name", "trl-lib/Capybara", "--push_to_hub"] |
| >>> run_uv_job(script, script_args=script_args, dependencies=["trl"], flavor="a10g-small") |
| ``` |
| |
| Run a local script: |
| |
| ```python |
| >>> from huggingface_hub import run_uv_job |
| >>> script = "my_sft.py" |
| >>> script_args = ["--model_name_or_path", "Qwen/Qwen2-0.5B", "--dataset_name", "trl-lib/Capybara", "--push_to_hub"] |
| >>> run_uv_job(script, script_args=script_args, dependencies=["trl"], flavor="a10g-small") |
| ``` |
| |
| Run a command: |
| |
| ```python |
| >>> from huggingface_hub import run_uv_job |
| >>> script = "lighteval" |
| >>> script_args= ["endpoint", "inference-providers", "model_name=openai/gpt-oss-20b,provider=auto", "lighteval|gsm8k|0|0"] |
| >>> run_uv_job(script, script_args=script_args, dependencies=["lighteval"], flavor="a10g-small") |
| ``` |
| """ |
| image = image or "ghcr.io/astral-sh/uv:python3.12-bookworm" |
| env = env or {} |
| secrets = secrets or {} |
|
|
| |
| command, env, secrets = self._create_uv_command_env_and_secrets( |
| script=script, |
| script_args=script_args, |
| dependencies=dependencies, |
| python=python, |
| env=env, |
| secrets=secrets, |
| namespace=namespace, |
| token=token, |
| _repo=_repo, |
| ) |
| |
| return self.run_job( |
| image=image, |
| command=command, |
| env=env, |
| secrets=secrets, |
| flavor=flavor, |
| timeout=timeout, |
| namespace=namespace, |
| token=token, |
| ) |
|
|
| def create_scheduled_job( |
| self, |
| *, |
| image: str, |
| command: list[str], |
| schedule: str, |
| suspend: Optional[bool] = None, |
| concurrency: Optional[bool] = None, |
| env: Optional[dict[str, Any]] = None, |
| secrets: Optional[dict[str, Any]] = None, |
| flavor: Optional[SpaceHardware] = None, |
| timeout: Optional[Union[int, float, str]] = None, |
| namespace: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> ScheduledJobInfo: |
| """ |
| Create scheduled compute Jobs on Hugging Face infrastructure. |
| |
| Args: |
| image (`str`): |
| The Docker image to use. |
| Examples: `"ubuntu"`, `"python:3.12"`, `"pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"`. |
| Example with an image from a Space: `"hf.co/spaces/lhoestq/duckdb"`. |
| |
| command (`list[str]`): |
| The command to run. Example: `["echo", "hello"]`. |
| |
| schedule (`str`): |
| One of "@annually", "@yearly", "@monthly", "@weekly", "@daily", "@hourly", or a |
| CRON schedule expression (e.g., '0 9 * * 1' for 9 AM every Monday). |
| |
| suspend (`bool`, *optional*): |
| If True, the scheduled Job is suspended (paused). Defaults to False. |
| |
| concurrency (`bool`, *optional*): |
| If True, multiple instances of this Job can run concurrently. Defaults to False. |
| |
| env (`dict[str, Any]`, *optional*): |
| Defines the environment variables for the Job. |
| |
| secrets (`dict[str, Any]`, *optional*): |
| Defines the secret environment variables for the Job. |
| |
| flavor (`str`, *optional*): |
| Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values. |
| Defaults to `"cpu-basic"`. |
| |
| timeout (`Union[int, float, str]`, *optional*): |
| Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days). |
| Example: `300` or `"5m"` for 5 minutes. |
| |
| namespace (`str`, *optional*): |
| The namespace where the Job will be created. Defaults to the current user's namespace. |
| |
| token `(Union[bool, str, None]`, *optional*): |
| A valid user access token. If not provided, the locally saved token will be used, which is the |
| recommended authentication method. Set to `False` to disable authentication. |
| Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. |
| |
| Example: |
| Create your first scheduled Job: |
| |
| ```python |
| >>> from huggingface_hub import create_scheduled_job |
| >>> create_scheduled_job(image="python:3.12", command=["python", "-c" ,"print('Hello from HF compute!')"], schedule="@hourly") |
| ``` |
| |
| Use a CRON schedule expression: |
| |
| ```python |
| >>> from huggingface_hub import create_scheduled_job |
| >>> create_scheduled_job(image="python:3.12", command=["python", "-c" ,"print('this runs every 5min')"], schedule="*/5 * * * *") |
| ``` |
| |
| Create a scheduled GPU Job: |
| |
| ```python |
| >>> from huggingface_hub import create_scheduled_job |
| >>> image = "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel" |
| >>> command = ["python", "-c", "import torch; print(f"This code ran with the following GPU: {torch.cuda.get_device_name()}")"] |
| >>> create_scheduled_job(image, command, flavor="a10g-small", schedule="@hourly") |
| ``` |
| |
| """ |
| if namespace is None: |
| namespace = self.whoami(token=token)["name"] |
|
|
| |
| job_spec = _create_job_spec( |
| image=image, |
| command=command, |
| env=env, |
| secrets=secrets, |
| flavor=flavor, |
| timeout=timeout, |
| ) |
| input_json: dict[str, Any] = { |
| "jobSpec": job_spec, |
| "schedule": schedule, |
| } |
| if concurrency is not None: |
| input_json["concurrency"] = concurrency |
| if suspend is not None: |
| input_json["suspend"] = suspend |
| response = get_session().post( |
| f"https://huggingface.co/api/scheduled-jobs/{namespace}", |
| json=input_json, |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(response) |
| scheduled_job_info = response.json() |
| return ScheduledJobInfo(**scheduled_job_info) |
|
|
| def list_scheduled_jobs( |
| self, |
| *, |
| timeout: Optional[int] = None, |
| namespace: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> list[ScheduledJobInfo]: |
| """ |
| List scheduled compute Jobs on Hugging Face infrastructure. |
| |
| Args: |
| timeout (`float`, *optional*): |
| Whether to set a timeout for the request to the Hub. |
| |
| namespace (`str`, *optional*): |
| The namespace from where it lists the jobs. Defaults to the current user's namespace. |
| |
| token `(Union[bool, str, None]`, *optional*): |
| A valid user access token. If not provided, the locally saved token will be used, which is the |
| recommended authentication method. Set to `False` to disable authentication. |
| Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. |
| """ |
| if namespace is None: |
| namespace = self.whoami(token=token)["name"] |
| response = get_session().get( |
| f"{self.endpoint}/api/scheduled-jobs/{namespace}", |
| headers=self._build_hf_headers(token=token), |
| timeout=timeout, |
| ) |
| hf_raise_for_status(response) |
| return [ScheduledJobInfo(**scheduled_job_info) for scheduled_job_info in response.json()] |
|
|
| def inspect_scheduled_job( |
| self, |
| *, |
| scheduled_job_id: str, |
| namespace: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> ScheduledJobInfo: |
| """ |
| Inspect a scheduled compute Job on Hugging Face infrastructure. |
| |
| Args: |
| scheduled_job_id (`str`): |
| ID of the scheduled Job. |
| |
| namespace (`str`, *optional*): |
| The namespace where the scheduled Job is. Defaults to the current user's namespace. |
| |
| token `(Union[bool, str, None]`, *optional*): |
| A valid user access token. If not provided, the locally saved token will be used, which is the |
| recommended authentication method. Set to `False` to disable authentication. |
| Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. |
| |
| Example: |
| |
| ```python |
| >>> from huggingface_hub import inspect_job, create_scheduled_job |
| >>> scheduled_job = create_scheduled_job(image="python:3.12", command=["python", "-c" ,"print('Hello from HF compute!')"], schedule="@hourly") |
| >>> inspect_scheduled_job(scheduled_job.id) |
| ``` |
| """ |
| if namespace is None: |
| namespace = self.whoami(token=token)["name"] |
| response = get_session().get( |
| f"{self.endpoint}/api/scheduled-jobs/{namespace}/{scheduled_job_id}", |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(response) |
| return ScheduledJobInfo(**response.json()) |
|
|
| def delete_scheduled_job( |
| self, |
| *, |
| scheduled_job_id: str, |
| namespace: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> None: |
| """ |
| Delete a scheduled compute Job on Hugging Face infrastructure. |
| |
| Args: |
| scheduled_job_id (`str`): |
| ID of the scheduled Job. |
| |
| namespace (`str`, *optional*): |
| The namespace where the scheduled Job is. Defaults to the current user's namespace. |
| |
| token `(Union[bool, str, None]`, *optional*): |
| A valid user access token. If not provided, the locally saved token will be used, which is the |
| recommended authentication method. Set to `False` to disable authentication. |
| Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. |
| """ |
| if namespace is None: |
| namespace = self.whoami(token=token)["name"] |
| response = get_session().delete( |
| f"{self.endpoint}/api/scheduled-jobs/{namespace}/{scheduled_job_id}", |
| headers=self._build_hf_headers(token=token), |
| ) |
| hf_raise_for_status(response) |
|
|
| def suspend_scheduled_job( |
| self, |
| *, |
| scheduled_job_id: str, |
| namespace: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> None: |
| """ |
| Suspend (pause) a scheduled compute Job on Hugging Face infrastructure. |
| |
| Args: |
| scheduled_job_id (`str`): |
| ID of the scheduled Job. |
| |
| namespace (`str`, *optional*): |
| The namespace where the scheduled Job is. Defaults to the current user's namespace. |
| |
| token `(Union[bool, str, None]`, *optional*): |
| A valid user access token. If not provided, the locally saved token will be used, which is the |
| recommended authentication method. Set to `False` to disable authentication. |
| Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. |
| """ |
| if namespace is None: |
| namespace = self.whoami(token=token)["name"] |
| get_session().post( |
| f"{self.endpoint}/api/scheduled-jobs/{namespace}/{scheduled_job_id}/suspend", |
| headers=self._build_hf_headers(token=token), |
| ).raise_for_status() |
|
|
| def resume_scheduled_job( |
| self, |
| *, |
| scheduled_job_id: str, |
| namespace: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| ) -> None: |
| """ |
| Resume (unpause) a scheduled compute Job on Hugging Face infrastructure. |
| |
| Args: |
| scheduled_job_id (`str`): |
| ID of the scheduled Job. |
| |
| namespace (`str`, *optional*): |
| The namespace where the scheduled Job is. Defaults to the current user's namespace. |
| |
| token `(Union[bool, str, None]`, *optional*): |
| A valid user access token. If not provided, the locally saved token will be used, which is the |
| recommended authentication method. Set to `False` to disable authentication. |
| Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. |
| """ |
| if namespace is None: |
| namespace = self.whoami(token=token)["name"] |
| get_session().post( |
| f"{self.endpoint}/api/scheduled-jobs/{namespace}/{scheduled_job_id}/resume", |
| headers=self._build_hf_headers(token=token), |
| ).raise_for_status() |
|
|
| @experimental |
| def create_scheduled_uv_job( |
| self, |
| script: str, |
| *, |
| script_args: Optional[list[str]] = None, |
| schedule: str, |
| suspend: Optional[bool] = None, |
| concurrency: Optional[bool] = None, |
| dependencies: Optional[list[str]] = None, |
| python: Optional[str] = None, |
| image: Optional[str] = None, |
| env: Optional[dict[str, Any]] = None, |
| secrets: Optional[dict[str, Any]] = None, |
| flavor: Optional[SpaceHardware] = None, |
| timeout: Optional[Union[int, float, str]] = None, |
| namespace: Optional[str] = None, |
| token: Union[bool, str, None] = None, |
| _repo: Optional[str] = None, |
| ) -> ScheduledJobInfo: |
| """ |
| Run a UV script Job on Hugging Face infrastructure. |
| |
| Args: |
| script (`str`): |
| Path or URL of the UV script, or a command. |
| |
| script_args (`list[str]`, *optional*) |
| Arguments to pass to the script, or a command. |
| |
| schedule (`str`): |
| One of "@annually", "@yearly", "@monthly", "@weekly", "@daily", "@hourly", or a |
| CRON schedule expression (e.g., '0 9 * * 1' for 9 AM every Monday). |
| |
| suspend (`bool`, *optional*): |
| If True, the scheduled Job is suspended (paused). Defaults to False. |
| |
| concurrency (`bool`, *optional*): |
| If True, multiple instances of this Job can run concurrently. Defaults to False. |
| |
| dependencies (`list[str]`, *optional*) |
| Dependencies to use to run the UV script. |
| |
| python (`str`, *optional*) |
| Use a specific Python version. Default is 3.12. |
| |
| image (`str`, *optional*, defaults to "ghcr.io/astral-sh/uv:python3.12-bookworm"): |
| Use a custom Docker image with `uv` installed. |
| |
| env (`dict[str, Any]`, *optional*): |
| Defines the environment variables for the Job. |
| |
| secrets (`dict[str, Any]`, *optional*): |
| Defines the secret environment variables for the Job. |
| |
| flavor (`str`, *optional*): |
| Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values. |
| Defaults to `"cpu-basic"`. |
| |
| timeout (`Union[int, float, str]`, *optional*): |
| Max duration for the Job: int/float with s (seconds, default), m (minutes), h (hours) or d (days). |
| Example: `300` or `"5m"` for 5 minutes. |
| |
| namespace (`str`, *optional*): |
| The namespace where the Job will be created. Defaults to the current user's namespace. |
| |
| token `(Union[bool, str, None]`, *optional*): |
| A valid user access token. If not provided, the locally saved token will be used, which is the |
| recommended authentication method. Set to `False` to disable authentication. |
| Refer to: https://huggingface.co/docs/huggingface_hub/quick-start#authentication. |
| |
| Example: |
| |
| Schedule a script from a URL: |
| |
| ```python |
| >>> from huggingface_hub import create_scheduled_uv_job |
| >>> script = "https://raw.githubusercontent.com/huggingface/trl/refs/heads/main/trl/scripts/sft.py" |
| >>> script_args = ["--model_name_or_path", "Qwen/Qwen2-0.5B", "--dataset_name", "trl-lib/Capybara", "--push_to_hub"] |
| >>> create_scheduled_uv_job(script, script_args=script_args, dependencies=["trl"], flavor="a10g-small", schedule="@weekly") |
| ``` |
| |
| Schedule a local script: |
| |
| ```python |
| >>> from huggingface_hub import create_scheduled_uv_job |
| >>> script = "my_sft.py" |
| >>> script_args = ["--model_name_or_path", "Qwen/Qwen2-0.5B", "--dataset_name", "trl-lib/Capybara", "--push_to_hub"] |
| >>> create_scheduled_uv_job(script, script_args=script_args, dependencies=["trl"], flavor="a10g-small", schedule="@weekly") |
| ``` |
| |
| Schedule a command: |
| |
| ```python |
| >>> from huggingface_hub import create_scheduled_uv_job |
| >>> script = "lighteval" |
| >>> script_args= ["endpoint", "inference-providers", "model_name=openai/gpt-oss-20b,provider=auto", "lighteval|gsm8k|0|0"] |
| >>> create_scheduled_uv_job(script, script_args=script_args, dependencies=["lighteval"], flavor="a10g-small", schedule="@weekly") |
| ``` |
| """ |
| image = image or "ghcr.io/astral-sh/uv:python3.12-bookworm" |
| |
| command, env, secrets = self._create_uv_command_env_and_secrets( |
| script=script, |
| script_args=script_args, |
| dependencies=dependencies, |
| python=python, |
| env=env, |
| secrets=secrets, |
| namespace=namespace, |
| token=token, |
| _repo=_repo, |
| ) |
| |
| return self.create_scheduled_job( |
| image=image, |
| command=command, |
| schedule=schedule, |
| suspend=suspend, |
| concurrency=concurrency, |
| env=env, |
| secrets=secrets, |
| flavor=flavor, |
| timeout=timeout, |
| namespace=namespace, |
| token=token, |
| ) |
|
|
| def _create_uv_command_env_and_secrets( |
| self, |
| *, |
| script: str, |
| script_args: Optional[list[str]], |
| dependencies: Optional[list[str]], |
| python: Optional[str], |
| env: Optional[dict[str, Any]], |
| secrets: Optional[dict[str, Any]], |
| namespace: Optional[str], |
| token: Union[bool, str, None], |
| _repo: Optional[str], |
| ) -> tuple[list[str], dict[str, Any], dict[str, Any]]: |
| env = env or {} |
| secrets = secrets or {} |
|
|
| |
| uv_args = [] |
| if dependencies: |
| for dependency in dependencies: |
| uv_args += ["--with", dependency] |
| if python: |
| uv_args += ["--python", python] |
| script_args = script_args or [] |
|
|
| if namespace is None: |
| namespace = self.whoami(token=token)["name"] |
|
|
| is_url = script.startswith("http://") or script.startswith("https://") |
| if is_url or not Path(script).is_file(): |
| |
| command = ["uv", "run"] + uv_args + [script] + script_args |
| else: |
| |
| script_path = Path(script) |
| filename = script_path.name |
| |
| if _repo: |
| repo_id = _repo |
| if "/" not in repo_id: |
| repo_id = f"{namespace}/{repo_id}" |
| else: |
| repo_id = f"{namespace}/hf-cli-jobs-uv-run-scripts" |
|
|
| |
| try: |
| self.repo_info(repo_id, repo_type="dataset") |
| logger.debug(f"Using existing repository: {repo_id}") |
| except RepositoryNotFoundError: |
| logger.info(f"Creating repository: {repo_id}") |
| create_repo(repo_id, repo_type="dataset", private=True, exist_ok=True) |
|
|
| |
| logger.info(f"Uploading {script_path.name} to {repo_id}...") |
| with open(script_path, "r") as f: |
| script_content = f.read() |
|
|
| commit_hash = self.upload_file( |
| path_or_fileobj=script_content.encode(), |
| path_in_repo=filename, |
| repo_id=repo_id, |
| repo_type="dataset", |
| ).oid |
|
|
| script_url = f"{self.endpoint}/datasets/{repo_id}/resolve/{commit_hash}/{filename}" |
| repo_url = f"{self.endpoint}/datasets/{repo_id}" |
|
|
| logger.debug(f"✓ Script uploaded to: {repo_url}/blob/main/{filename}") |
|
|
| |
| timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S UTC") |
| readme_content = dedent( |
| f""" |
| --- |
| tags: |
| - hf-cli-jobs-uv-script |
| - ephemeral |
| viewer: false |
| --- |
| |
| # UV Script: {filename} |
| |
| Executed via `hf jobs uv run` on {timestamp} |
| |
| ## Run this script |
| |
| ```bash |
| hf jobs uv run {filename} |
| ``` |
| |
| --- |
| *Created with [hf jobs](https://huggingface.co/docs/huggingface_hub/main/en/guides/jobs)* |
| """ |
| ) |
| self.upload_file( |
| path_or_fileobj=readme_content.encode(), |
| path_in_repo="README.md", |
| repo_id=repo_id, |
| repo_type="dataset", |
| ) |
|
|
| secrets["UV_SCRIPT_HF_TOKEN"] = token or self.token or get_token() |
| env["UV_SCRIPT_URL"] = script_url |
|
|
| pre_command = ( |
| dedent( |
| """ |
| import urllib.request |
| import os |
| from pathlib import Path |
| o = urllib.request.build_opener() |
| o.addheaders = [("Authorization", "Bearer " + os.environ["UV_SCRIPT_HF_TOKEN"])] |
| Path("/tmp/script.py").write_bytes(o.open(os.environ["UV_SCRIPT_URL"]).read()) |
| """ |
| ) |
| .strip() |
| .replace('"', r"\"") |
| .split("\n") |
| ) |
| pre_command = ["python", "-c", '"' + "; ".join(pre_command) + '"'] |
| command = ["uv", "run"] + uv_args + ["/tmp/script.py"] + script_args |
| command = ["bash", "-c", " ".join(pre_command) + " && " + " ".join(command)] |
| return command, env, secrets |
|
|
|
|
| def _parse_revision_from_pr_url(pr_url: str) -> str: |
| """Safely parse revision number from a PR url. |
| |
| Example: |
| ```py |
| >>> _parse_revision_from_pr_url("https://huggingface.co/bigscience/bloom/discussions/2") |
| "refs/pr/2" |
| ``` |
| """ |
| re_match = re.match(_REGEX_DISCUSSION_URL, pr_url) |
| if re_match is None: |
| raise RuntimeError(f"Unexpected response from the hub, expected a Pull Request URL but got: '{pr_url}'") |
| return f"refs/pr/{re_match[1]}" |
|
|
|
|
| api = HfApi() |
|
|
| whoami = api.whoami |
| auth_check = api.auth_check |
|
|
| list_models = api.list_models |
| model_info = api.model_info |
|
|
| list_datasets = api.list_datasets |
| dataset_info = api.dataset_info |
|
|
| list_spaces = api.list_spaces |
| space_info = api.space_info |
|
|
| list_papers = api.list_papers |
| paper_info = api.paper_info |
| list_daily_papers = api.list_daily_papers |
|
|
| repo_exists = api.repo_exists |
| revision_exists = api.revision_exists |
| file_exists = api.file_exists |
| repo_info = api.repo_info |
| list_repo_files = api.list_repo_files |
| list_repo_refs = api.list_repo_refs |
| list_repo_commits = api.list_repo_commits |
| list_repo_tree = api.list_repo_tree |
| get_paths_info = api.get_paths_info |
| verify_repo_checksums = api.verify_repo_checksums |
|
|
| get_model_tags = api.get_model_tags |
| get_dataset_tags = api.get_dataset_tags |
|
|
| create_commit = api.create_commit |
| create_repo = api.create_repo |
| delete_repo = api.delete_repo |
| update_repo_settings = api.update_repo_settings |
| move_repo = api.move_repo |
| upload_file = api.upload_file |
| upload_folder = api.upload_folder |
| delete_file = api.delete_file |
| delete_folder = api.delete_folder |
| delete_files = api.delete_files |
| upload_large_folder = api.upload_large_folder |
| preupload_lfs_files = api.preupload_lfs_files |
| create_branch = api.create_branch |
| delete_branch = api.delete_branch |
| create_tag = api.create_tag |
| delete_tag = api.delete_tag |
| get_full_repo_name = api.get_full_repo_name |
|
|
| |
| super_squash_history = api.super_squash_history |
| list_lfs_files = api.list_lfs_files |
| permanently_delete_lfs_files = api.permanently_delete_lfs_files |
|
|
| |
| get_safetensors_metadata = api.get_safetensors_metadata |
| parse_safetensors_file_metadata = api.parse_safetensors_file_metadata |
|
|
| |
| run_as_future = api.run_as_future |
|
|
| |
| list_liked_repos = api.list_liked_repos |
| list_repo_likers = api.list_repo_likers |
| unlike = api.unlike |
|
|
| |
| get_discussion_details = api.get_discussion_details |
| get_repo_discussions = api.get_repo_discussions |
| create_discussion = api.create_discussion |
| create_pull_request = api.create_pull_request |
| change_discussion_status = api.change_discussion_status |
| comment_discussion = api.comment_discussion |
| edit_discussion_comment = api.edit_discussion_comment |
| rename_discussion = api.rename_discussion |
| merge_pull_request = api.merge_pull_request |
|
|
| |
| add_space_secret = api.add_space_secret |
| delete_space_secret = api.delete_space_secret |
| get_space_variables = api.get_space_variables |
| add_space_variable = api.add_space_variable |
| delete_space_variable = api.delete_space_variable |
| get_space_runtime = api.get_space_runtime |
| request_space_hardware = api.request_space_hardware |
| set_space_sleep_time = api.set_space_sleep_time |
| pause_space = api.pause_space |
| restart_space = api.restart_space |
| duplicate_space = api.duplicate_space |
| request_space_storage = api.request_space_storage |
| delete_space_storage = api.delete_space_storage |
|
|
| |
| list_inference_endpoints = api.list_inference_endpoints |
| create_inference_endpoint = api.create_inference_endpoint |
| get_inference_endpoint = api.get_inference_endpoint |
| update_inference_endpoint = api.update_inference_endpoint |
| delete_inference_endpoint = api.delete_inference_endpoint |
| pause_inference_endpoint = api.pause_inference_endpoint |
| resume_inference_endpoint = api.resume_inference_endpoint |
| scale_to_zero_inference_endpoint = api.scale_to_zero_inference_endpoint |
| create_inference_endpoint_from_catalog = api.create_inference_endpoint_from_catalog |
| list_inference_catalog = api.list_inference_catalog |
|
|
| |
| get_collection = api.get_collection |
| list_collections = api.list_collections |
| create_collection = api.create_collection |
| update_collection_metadata = api.update_collection_metadata |
| delete_collection = api.delete_collection |
| add_collection_item = api.add_collection_item |
| update_collection_item = api.update_collection_item |
| delete_collection_item = api.delete_collection_item |
| delete_collection_item = api.delete_collection_item |
|
|
| |
| list_pending_access_requests = api.list_pending_access_requests |
| list_accepted_access_requests = api.list_accepted_access_requests |
| list_rejected_access_requests = api.list_rejected_access_requests |
| cancel_access_request = api.cancel_access_request |
| accept_access_request = api.accept_access_request |
| reject_access_request = api.reject_access_request |
| grant_access = api.grant_access |
|
|
| |
| create_webhook = api.create_webhook |
| disable_webhook = api.disable_webhook |
| delete_webhook = api.delete_webhook |
| enable_webhook = api.enable_webhook |
| get_webhook = api.get_webhook |
| list_webhooks = api.list_webhooks |
| update_webhook = api.update_webhook |
|
|
|
|
| |
| get_user_overview = api.get_user_overview |
| get_organization_overview = api.get_organization_overview |
| list_organization_followers = api.list_organization_followers |
| list_organization_members = api.list_organization_members |
| list_user_followers = api.list_user_followers |
| list_user_following = api.list_user_following |
|
|
| |
| run_job = api.run_job |
| fetch_job_logs = api.fetch_job_logs |
| list_jobs = api.list_jobs |
| inspect_job = api.inspect_job |
| cancel_job = api.cancel_job |
| run_uv_job = api.run_uv_job |
| create_scheduled_job = api.create_scheduled_job |
| list_scheduled_jobs = api.list_scheduled_jobs |
| inspect_scheduled_job = api.inspect_scheduled_job |
| delete_scheduled_job = api.delete_scheduled_job |
| suspend_scheduled_job = api.suspend_scheduled_job |
| resume_scheduled_job = api.resume_scheduled_job |
| create_scheduled_uv_job = api.create_scheduled_uv_job |
|
|