koichi12 committed on Feb 12, 2025

Commit

9a8eae1

verified ·

1 Parent(s): 80a73eb

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/__init__.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/__main__.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/api.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/cd.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/constant.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/legacy.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/md.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/models.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/utils.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/version.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/charset_normalizer/cli/__init__.py +8 -0
.venv/lib/python3.11/site-packages/charset_normalizer/cli/__main__.py +321 -0
.venv/lib/python3.11/site-packages/charset_normalizer/cli/__pycache__/__init__.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/charset_normalizer/cli/__pycache__/__main__.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/__init__.py +1434 -0
.venv/lib/python3.11/site-packages/huggingface_hub/_commit_api.py +758 -0
.venv/lib/python3.11/site-packages/huggingface_hub/_commit_scheduler.py +353 -0
.venv/lib/python3.11/site-packages/huggingface_hub/_inference_endpoints.py +402 -0
.venv/lib/python3.11/site-packages/huggingface_hub/_local_folder.py +432 -0
.venv/lib/python3.11/site-packages/huggingface_hub/_login.py +520 -0
.venv/lib/python3.11/site-packages/huggingface_hub/_snapshot_download.py +307 -0
.venv/lib/python3.11/site-packages/huggingface_hub/_space_api.py +160 -0
.venv/lib/python3.11/site-packages/huggingface_hub/_tensorboard_logger.py +194 -0
.venv/lib/python3.11/site-packages/huggingface_hub/_upload_large_folder.py +621 -0
.venv/lib/python3.11/site-packages/huggingface_hub/_webhooks_payload.py +137 -0
.venv/lib/python3.11/site-packages/huggingface_hub/_webhooks_server.py +386 -0
.venv/lib/python3.11/site-packages/huggingface_hub/community.py +355 -0
.venv/lib/python3.11/site-packages/huggingface_hub/constants.py +229 -0
.venv/lib/python3.11/site-packages/huggingface_hub/errors.py +329 -0
.venv/lib/python3.11/site-packages/huggingface_hub/fastai_utils.py +425 -0
.venv/lib/python3.11/site-packages/huggingface_hub/file_download.py +1621 -0
.venv/lib/python3.11/site-packages/huggingface_hub/hf_api.py +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py +1140 -0
.venv/lib/python3.11/site-packages/huggingface_hub/hub_mixin.py +836 -0
.venv/lib/python3.11/site-packages/huggingface_hub/inference/__init__.py +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/inference/__pycache__/__init__.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/inference/__pycache__/_common.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/inference/_client.py +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/inference/_common.py +446 -0
.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/__init__.py +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/_async_client.py +0 -0
.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/types/audio_to_audio.py +31 -0
.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +115 -0
.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/types/depth_estimation.py +29 -0
.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/types/fill_mask.py +48 -0
.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/types/image_segmentation.py +52 -0
.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/types/image_to_image.py +55 -0
.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/types/image_to_text.py +102 -0
.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/types/object_detection.py +59 -0

.gitattributes CHANGED Viewed

@@ -122,3 +122,4 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/_
 .venv/lib/python3.11/site-packages/opencv_python_headless.libs/libvpx-9f572e11.so.9.1.0 filter=lfs diff=lfs merge=lfs -text
 .venv/lib/python3.11/site-packages/nvidia/cuda_runtime/lib/libcudart.so.12 filter=lfs diff=lfs merge=lfs -text
 .venv/lib/python3.11/site-packages/nvidia/cublas/lib/libnvblas.so.12 filter=lfs diff=lfs merge=lfs -text

 .venv/lib/python3.11/site-packages/opencv_python_headless.libs/libvpx-9f572e11.so.9.1.0 filter=lfs diff=lfs merge=lfs -text
 .venv/lib/python3.11/site-packages/nvidia/cuda_runtime/lib/libcudart.so.12 filter=lfs diff=lfs merge=lfs -text
 .venv/lib/python3.11/site-packages/nvidia/cublas/lib/libnvblas.so.12 filter=lfs diff=lfs merge=lfs -text
+.venv/lib/python3.11/site-packages/opencv_python_headless.libs/libopenblas-r0-f650aae0.3.3.so filter=lfs diff=lfs merge=lfs -text

.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (1.9 kB). View file

.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/__main__.cpython-311.pyc ADDED Viewed

Binary file (394 Bytes). View file

.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/api.cpython-311.pyc ADDED Viewed

Binary file (20.8 kB). View file

.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/cd.cpython-311.pyc ADDED Viewed

Binary file (15.9 kB). View file

.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/constant.cpython-311.pyc ADDED Viewed

Binary file (43.6 kB). View file

.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/legacy.cpython-311.pyc ADDED Viewed

Binary file (3.15 kB). View file

.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/md.cpython-311.pyc ADDED Viewed

Binary file (27.6 kB). View file

.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/models.cpython-311.pyc ADDED Viewed

Binary file (18.6 kB). View file

.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/utils.cpython-311.pyc ADDED Viewed

Binary file (15.4 kB). View file

.venv/lib/python3.11/site-packages/charset_normalizer/__pycache__/version.cpython-311.pyc ADDED Viewed

Binary file (400 Bytes). View file

.venv/lib/python3.11/site-packages/charset_normalizer/cli/__init__.py ADDED Viewed

	@@ -0,0 +1,8 @@

+from __future__ import annotations
+from .__main__ import cli_detect, query_yes_no
+# Names re-exported as the public API of the ``cli`` package.
+__all__ = ("cli_detect", "query_yes_no")

.venv/lib/python3.11/site-packages/charset_normalizer/cli/__main__.py ADDED Viewed

	@@ -0,0 +1,321 @@

+from __future__ import annotations
+import argparse
+import sys
+from json import dumps
+from os.path import abspath, basename, dirname, join, realpath
+from platform import python_version
+from unicodedata import unidata_version
+import charset_normalizer.md as md_module
+from charset_normalizer import from_fp
+from charset_normalizer.models import CliDetectionResult
+from charset_normalizer.version import __version__
+def query_yes_no(question: str, default: str | None = "yes") -> bool:
+    """Ask a yes/no question via input() and return the answer as a bool.
+    The question is re-asked until a recognised answer is given.
+    :param question: Text presented to the user; a "[y/n]" style hint is appended.
+    :param default: Presumed answer if the user just hits <Enter>.
+        It must be "yes" (the default), "no" or None (meaning
+        an answer is required of the user).
+    :return: True for "yes", "ye" or "y"; False for "no" or "n".
+    :raises ValueError: If ``default`` is not "yes", "no" or None.
+    Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input
+    """
+    valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
+    # The capital letter in the hint shows which answer <Enter> selects.
+    if default is None:
+        prompt = " [y/n] "
+    elif default == "yes":
+        prompt = " [Y/n] "
+    elif default == "no":
+        prompt = " [y/N] "
+    else:
+        raise ValueError("invalid default answer: '%s'" % default)
+    while True:
+        sys.stdout.write(question + prompt)
+        # Strip surrounding whitespace so answers such as "y " or " no" are accepted.
+        choice = input().strip().lower()
+        if default is not None and choice == "":
+            return valid[default]
+        if choice in valid:
+            return valid[choice]
+        sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n")
+def _close_files(files) -> None:
+    """Close every file object in *files* that is still open."""
+    for handle in files or ():
+        if handle.closed is False:
+            handle.close()
+def cli_detect(argv: list[str] | None = None) -> int:
+    """
+    CLI assistant using ARGV and ArgumentParser.
+    For each input file the encoding is detected with from_fp(); with
+    --normalize the decoded content is written next to the file (or over it
+    with --replace). A JSON report is printed on stdout, or with --minimal
+    one line of detected encodings per input file.
+    :param argv: Arguments to parse; None lets argparse fall back to sys.argv.
+    :return: 0 if everything is fine, 1 on an invalid option combination or
+        threshold, 2 if writing a normalized file failed
+    """
+    parser = argparse.ArgumentParser(
+        description="The Real First Universal Charset Detector. "
+        "Discover originating encoding used on text file. "
+        "Normalize text to unicode."
+    )
+    parser.add_argument(
+        "files", type=argparse.FileType("rb"), nargs="+", help="File(s) to be analysed"
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        default=False,
+        dest="verbose",
+        help="Display complementary information about file if any. "
+        "Stdout will contain logs about the detection process.",
+    )
+    parser.add_argument(
+        "-a",
+        "--with-alternative",
+        action="store_true",
+        default=False,
+        dest="alternatives",
+        help="Output complementary possibilities if any. Top-level JSON WILL be a list.",
+    )
+    parser.add_argument(
+        "-n",
+        "--normalize",
+        action="store_true",
+        default=False,
+        dest="normalize",
+        help="Permit to normalize input file. If not set, program does not write anything.",
+    )
+    parser.add_argument(
+        "-m",
+        "--minimal",
+        action="store_true",
+        default=False,
+        dest="minimal",
+        help="Only output the charset detected to STDOUT. Disabling JSON output.",
+    )
+    parser.add_argument(
+        "-r",
+        "--replace",
+        action="store_true",
+        default=False,
+        dest="replace",
+        help="Replace file when trying to normalize it instead of creating a new one.",
+    )
+    parser.add_argument(
+        "-f",
+        "--force",
+        action="store_true",
+        default=False,
+        dest="force",
+        help="Replace file without asking if you are sure, use this flag with caution.",
+    )
+    parser.add_argument(
+        "-i",
+        "--no-preemptive",
+        action="store_true",
+        default=False,
+        dest="no_preemptive",
+        help="Disable looking at a charset declaration to hint the detector.",
+    )
+    parser.add_argument(
+        "-t",
+        "--threshold",
+        action="store",
+        default=0.2,
+        type=float,
+        dest="threshold",
+        help="Define a custom maximum amount of noise allowed in decoded content. 0. <= noise <= 1.",
+    )
+    parser.add_argument(
+        "--version",
+        action="version",
+        version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format(
+            __version__,
+            python_version(),
+            unidata_version,
+            "OFF" if md_module.__file__.lower().endswith(".py") else "ON",
+        ),
+        help="Show version information and exit.",
+    )
+    args = parser.parse_args(argv)
+    # Validate option combinations; checks run in the same order as before so
+    # the first failing rule decides the message.
+    usage_error = None
+    if args.replace is True and args.normalize is False:
+        usage_error = "Use --replace in addition of --normalize only."
+    elif args.force is True and args.replace is False:
+        usage_error = "Use --force in addition of --replace only."
+    elif args.threshold < 0.0 or args.threshold > 1.0:
+        usage_error = "--threshold VALUE should be between 0. AND 1."
+    if usage_error is not None:
+        # argparse.FileType already opened every input file; release them.
+        _close_files(args.files)
+        print(usage_error, file=sys.stderr)
+        return 1
+    x_ = []
+    for my_file in args.files:
+        matches = from_fp(
+            my_file,
+            threshold=args.threshold,
+            explain=args.verbose,
+            preemptive_behaviour=args.no_preemptive is False,
+        )
+        best_guess = matches.best()
+        if best_guess is None:
+            print(
+                'Unable to identify originating encoding for "{}". {}'.format(
+                    my_file.name,
+                    (
+                        "Maybe try increasing maximum amount of chaos."
+                        if args.threshold < 1.0
+                        else ""
+                    ),
+                ),
+                file=sys.stderr,
+            )
+            # Placeholder entry so the file still shows up in the report.
+            x_.append(
+                CliDetectionResult(
+                    abspath(my_file.name),
+                    None,
+                    [],
+                    [],
+                    "Unknown",
+                    [],
+                    False,
+                    1.0,
+                    0.0,
+                    None,
+                    True,
+                )
+            )
+        else:
+            # Keep a handle on this file's preferred result: the normalized
+            # path must be recorded on it, not on the first entry of the report.
+            best_result = CliDetectionResult(
+                abspath(my_file.name),
+                best_guess.encoding,
+                best_guess.encoding_aliases,
+                [
+                    cp
+                    for cp in best_guess.could_be_from_charset
+                    if cp != best_guess.encoding
+                ],
+                best_guess.language,
+                best_guess.alphabets,
+                best_guess.bom,
+                best_guess.percent_chaos,
+                best_guess.percent_coherence,
+                None,
+                True,
+            )
+            x_.append(best_result)
+            if len(matches) > 1 and args.alternatives:
+                for el in matches:
+                    if el != best_guess:
+                        x_.append(
+                            CliDetectionResult(
+                                abspath(my_file.name),
+                                el.encoding,
+                                el.encoding_aliases,
+                                [
+                                    cp
+                                    for cp in el.could_be_from_charset
+                                    if cp != el.encoding
+                                ],
+                                el.language,
+                                el.alphabets,
+                                el.bom,
+                                el.percent_chaos,
+                                el.percent_coherence,
+                                None,
+                                False,
+                            )
+                        )
+            if args.normalize is True:
+                if best_guess.encoding.startswith("utf") is True:
+                    print(
+                        '"{}" file does not need to be normalized, as it already came from unicode.'.format(
+                            my_file.name
+                        ),
+                        file=sys.stderr,
+                    )
+                    if my_file.closed is False:
+                        my_file.close()
+                    continue
+                dir_path = dirname(realpath(my_file.name))
+                file_name = basename(realpath(my_file.name))
+                o_: list[str] = file_name.split(".")
+                if args.replace is False:
+                    # Insert the encoding before the last dot-separated part,
+                    # e.g. "a.txt" -> "a.<encoding>.txt".
+                    # NOTE(review): a name without any dot becomes
+                    # "<encoding>.name" -- confirm this is intended.
+                    o_.insert(-1, best_guess.encoding)
+                    if my_file.closed is False:
+                        my_file.close()
+                elif (
+                    args.force is False
+                    and query_yes_no(
+                        'Are you sure to normalize "{}" by replacing it ?'.format(
+                            my_file.name
+                        ),
+                        "no",
+                    )
+                    is False
+                ):
+                    # User declined the in-place replacement: skip this file.
+                    if my_file.closed is False:
+                        my_file.close()
+                    continue
+                try:
+                    best_result.unicode_path = join(dir_path, ".".join(o_))
+                    with open(best_result.unicode_path, "wb") as fp:
+                        fp.write(best_guess.output())
+                except OSError as e:
+                    print(str(e), file=sys.stderr)
+                    # Also release the inputs that were not processed yet.
+                    _close_files(args.files)
+                    return 2
+        if my_file.closed is False:
+            my_file.close()
+    if args.minimal is False:
+        # A single result is printed as an object, several as a list.
+        print(
+            dumps(
+                [el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__,
+                ensure_ascii=True,
+                indent=4,
+            )
+        )
+    else:
+        # One line per input file listing every encoding reported for it.
+        for my_file in args.files:
+            print(
+                ", ".join(
+                    [
+                        el.encoding or "undefined"
+                        for el in x_
+                        if el.path == abspath(my_file.name)
+                    ]
+                )
+            )
+    return 0
+if __name__ == "__main__":
+    # Propagate cli_detect()'s return code as the process exit status so that
+    # running the module as a script reports failures to the shell.
+    sys.exit(cli_detect())

.venv/lib/python3.11/site-packages/charset_normalizer/cli/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (366 Bytes). View file

.venv/lib/python3.11/site-packages/charset_normalizer/cli/__pycache__/__main__.cpython-311.pyc ADDED Viewed

Binary file (12.3 kB). View file

.venv/lib/python3.11/site-packages/huggingface_hub/__init__.py ADDED Viewed

	@@ -0,0 +1,1434 @@

+# Copyright 2020 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ***********
+# `huggingface_hub` init has 2 modes:
+# - Normal usage:
+#       If imported to use it, all modules and functions are lazy-loaded. This means
+#       they exist at top level in module but are imported only the first time they are
+#       used. This way, `from huggingface_hub import something` will import `something`
+#       quickly without the hassle of importing all the features from `huggingface_hub`.
+# - Static check:
+#       If statically analyzed, all modules and functions are loaded normally. This way
+#       static typing check works properly as well as autocomplete in text editors and
+#       IDEs.
+#
+# The static module imports are done inside the `if TYPE_CHECKING:` statement at
+# the bottom of this file. Since module/functions imports are duplicated, it is
+# mandatory to make sure to add them twice when adding one. This is checked in the
+# `make quality` command.
+#
+# To update the static imports, please run the following command and commit the changes.
+# ```
+# # Use script
+# python utils/check_static_imports.py --update-file
+#
+# # Or run style on codebase
+# make style
+# ```
+#
+# ***********
+# Lazy loader vendored from https://github.com/scientific-python/lazy_loader
+import importlib
+import os
+import sys
+from typing import TYPE_CHECKING
+__version__ = "0.28.1"
+# Alphabetical order of definitions is ensured in tests
+# WARNING: any comment added in this dictionary definition will be lost when
+# re-generating the file !
+_SUBMOD_ATTRS = {
+    "_commit_scheduler": [
+        "CommitScheduler",
+    ],
+    "_inference_endpoints": [
+        "InferenceEndpoint",
+        "InferenceEndpointError",
+        "InferenceEndpointStatus",
+        "InferenceEndpointTimeoutError",
+        "InferenceEndpointType",
+    ],
+    "_login": [
+        "auth_list",
+        "auth_switch",
+        "interpreter_login",
+        "login",
+        "logout",
+        "notebook_login",
+    ],
+    "_snapshot_download": [
+        "snapshot_download",
+    ],
+    "_space_api": [
+        "SpaceHardware",
+        "SpaceRuntime",
+        "SpaceStage",
+        "SpaceStorage",
+        "SpaceVariable",
+    ],
+    "_tensorboard_logger": [
+        "HFSummaryWriter",
+    ],
+    "_webhooks_payload": [
+        "WebhookPayload",
+        "WebhookPayloadComment",
+        "WebhookPayloadDiscussion",
+        "WebhookPayloadDiscussionChanges",
+        "WebhookPayloadEvent",
+        "WebhookPayloadMovedTo",
+        "WebhookPayloadRepo",
+        "WebhookPayloadUrl",
+        "WebhookPayloadWebhook",
+    ],
+    "_webhooks_server": [
+        "WebhooksServer",
+        "webhook_endpoint",
+    ],
+    "community": [
+        "Discussion",
+        "DiscussionComment",
+        "DiscussionCommit",
+        "DiscussionEvent",
+        "DiscussionStatusChange",
+        "DiscussionTitleChange",
+        "DiscussionWithDetails",
+    ],
+    "constants": [
+        "CONFIG_NAME",
+        "FLAX_WEIGHTS_NAME",
+        "HUGGINGFACE_CO_URL_HOME",
+        "HUGGINGFACE_CO_URL_TEMPLATE",
+        "PYTORCH_WEIGHTS_NAME",
+        "REPO_TYPE_DATASET",
+        "REPO_TYPE_MODEL",
+        "REPO_TYPE_SPACE",
+        "TF2_WEIGHTS_NAME",
+        "TF_WEIGHTS_NAME",
+    ],
+    "fastai_utils": [
+        "_save_pretrained_fastai",
+        "from_pretrained_fastai",
+        "push_to_hub_fastai",
+    ],
+    "file_download": [
+        "HfFileMetadata",
+        "_CACHED_NO_EXIST",
+        "get_hf_file_metadata",
+        "hf_hub_download",
+        "hf_hub_url",
+        "try_to_load_from_cache",
+    ],
+    "hf_api": [
+        "Collection",
+        "CollectionItem",
+        "CommitInfo",
+        "CommitOperation",
+        "CommitOperationAdd",
+        "CommitOperationCopy",
+        "CommitOperationDelete",
+        "DatasetInfo",
+        "GitCommitInfo",
+        "GitRefInfo",
+        "GitRefs",
+        "HfApi",
+        "ModelInfo",
+        "RepoUrl",
+        "SpaceInfo",
+        "User",
+        "UserLikes",
+        "WebhookInfo",
+        "WebhookWatchedItem",
+        "accept_access_request",
+        "add_collection_item",
+        "add_space_secret",
+        "add_space_variable",
+        "auth_check",
+        "cancel_access_request",
+        "change_discussion_status",
+        "comment_discussion",
+        "create_branch",
+        "create_collection",
+        "create_commit",
+        "create_discussion",
+        "create_inference_endpoint",
+        "create_pull_request",
+        "create_repo",
+        "create_tag",
+        "create_webhook",
+        "dataset_info",
+        "delete_branch",
+        "delete_collection",
+        "delete_collection_item",
+        "delete_file",
+        "delete_folder",
+        "delete_inference_endpoint",
+        "delete_repo",
+        "delete_space_secret",
+        "delete_space_storage",
+        "delete_space_variable",
+        "delete_tag",
+        "delete_webhook",
+        "disable_webhook",
+        "duplicate_space",
+        "edit_discussion_comment",
+        "enable_webhook",
+        "file_exists",
+        "get_collection",
+        "get_dataset_tags",
+        "get_discussion_details",
+        "get_full_repo_name",
+        "get_inference_endpoint",
+        "get_model_tags",
+        "get_paths_info",
+        "get_repo_discussions",
+        "get_safetensors_metadata",
+        "get_space_runtime",
+        "get_space_variables",
+        "get_token_permission",
+        "get_user_overview",
+        "get_webhook",
+        "grant_access",
+        "list_accepted_access_requests",
+        "list_collections",
+        "list_datasets",
+        "list_inference_endpoints",
+        "list_liked_repos",
+        "list_models",
+        "list_organization_members",
+        "list_papers",
+        "list_pending_access_requests",
+        "list_rejected_access_requests",
+        "list_repo_commits",
+        "list_repo_files",
+        "list_repo_likers",
+        "list_repo_refs",
+        "list_repo_tree",
+        "list_spaces",
+        "list_user_followers",
+        "list_user_following",
+        "list_webhooks",
+        "merge_pull_request",
+        "model_info",
+        "move_repo",
+        "paper_info",
+        "parse_safetensors_file_metadata",
+        "pause_inference_endpoint",
+        "pause_space",
+        "preupload_lfs_files",
+        "reject_access_request",
+        "rename_discussion",
+        "repo_exists",
+        "repo_info",
+        "repo_type_and_id_from_hf_id",
+        "request_space_hardware",
+        "request_space_storage",
+        "restart_space",
+        "resume_inference_endpoint",
+        "revision_exists",
+        "run_as_future",
+        "scale_to_zero_inference_endpoint",
+        "set_space_sleep_time",
+        "space_info",
+        "super_squash_history",
+        "unlike",
+        "update_collection_item",
+        "update_collection_metadata",
+        "update_inference_endpoint",
+        "update_repo_settings",
+        "update_repo_visibility",
+        "update_webhook",
+        "upload_file",
+        "upload_folder",
+        "upload_large_folder",
+        "whoami",
+    ],
+    "hf_file_system": [
+        "HfFileSystem",
+        "HfFileSystemFile",
+        "HfFileSystemResolvedPath",
+        "HfFileSystemStreamFile",
+    ],
+    "hub_mixin": [
+        "ModelHubMixin",
+        "PyTorchModelHubMixin",
+    ],
+    "inference._client": [
+        "InferenceClient",
+        "InferenceTimeoutError",
+    ],
+    "inference._generated._async_client": [
+        "AsyncInferenceClient",
+    ],
+    "inference._generated.types": [
+        "AudioClassificationInput",
+        "AudioClassificationOutputElement",
+        "AudioClassificationOutputTransform",
+        "AudioClassificationParameters",
+        "AudioToAudioInput",
+        "AudioToAudioOutputElement",
+        "AutomaticSpeechRecognitionEarlyStoppingEnum",
+        "AutomaticSpeechRecognitionGenerationParameters",
+        "AutomaticSpeechRecognitionInput",
+        "AutomaticSpeechRecognitionOutput",
+        "AutomaticSpeechRecognitionOutputChunk",
+        "AutomaticSpeechRecognitionParameters",
+        "ChatCompletionInput",
+        "ChatCompletionInputFunctionDefinition",
+        "ChatCompletionInputFunctionName",
+        "ChatCompletionInputGrammarType",
+        "ChatCompletionInputGrammarTypeType",
+        "ChatCompletionInputMessage",
+        "ChatCompletionInputMessageChunk",
+        "ChatCompletionInputMessageChunkType",
+        "ChatCompletionInputStreamOptions",
+        "ChatCompletionInputTool",
+        "ChatCompletionInputToolChoiceClass",
+        "ChatCompletionInputToolChoiceEnum",
+        "ChatCompletionInputURL",
+        "ChatCompletionOutput",
+        "ChatCompletionOutputComplete",
+        "ChatCompletionOutputFunctionDefinition",
+        "ChatCompletionOutputLogprob",
+        "ChatCompletionOutputLogprobs",
+        "ChatCompletionOutputMessage",
+        "ChatCompletionOutputToolCall",
+        "ChatCompletionOutputTopLogprob",
+        "ChatCompletionOutputUsage",
+        "ChatCompletionStreamOutput",
+        "ChatCompletionStreamOutputChoice",
+        "ChatCompletionStreamOutputDelta",
+        "ChatCompletionStreamOutputDeltaToolCall",
+        "ChatCompletionStreamOutputFunction",
+        "ChatCompletionStreamOutputLogprob",
+        "ChatCompletionStreamOutputLogprobs",
+        "ChatCompletionStreamOutputTopLogprob",
+        "ChatCompletionStreamOutputUsage",
+        "DepthEstimationInput",
+        "DepthEstimationOutput",
+        "DocumentQuestionAnsweringInput",
+        "DocumentQuestionAnsweringInputData",
+        "DocumentQuestionAnsweringOutputElement",
+        "DocumentQuestionAnsweringParameters",
+        "FeatureExtractionInput",
+        "FeatureExtractionInputTruncationDirection",
+        "FillMaskInput",
+        "FillMaskOutputElement",
+        "FillMaskParameters",
+        "ImageClassificationInput",
+        "ImageClassificationOutputElement",
+        "ImageClassificationOutputTransform",
+        "ImageClassificationParameters",
+        "ImageSegmentationInput",
+        "ImageSegmentationOutputElement",
+        "ImageSegmentationParameters",
+        "ImageSegmentationSubtask",
+        "ImageToImageInput",
+        "ImageToImageOutput",
+        "ImageToImageParameters",
+        "ImageToImageTargetSize",
+        "ImageToTextEarlyStoppingEnum",
+        "ImageToTextGenerationParameters",
+        "ImageToTextInput",
+        "ImageToTextOutput",
+        "ImageToTextParameters",
+        "ObjectDetectionBoundingBox",
+        "ObjectDetectionInput",
+        "ObjectDetectionOutputElement",
+        "ObjectDetectionParameters",
+        "Padding",
+        "QuestionAnsweringInput",
+        "QuestionAnsweringInputData",
+        "QuestionAnsweringOutputElement",
+        "QuestionAnsweringParameters",
+        "SentenceSimilarityInput",
+        "SentenceSimilarityInputData",
+        "SummarizationInput",
+        "SummarizationOutput",
+        "SummarizationParameters",
+        "SummarizationTruncationStrategy",
+        "TableQuestionAnsweringInput",
+        "TableQuestionAnsweringInputData",
+        "TableQuestionAnsweringOutputElement",
+        "TableQuestionAnsweringParameters",
+        "Text2TextGenerationInput",
+        "Text2TextGenerationOutput",
+        "Text2TextGenerationParameters",
+        "Text2TextGenerationTruncationStrategy",
+        "TextClassificationInput",
+        "TextClassificationOutputElement",
+        "TextClassificationOutputTransform",
+        "TextClassificationParameters",
+        "TextGenerationInput",
+        "TextGenerationInputGenerateParameters",
+        "TextGenerationInputGrammarType",
+        "TextGenerationOutput",
+        "TextGenerationOutputBestOfSequence",
+        "TextGenerationOutputDetails",
+        "TextGenerationOutputFinishReason",
+        "TextGenerationOutputPrefillToken",
+        "TextGenerationOutputToken",
+        "TextGenerationStreamOutput",
+        "TextGenerationStreamOutputStreamDetails",
+        "TextGenerationStreamOutputToken",
+        "TextToAudioEarlyStoppingEnum",
+        "TextToAudioGenerationParameters",
+        "TextToAudioInput",
+        "TextToAudioOutput",
+        "TextToAudioParameters",
+        "TextToImageInput",
+        "TextToImageOutput",
+        "TextToImageParameters",
+        "TextToImageTargetSize",
+        "TextToSpeechEarlyStoppingEnum",
+        "TextToSpeechGenerationParameters",
+        "TextToSpeechInput",
+        "TextToSpeechOutput",
+        "TextToSpeechParameters",
+        "TextToVideoInput",
+        "TextToVideoOutput",
+        "TextToVideoParameters",
+        "TokenClassificationAggregationStrategy",
+        "TokenClassificationInput",
+        "TokenClassificationOutputElement",
+        "TokenClassificationParameters",
+        "TranslationInput",
+        "TranslationOutput",
+        "TranslationParameters",
+        "TranslationTruncationStrategy",
+        "TypeEnum",
+        "VideoClassificationInput",
+        "VideoClassificationOutputElement",
+        "VideoClassificationOutputTransform",
+        "VideoClassificationParameters",
+        "VisualQuestionAnsweringInput",
+        "VisualQuestionAnsweringInputData",
+        "VisualQuestionAnsweringOutputElement",
+        "VisualQuestionAnsweringParameters",
+        "ZeroShotClassificationInput",
+        "ZeroShotClassificationOutputElement",
+        "ZeroShotClassificationParameters",
+        "ZeroShotImageClassificationInput",
+        "ZeroShotImageClassificationOutputElement",
+        "ZeroShotImageClassificationParameters",
+        "ZeroShotObjectDetectionBoundingBox",
+        "ZeroShotObjectDetectionInput",
+        "ZeroShotObjectDetectionOutputElement",
+        "ZeroShotObjectDetectionParameters",
+    ],
+    "inference_api": [
+        "InferenceApi",
+    ],
+    "keras_mixin": [
+        "KerasModelHubMixin",
+        "from_pretrained_keras",
+        "push_to_hub_keras",
+        "save_pretrained_keras",
+    ],
+    "repocard": [
+        "DatasetCard",
+        "ModelCard",
+        "RepoCard",
+        "SpaceCard",
+        "metadata_eval_result",
+        "metadata_load",
+        "metadata_save",
+        "metadata_update",
+    ],
+    "repocard_data": [
+        "CardData",
+        "DatasetCardData",
+        "EvalResult",
+        "ModelCardData",
+        "SpaceCardData",
+    ],
+    "repository": [
+        "Repository",
+    ],
+    "serialization": [
+        "StateDictSplit",
+        "get_tf_storage_size",
+        "get_torch_storage_id",
+        "get_torch_storage_size",
+        "load_state_dict_from_file",
+        "load_torch_model",
+        "save_torch_model",
+        "save_torch_state_dict",
+        "split_state_dict_into_shards_factory",
+        "split_tf_state_dict_into_shards",
+        "split_torch_state_dict_into_shards",
+    ],
+    "serialization._dduf": [
+        "DDUFEntry",
+        "export_entries_as_dduf",
+        "export_folder_as_dduf",
+        "read_dduf_file",
+    ],
+    "utils": [
+        "CacheNotFound",
+        "CachedFileInfo",
+        "CachedRepoInfo",
+        "CachedRevisionInfo",
+        "CorruptedCacheException",
+        "DeleteCacheStrategy",
+        "HFCacheInfo",
+        "HfFolder",
+        "cached_assets_path",
+        "configure_http_backend",
+        "dump_environment_info",
+        "get_session",
+        "get_token",
+        "logging",
+        "scan_cache_dir",
+    ],
+}
+# WARNING: `__all__` is generated automatically. Any manual edit will be lost when re-generating this file!
+#
+# To update `__all__`, please run the following command and commit the changes.
+# ```
+# # Use script
+# python utils/check_all_variable.py --update
+#
+# # Or run style on codebase
+# make style
+# ```
+# Public names of the package (what `from <package> import *` exports). The lazy `__dir__`
+# defined in `_attach` returns this same list, and the EAGER_IMPORT loop iterates over it.
+__all__ = [
+    "AsyncInferenceClient",
+    "AudioClassificationInput",
+    "AudioClassificationOutputElement",
+    "AudioClassificationOutputTransform",
+    "AudioClassificationParameters",
+    "AudioToAudioInput",
+    "AudioToAudioOutputElement",
+    "AutomaticSpeechRecognitionEarlyStoppingEnum",
+    "AutomaticSpeechRecognitionGenerationParameters",
+    "AutomaticSpeechRecognitionInput",
+    "AutomaticSpeechRecognitionOutput",
+    "AutomaticSpeechRecognitionOutputChunk",
+    "AutomaticSpeechRecognitionParameters",
+    "CONFIG_NAME",
+    "CacheNotFound",
+    "CachedFileInfo",
+    "CachedRepoInfo",
+    "CachedRevisionInfo",
+    "CardData",
+    "ChatCompletionInput",
+    "ChatCompletionInputFunctionDefinition",
+    "ChatCompletionInputFunctionName",
+    "ChatCompletionInputGrammarType",
+    "ChatCompletionInputGrammarTypeType",
+    "ChatCompletionInputMessage",
+    "ChatCompletionInputMessageChunk",
+    "ChatCompletionInputMessageChunkType",
+    "ChatCompletionInputStreamOptions",
+    "ChatCompletionInputTool",
+    "ChatCompletionInputToolChoiceClass",
+    "ChatCompletionInputToolChoiceEnum",
+    "ChatCompletionInputURL",
+    "ChatCompletionOutput",
+    "ChatCompletionOutputComplete",
+    "ChatCompletionOutputFunctionDefinition",
+    "ChatCompletionOutputLogprob",
+    "ChatCompletionOutputLogprobs",
+    "ChatCompletionOutputMessage",
+    "ChatCompletionOutputToolCall",
+    "ChatCompletionOutputTopLogprob",
+    "ChatCompletionOutputUsage",
+    "ChatCompletionStreamOutput",
+    "ChatCompletionStreamOutputChoice",
+    "ChatCompletionStreamOutputDelta",
+    "ChatCompletionStreamOutputDeltaToolCall",
+    "ChatCompletionStreamOutputFunction",
+    "ChatCompletionStreamOutputLogprob",
+    "ChatCompletionStreamOutputLogprobs",
+    "ChatCompletionStreamOutputTopLogprob",
+    "ChatCompletionStreamOutputUsage",
+    "Collection",
+    "CollectionItem",
+    "CommitInfo",
+    "CommitOperation",
+    "CommitOperationAdd",
+    "CommitOperationCopy",
+    "CommitOperationDelete",
+    "CommitScheduler",
+    "CorruptedCacheException",
+    "DDUFEntry",
+    "DatasetCard",
+    "DatasetCardData",
+    "DatasetInfo",
+    "DeleteCacheStrategy",
+    "DepthEstimationInput",
+    "DepthEstimationOutput",
+    "Discussion",
+    "DiscussionComment",
+    "DiscussionCommit",
+    "DiscussionEvent",
+    "DiscussionStatusChange",
+    "DiscussionTitleChange",
+    "DiscussionWithDetails",
+    "DocumentQuestionAnsweringInput",
+    "DocumentQuestionAnsweringInputData",
+    "DocumentQuestionAnsweringOutputElement",
+    "DocumentQuestionAnsweringParameters",
+    "EvalResult",
+    "FLAX_WEIGHTS_NAME",
+    "FeatureExtractionInput",
+    "FeatureExtractionInputTruncationDirection",
+    "FillMaskInput",
+    "FillMaskOutputElement",
+    "FillMaskParameters",
+    "GitCommitInfo",
+    "GitRefInfo",
+    "GitRefs",
+    "HFCacheInfo",
+    "HFSummaryWriter",
+    "HUGGINGFACE_CO_URL_HOME",
+    "HUGGINGFACE_CO_URL_TEMPLATE",
+    "HfApi",
+    "HfFileMetadata",
+    "HfFileSystem",
+    "HfFileSystemFile",
+    "HfFileSystemResolvedPath",
+    "HfFileSystemStreamFile",
+    "HfFolder",
+    "ImageClassificationInput",
+    "ImageClassificationOutputElement",
+    "ImageClassificationOutputTransform",
+    "ImageClassificationParameters",
+    "ImageSegmentationInput",
+    "ImageSegmentationOutputElement",
+    "ImageSegmentationParameters",
+    "ImageSegmentationSubtask",
+    "ImageToImageInput",
+    "ImageToImageOutput",
+    "ImageToImageParameters",
+    "ImageToImageTargetSize",
+    "ImageToTextEarlyStoppingEnum",
+    "ImageToTextGenerationParameters",
+    "ImageToTextInput",
+    "ImageToTextOutput",
+    "ImageToTextParameters",
+    "InferenceApi",
+    "InferenceClient",
+    "InferenceEndpoint",
+    "InferenceEndpointError",
+    "InferenceEndpointStatus",
+    "InferenceEndpointTimeoutError",
+    "InferenceEndpointType",
+    "InferenceTimeoutError",
+    "KerasModelHubMixin",
+    "ModelCard",
+    "ModelCardData",
+    "ModelHubMixin",
+    "ModelInfo",
+    "ObjectDetectionBoundingBox",
+    "ObjectDetectionInput",
+    "ObjectDetectionOutputElement",
+    "ObjectDetectionParameters",
+    "PYTORCH_WEIGHTS_NAME",
+    "Padding",
+    "PyTorchModelHubMixin",
+    "QuestionAnsweringInput",
+    "QuestionAnsweringInputData",
+    "QuestionAnsweringOutputElement",
+    "QuestionAnsweringParameters",
+    "REPO_TYPE_DATASET",
+    "REPO_TYPE_MODEL",
+    "REPO_TYPE_SPACE",
+    "RepoCard",
+    "RepoUrl",
+    "Repository",
+    "SentenceSimilarityInput",
+    "SentenceSimilarityInputData",
+    "SpaceCard",
+    "SpaceCardData",
+    "SpaceHardware",
+    "SpaceInfo",
+    "SpaceRuntime",
+    "SpaceStage",
+    "SpaceStorage",
+    "SpaceVariable",
+    "StateDictSplit",
+    "SummarizationInput",
+    "SummarizationOutput",
+    "SummarizationParameters",
+    "SummarizationTruncationStrategy",
+    "TF2_WEIGHTS_NAME",
+    "TF_WEIGHTS_NAME",
+    "TableQuestionAnsweringInput",
+    "TableQuestionAnsweringInputData",
+    "TableQuestionAnsweringOutputElement",
+    "TableQuestionAnsweringParameters",
+    "Text2TextGenerationInput",
+    "Text2TextGenerationOutput",
+    "Text2TextGenerationParameters",
+    "Text2TextGenerationTruncationStrategy",
+    "TextClassificationInput",
+    "TextClassificationOutputElement",
+    "TextClassificationOutputTransform",
+    "TextClassificationParameters",
+    "TextGenerationInput",
+    "TextGenerationInputGenerateParameters",
+    "TextGenerationInputGrammarType",
+    "TextGenerationOutput",
+    "TextGenerationOutputBestOfSequence",
+    "TextGenerationOutputDetails",
+    "TextGenerationOutputFinishReason",
+    "TextGenerationOutputPrefillToken",
+    "TextGenerationOutputToken",
+    "TextGenerationStreamOutput",
+    "TextGenerationStreamOutputStreamDetails",
+    "TextGenerationStreamOutputToken",
+    "TextToAudioEarlyStoppingEnum",
+    "TextToAudioGenerationParameters",
+    "TextToAudioInput",
+    "TextToAudioOutput",
+    "TextToAudioParameters",
+    "TextToImageInput",
+    "TextToImageOutput",
+    "TextToImageParameters",
+    "TextToImageTargetSize",
+    "TextToSpeechEarlyStoppingEnum",
+    "TextToSpeechGenerationParameters",
+    "TextToSpeechInput",
+    "TextToSpeechOutput",
+    "TextToSpeechParameters",
+    "TextToVideoInput",
+    "TextToVideoOutput",
+    "TextToVideoParameters",
+    "TokenClassificationAggregationStrategy",
+    "TokenClassificationInput",
+    "TokenClassificationOutputElement",
+    "TokenClassificationParameters",
+    "TranslationInput",
+    "TranslationOutput",
+    "TranslationParameters",
+    "TranslationTruncationStrategy",
+    "TypeEnum",
+    "User",
+    "UserLikes",
+    "VideoClassificationInput",
+    "VideoClassificationOutputElement",
+    "VideoClassificationOutputTransform",
+    "VideoClassificationParameters",
+    "VisualQuestionAnsweringInput",
+    "VisualQuestionAnsweringInputData",
+    "VisualQuestionAnsweringOutputElement",
+    "VisualQuestionAnsweringParameters",
+    "WebhookInfo",
+    "WebhookPayload",
+    "WebhookPayloadComment",
+    "WebhookPayloadDiscussion",
+    "WebhookPayloadDiscussionChanges",
+    "WebhookPayloadEvent",
+    "WebhookPayloadMovedTo",
+    "WebhookPayloadRepo",
+    "WebhookPayloadUrl",
+    "WebhookPayloadWebhook",
+    "WebhookWatchedItem",
+    "WebhooksServer",
+    "ZeroShotClassificationInput",
+    "ZeroShotClassificationOutputElement",
+    "ZeroShotClassificationParameters",
+    "ZeroShotImageClassificationInput",
+    "ZeroShotImageClassificationOutputElement",
+    "ZeroShotImageClassificationParameters",
+    "ZeroShotObjectDetectionBoundingBox",
+    "ZeroShotObjectDetectionInput",
+    "ZeroShotObjectDetectionOutputElement",
+    "ZeroShotObjectDetectionParameters",
+    "_CACHED_NO_EXIST",
+    "_save_pretrained_fastai",
+    "accept_access_request",
+    "add_collection_item",
+    "add_space_secret",
+    "add_space_variable",
+    "auth_check",
+    "auth_list",
+    "auth_switch",
+    "cached_assets_path",
+    "cancel_access_request",
+    "change_discussion_status",
+    "comment_discussion",
+    "configure_http_backend",
+    "create_branch",
+    "create_collection",
+    "create_commit",
+    "create_discussion",
+    "create_inference_endpoint",
+    "create_pull_request",
+    "create_repo",
+    "create_tag",
+    "create_webhook",
+    "dataset_info",
+    "delete_branch",
+    "delete_collection",
+    "delete_collection_item",
+    "delete_file",
+    "delete_folder",
+    "delete_inference_endpoint",
+    "delete_repo",
+    "delete_space_secret",
+    "delete_space_storage",
+    "delete_space_variable",
+    "delete_tag",
+    "delete_webhook",
+    "disable_webhook",
+    "dump_environment_info",
+    "duplicate_space",
+    "edit_discussion_comment",
+    "enable_webhook",
+    "export_entries_as_dduf",
+    "export_folder_as_dduf",
+    "file_exists",
+    "from_pretrained_fastai",
+    "from_pretrained_keras",
+    "get_collection",
+    "get_dataset_tags",
+    "get_discussion_details",
+    "get_full_repo_name",
+    "get_hf_file_metadata",
+    "get_inference_endpoint",
+    "get_model_tags",
+    "get_paths_info",
+    "get_repo_discussions",
+    "get_safetensors_metadata",
+    "get_session",
+    "get_space_runtime",
+    "get_space_variables",
+    "get_tf_storage_size",
+    "get_token",
+    "get_token_permission",
+    "get_torch_storage_id",
+    "get_torch_storage_size",
+    "get_user_overview",
+    "get_webhook",
+    "grant_access",
+    "hf_hub_download",
+    "hf_hub_url",
+    "interpreter_login",
+    "list_accepted_access_requests",
+    "list_collections",
+    "list_datasets",
+    "list_inference_endpoints",
+    "list_liked_repos",
+    "list_models",
+    "list_organization_members",
+    "list_papers",
+    "list_pending_access_requests",
+    "list_rejected_access_requests",
+    "list_repo_commits",
+    "list_repo_files",
+    "list_repo_likers",
+    "list_repo_refs",
+    "list_repo_tree",
+    "list_spaces",
+    "list_user_followers",
+    "list_user_following",
+    "list_webhooks",
+    "load_state_dict_from_file",
+    "load_torch_model",
+    "logging",
+    "login",
+    "logout",
+    "merge_pull_request",
+    "metadata_eval_result",
+    "metadata_load",
+    "metadata_save",
+    "metadata_update",
+    "model_info",
+    "move_repo",
+    "notebook_login",
+    "paper_info",
+    "parse_safetensors_file_metadata",
+    "pause_inference_endpoint",
+    "pause_space",
+    "preupload_lfs_files",
+    "push_to_hub_fastai",
+    "push_to_hub_keras",
+    "read_dduf_file",
+    "reject_access_request",
+    "rename_discussion",
+    "repo_exists",
+    "repo_info",
+    "repo_type_and_id_from_hf_id",
+    "request_space_hardware",
+    "request_space_storage",
+    "restart_space",
+    "resume_inference_endpoint",
+    "revision_exists",
+    "run_as_future",
+    "save_pretrained_keras",
+    "save_torch_model",
+    "save_torch_state_dict",
+    "scale_to_zero_inference_endpoint",
+    "scan_cache_dir",
+    "set_space_sleep_time",
+    "snapshot_download",
+    "space_info",
+    "split_state_dict_into_shards_factory",
+    "split_tf_state_dict_into_shards",
+    "split_torch_state_dict_into_shards",
+    "super_squash_history",
+    "try_to_load_from_cache",
+    "unlike",
+    "update_collection_item",
+    "update_collection_metadata",
+    "update_inference_endpoint",
+    "update_repo_settings",
+    "update_repo_visibility",
+    "update_webhook",
+    "upload_file",
+    "upload_folder",
+    "upload_large_folder",
+    "webhook_endpoint",
+    "whoami",
+]
+def _attach(package_name, submodules=None, submod_attrs=None):
+    """Create the lazy-loading hooks (`__getattr__` and `__dir__`) for a package.
+    Instead of importing everything when the package itself is imported:
+    ```py
+    import mysubmodule
+    import anothersubmodule
+    from .foo import someattr
+    ```
+    a package can install the returned hooks so that each import only happens the
+    first time the corresponding name is accessed:
+    ```python
+    __getattr__, __dir__ = _attach(
+        __name__,
+        ['mysubmodule', 'anothersubmodule'],
+        {'foo': ['someattr']}
+    )
+    ```
+    This functionality requires Python 3.7 or higher.
+    Args:
+        package_name (`str`):
+            Name of the package the hooks are built for. Typically `__name__`.
+        submodules (iterable of `str`, *optional*):
+            Names of submodules to expose lazily. `None` means no submodule.
+        submod_attrs (`dict`, *optional*):
+            Dictionary of submodule -> list of attributes / functions.
+            Each attribute is imported from its submodule when it is accessed.
+    Returns:
+        A `(__getattr__, __dir__)` tuple of functions.
+    """
+    lazy_submodules = set() if submodules is None else set(submodules)
+    attr_sources = {} if submod_attrs is None else submod_attrs
+    # Invert {submodule: [attrs]} into {attr: submodule}. If an attribute is listed
+    # under several submodules, the last one iterated wins.
+    attr_owner = {}
+    for module_name, exported_names in attr_sources.items():
+        for exported in exported_names:
+            attr_owner[exported] = module_name
+    def __getattr__(name):
+        # Submodules are checked first, so they take precedence over attributes.
+        if name in lazy_submodules:
+            try:
+                return importlib.import_module(f"{package_name}.{name}")
+            except Exception as e:
+                # Report which import failed, then let the original error propagate.
+                print(f"Error importing {package_name}.{name}: {e}")
+                raise
+        if name not in attr_owner:
+            raise AttributeError(f"No {package_name} attribute {name}")
+        owner = attr_owner[name]
+        submod_path = f"{package_name}.{owner}"
+        try:
+            submod = importlib.import_module(submod_path)
+        except Exception as e:
+            print(f"Error importing {submod_path}: {e}")
+            raise
+        value = getattr(submod, name)
+        if name == owner:
+            # The attribute is defined in a submodule of the same name: store the
+            # attribute on the package so that it, and *not* the module, is what
+            # the package exposes under that name.
+            sys.modules[package_name].__dict__[name] = value
+        return value
+    def __dir__():
+        # Reports the module-level `__all__`, not a list derived from the arguments.
+        return __all__
+    return __getattr__, __dir__
+# Install the lazy hooks as this module's `__getattr__` / `__dir__`. No whole submodule is
+# exposed lazily (`submodules=[]`); every lazy name comes from the `_SUBMOD_ATTRS` mapping.
+__getattr__, __dir__ = _attach(__name__, submodules=[], submod_attrs=_SUBMOD_ATTRS)
+# Opt out of laziness: when EAGER_IMPORT is set to any non-empty value (even "0"), resolve
+# every name in `__all__` right away, so a failing import surfaces while the package loads.
+# NOTE: the loop variable `attr` stays bound at module level after the loop.
+if os.environ.get("EAGER_IMPORT", ""):
+    for attr in __all__:
+        __getattr__(attr)
+# WARNING: any content below this statement is generated automatically. Any manual edit
+# will be lost when re-generating this file !
+#
+# To update the static imports, please run the following command and commit the changes.
+# ```
+# # Use script
+# python utils/check_static_imports.py --update
+#
+# # Or run style on codebase
+# make style
+# ```
+if TYPE_CHECKING:  # pragma: no cover
+    from ._commit_scheduler import CommitScheduler  # noqa: F401
+    from ._inference_endpoints import (
+        InferenceEndpoint,  # noqa: F401
+        InferenceEndpointError,  # noqa: F401
+        InferenceEndpointStatus,  # noqa: F401
+        InferenceEndpointTimeoutError,  # noqa: F401
+        InferenceEndpointType,  # noqa: F401
+    )
+    from ._login import (
+        auth_list,  # noqa: F401
+        auth_switch,  # noqa: F401
+        interpreter_login,  # noqa: F401
+        login,  # noqa: F401
+        logout,  # noqa: F401
+        notebook_login,  # noqa: F401
+    )
+    from ._snapshot_download import snapshot_download  # noqa: F401
+    from ._space_api import (
+        SpaceHardware,  # noqa: F401
+        SpaceRuntime,  # noqa: F401
+        SpaceStage,  # noqa: F401
+        SpaceStorage,  # noqa: F401
+        SpaceVariable,  # noqa: F401
+    )
+    from ._tensorboard_logger import HFSummaryWriter  # noqa: F401
+    from ._webhooks_payload import (
+        WebhookPayload,  # noqa: F401
+        WebhookPayloadComment,  # noqa: F401
+        WebhookPayloadDiscussion,  # noqa: F401
+        WebhookPayloadDiscussionChanges,  # noqa: F401
+        WebhookPayloadEvent,  # noqa: F401
+        WebhookPayloadMovedTo,  # noqa: F401
+        WebhookPayloadRepo,  # noqa: F401
+        WebhookPayloadUrl,  # noqa: F401
+        WebhookPayloadWebhook,  # noqa: F401
+    )
+    from ._webhooks_server import (
+        WebhooksServer,  # noqa: F401
+        webhook_endpoint,  # noqa: F401
+    )
+    from .community import (
+        Discussion,  # noqa: F401
+        DiscussionComment,  # noqa: F401
+        DiscussionCommit,  # noqa: F401
+        DiscussionEvent,  # noqa: F401
+        DiscussionStatusChange,  # noqa: F401
+        DiscussionTitleChange,  # noqa: F401
+        DiscussionWithDetails,  # noqa: F401
+    )
+    from .constants import (
+        CONFIG_NAME,  # noqa: F401
+        FLAX_WEIGHTS_NAME,  # noqa: F401
+        HUGGINGFACE_CO_URL_HOME,  # noqa: F401
+        HUGGINGFACE_CO_URL_TEMPLATE,  # noqa: F401
+        PYTORCH_WEIGHTS_NAME,  # noqa: F401
+        REPO_TYPE_DATASET,  # noqa: F401
+        REPO_TYPE_MODEL,  # noqa: F401
+        REPO_TYPE_SPACE,  # noqa: F401
+        TF2_WEIGHTS_NAME,  # noqa: F401
+        TF_WEIGHTS_NAME,  # noqa: F401
+    )
+    from .fastai_utils import (
+        _save_pretrained_fastai,  # noqa: F401
+        from_pretrained_fastai,  # noqa: F401
+        push_to_hub_fastai,  # noqa: F401
+    )
+    from .file_download import (
+        _CACHED_NO_EXIST,  # noqa: F401
+        HfFileMetadata,  # noqa: F401
+        get_hf_file_metadata,  # noqa: F401
+        hf_hub_download,  # noqa: F401
+        hf_hub_url,  # noqa: F401
+        try_to_load_from_cache,  # noqa: F401
+    )
+    from .hf_api import (
+        Collection,  # noqa: F401
+        CollectionItem,  # noqa: F401
+        CommitInfo,  # noqa: F401
+        CommitOperation,  # noqa: F401
+        CommitOperationAdd,  # noqa: F401
+        CommitOperationCopy,  # noqa: F401
+        CommitOperationDelete,  # noqa: F401
+        DatasetInfo,  # noqa: F401
+        GitCommitInfo,  # noqa: F401
+        GitRefInfo,  # noqa: F401
+        GitRefs,  # noqa: F401
+        HfApi,  # noqa: F401
+        ModelInfo,  # noqa: F401
+        RepoUrl,  # noqa: F401
+        SpaceInfo,  # noqa: F401
+        User,  # noqa: F401
+        UserLikes,  # noqa: F401
+        WebhookInfo,  # noqa: F401
+        WebhookWatchedItem,  # noqa: F401
+        accept_access_request,  # noqa: F401
+        add_collection_item,  # noqa: F401
+        add_space_secret,  # noqa: F401
+        add_space_variable,  # noqa: F401
+        auth_check,  # noqa: F401
+        cancel_access_request,  # noqa: F401
+        change_discussion_status,  # noqa: F401
+        comment_discussion,  # noqa: F401
+        create_branch,  # noqa: F401
+        create_collection,  # noqa: F401
+        create_commit,  # noqa: F401
+        create_discussion,  # noqa: F401
+        create_inference_endpoint,  # noqa: F401
+        create_pull_request,  # noqa: F401
+        create_repo,  # noqa: F401
+        create_tag,  # noqa: F401
+        create_webhook,  # noqa: F401
+        dataset_info,  # noqa: F401
+        delete_branch,  # noqa: F401
+        delete_collection,  # noqa: F401
+        delete_collection_item,  # noqa: F401
+        delete_file,  # noqa: F401
+        delete_folder,  # noqa: F401
+        delete_inference_endpoint,  # noqa: F401
+        delete_repo,  # noqa: F401
+        delete_space_secret,  # noqa: F401
+        delete_space_storage,  # noqa: F401
+        delete_space_variable,  # noqa: F401
+        delete_tag,  # noqa: F401
+        delete_webhook,  # noqa: F401
+        disable_webhook,  # noqa: F401
+        duplicate_space,  # noqa: F401
+        edit_discussion_comment,  # noqa: F401
+        enable_webhook,  # noqa: F401
+        file_exists,  # noqa: F401
+        get_collection,  # noqa: F401
+        get_dataset_tags,  # noqa: F401
+        get_discussion_details,  # noqa: F401
+        get_full_repo_name,  # noqa: F401
+        get_inference_endpoint,  # noqa: F401
+        get_model_tags,  # noqa: F401
+        get_paths_info,  # noqa: F401
+        get_repo_discussions,  # noqa: F401
+        get_safetensors_metadata,  # noqa: F401
+        get_space_runtime,  # noqa: F401
+        get_space_variables,  # noqa: F401
+        get_token_permission,  # noqa: F401
+        get_user_overview,  # noqa: F401
+        get_webhook,  # noqa: F401
+        grant_access,  # noqa: F401
+        list_accepted_access_requests,  # noqa: F401
+        list_collections,  # noqa: F401
+        list_datasets,  # noqa: F401
+        list_inference_endpoints,  # noqa: F401
+        list_liked_repos,  # noqa: F401
+        list_models,  # noqa: F401
+        list_organization_members,  # noqa: F401
+        list_papers,  # noqa: F401
+        list_pending_access_requests,  # noqa: F401
+        list_rejected_access_requests,  # noqa: F401
+        list_repo_commits,  # noqa: F401
+        list_repo_files,  # noqa: F401
+        list_repo_likers,  # noqa: F401
+        list_repo_refs,  # noqa: F401
+        list_repo_tree,  # noqa: F401
+        list_spaces,  # noqa: F401
+        list_user_followers,  # noqa: F401
+        list_user_following,  # noqa: F401
+        list_webhooks,  # noqa: F401
+        merge_pull_request,  # noqa: F401
+        model_info,  # noqa: F401
+        move_repo,  # noqa: F401
+        paper_info,  # noqa: F401
+        parse_safetensors_file_metadata,  # noqa: F401
+        pause_inference_endpoint,  # noqa: F401
+        pause_space,  # noqa: F401
+        preupload_lfs_files,  # noqa: F401
+        reject_access_request,  # noqa: F401
+        rename_discussion,  # noqa: F401
+        repo_exists,  # noqa: F401
+        repo_info,  # noqa: F401
+        repo_type_and_id_from_hf_id,  # noqa: F401
+        request_space_hardware,  # noqa: F401
+        request_space_storage,  # noqa: F401
+        restart_space,  # noqa: F401
+        resume_inference_endpoint,  # noqa: F401
+        revision_exists,  # noqa: F401
+        run_as_future,  # noqa: F401
+        scale_to_zero_inference_endpoint,  # noqa: F401
+        set_space_sleep_time,  # noqa: F401
+        space_info,  # noqa: F401
+        super_squash_history,  # noqa: F401
+        unlike,  # noqa: F401
+        update_collection_item,  # noqa: F401
+        update_collection_metadata,  # noqa: F401
+        update_inference_endpoint,  # noqa: F401
+        update_repo_settings,  # noqa: F401
+        update_repo_visibility,  # noqa: F401
+        update_webhook,  # noqa: F401
+        upload_file,  # noqa: F401
+        upload_folder,  # noqa: F401
+        upload_large_folder,  # noqa: F401
+        whoami,  # noqa: F401
+    )
+    from .hf_file_system import (
+        HfFileSystem,  # noqa: F401
+        HfFileSystemFile,  # noqa: F401
+        HfFileSystemResolvedPath,  # noqa: F401
+        HfFileSystemStreamFile,  # noqa: F401
+    )
+    from .hub_mixin import (
+        ModelHubMixin,  # noqa: F401
+        PyTorchModelHubMixin,  # noqa: F401
+    )
+    from .inference._client import (
+        InferenceClient,  # noqa: F401
+        InferenceTimeoutError,  # noqa: F401
+    )
+    from .inference._generated._async_client import AsyncInferenceClient  # noqa: F401
+    from .inference._generated.types import (
+        AudioClassificationInput,  # noqa: F401
+        AudioClassificationOutputElement,  # noqa: F401
+        AudioClassificationOutputTransform,  # noqa: F401
+        AudioClassificationParameters,  # noqa: F401
+        AudioToAudioInput,  # noqa: F401
+        AudioToAudioOutputElement,  # noqa: F401
+        AutomaticSpeechRecognitionEarlyStoppingEnum,  # noqa: F401
+        AutomaticSpeechRecognitionGenerationParameters,  # noqa: F401
+        AutomaticSpeechRecognitionInput,  # noqa: F401
+        AutomaticSpeechRecognitionOutput,  # noqa: F401
+        AutomaticSpeechRecognitionOutputChunk,  # noqa: F401
+        AutomaticSpeechRecognitionParameters,  # noqa: F401
+        ChatCompletionInput,  # noqa: F401
+        ChatCompletionInputFunctionDefinition,  # noqa: F401
+        ChatCompletionInputFunctionName,  # noqa: F401
+        ChatCompletionInputGrammarType,  # noqa: F401
+        ChatCompletionInputGrammarTypeType,  # noqa: F401
+        ChatCompletionInputMessage,  # noqa: F401
+        ChatCompletionInputMessageChunk,  # noqa: F401
+        ChatCompletionInputMessageChunkType,  # noqa: F401
+        ChatCompletionInputStreamOptions,  # noqa: F401
+        ChatCompletionInputTool,  # noqa: F401
+        ChatCompletionInputToolChoiceClass,  # noqa: F401
+        ChatCompletionInputToolChoiceEnum,  # noqa: F401
+        ChatCompletionInputURL,  # noqa: F401
+        ChatCompletionOutput,  # noqa: F401
+        ChatCompletionOutputComplete,  # noqa: F401
+        ChatCompletionOutputFunctionDefinition,  # noqa: F401
+        ChatCompletionOutputLogprob,  # noqa: F401
+        ChatCompletionOutputLogprobs,  # noqa: F401
+        ChatCompletionOutputMessage,  # noqa: F401
+        ChatCompletionOutputToolCall,  # noqa: F401
+        ChatCompletionOutputTopLogprob,  # noqa: F401
+        ChatCompletionOutputUsage,  # noqa: F401
+        ChatCompletionStreamOutput,  # noqa: F401
+        ChatCompletionStreamOutputChoice,  # noqa: F401
+        ChatCompletionStreamOutputDelta,  # noqa: F401
+        ChatCompletionStreamOutputDeltaToolCall,  # noqa: F401
+        ChatCompletionStreamOutputFunction,  # noqa: F401
+        ChatCompletionStreamOutputLogprob,  # noqa: F401
+        ChatCompletionStreamOutputLogprobs,  # noqa: F401
+        ChatCompletionStreamOutputTopLogprob,  # noqa: F401
+        ChatCompletionStreamOutputUsage,  # noqa: F401
+        DepthEstimationInput,  # noqa: F401
+        DepthEstimationOutput,  # noqa: F401
+        DocumentQuestionAnsweringInput,  # noqa: F401
+        DocumentQuestionAnsweringInputData,  # noqa: F401
+        DocumentQuestionAnsweringOutputElement,  # noqa: F401
+        DocumentQuestionAnsweringParameters,  # noqa: F401
+        FeatureExtractionInput,  # noqa: F401
+        FeatureExtractionInputTruncationDirection,  # noqa: F401
+        FillMaskInput,  # noqa: F401
+        FillMaskOutputElement,  # noqa: F401
+        FillMaskParameters,  # noqa: F401
+        ImageClassificationInput,  # noqa: F401
+        ImageClassificationOutputElement,  # noqa: F401
+        ImageClassificationOutputTransform,  # noqa: F401
+        ImageClassificationParameters,  # noqa: F401
+        ImageSegmentationInput,  # noqa: F401
+        ImageSegmentationOutputElement,  # noqa: F401
+        ImageSegmentationParameters,  # noqa: F401
+        ImageSegmentationSubtask,  # noqa: F401
+        ImageToImageInput,  # noqa: F401
+        ImageToImageOutput,  # noqa: F401
+        ImageToImageParameters,  # noqa: F401
+        ImageToImageTargetSize,  # noqa: F401
+        ImageToTextEarlyStoppingEnum,  # noqa: F401
+        ImageToTextGenerationParameters,  # noqa: F401
+        ImageToTextInput,  # noqa: F401
+        ImageToTextOutput,  # noqa: F401
+        ImageToTextParameters,  # noqa: F401
+        ObjectDetectionBoundingBox,  # noqa: F401
+        ObjectDetectionInput,  # noqa: F401
+        ObjectDetectionOutputElement,  # noqa: F401
+        ObjectDetectionParameters,  # noqa: F401
+        Padding,  # noqa: F401
+        QuestionAnsweringInput,  # noqa: F401
+        QuestionAnsweringInputData,  # noqa: F401
+        QuestionAnsweringOutputElement,  # noqa: F401
+        QuestionAnsweringParameters,  # noqa: F401
+        SentenceSimilarityInput,  # noqa: F401
+        SentenceSimilarityInputData,  # noqa: F401
+        SummarizationInput,  # noqa: F401
+        SummarizationOutput,  # noqa: F401
+        SummarizationParameters,  # noqa: F401
+        SummarizationTruncationStrategy,  # noqa: F401
+        TableQuestionAnsweringInput,  # noqa: F401
+        TableQuestionAnsweringInputData,  # noqa: F401
+        TableQuestionAnsweringOutputElement,  # noqa: F401
+        TableQuestionAnsweringParameters,  # noqa: F401
+        Text2TextGenerationInput,  # noqa: F401
+        Text2TextGenerationOutput,  # noqa: F401
+        Text2TextGenerationParameters,  # noqa: F401
+        Text2TextGenerationTruncationStrategy,  # noqa: F401
+        TextClassificationInput,  # noqa: F401
+        TextClassificationOutputElement,  # noqa: F401
+        TextClassificationOutputTransform,  # noqa: F401
+        TextClassificationParameters,  # noqa: F401
+        TextGenerationInput,  # noqa: F401
+        TextGenerationInputGenerateParameters,  # noqa: F401
+        TextGenerationInputGrammarType,  # noqa: F401
+        TextGenerationOutput,  # noqa: F401
+        TextGenerationOutputBestOfSequence,  # noqa: F401
+        TextGenerationOutputDetails,  # noqa: F401
+        TextGenerationOutputFinishReason,  # noqa: F401
+        TextGenerationOutputPrefillToken,  # noqa: F401
+        TextGenerationOutputToken,  # noqa: F401
+        TextGenerationStreamOutput,  # noqa: F401
+        TextGenerationStreamOutputStreamDetails,  # noqa: F401
+        TextGenerationStreamOutputToken,  # noqa: F401
+        TextToAudioEarlyStoppingEnum,  # noqa: F401
+        TextToAudioGenerationParameters,  # noqa: F401
+        TextToAudioInput,  # noqa: F401
+        TextToAudioOutput,  # noqa: F401
+        TextToAudioParameters,  # noqa: F401
+        TextToImageInput,  # noqa: F401
+        TextToImageOutput,  # noqa: F401
+        TextToImageParameters,  # noqa: F401
+        TextToImageTargetSize,  # noqa: F401
+        TextToSpeechEarlyStoppingEnum,  # noqa: F401
+        TextToSpeechGenerationParameters,  # noqa: F401
+        TextToSpeechInput,  # noqa: F401
+        TextToSpeechOutput,  # noqa: F401
+        TextToSpeechParameters,  # noqa: F401
+        TextToVideoInput,  # noqa: F401
+        TextToVideoOutput,  # noqa: F401
+        TextToVideoParameters,  # noqa: F401
+        TokenClassificationAggregationStrategy,  # noqa: F401
+        TokenClassificationInput,  # noqa: F401
+        TokenClassificationOutputElement,  # noqa: F401
+        TokenClassificationParameters,  # noqa: F401
+        TranslationInput,  # noqa: F401
+        TranslationOutput,  # noqa: F401
+        TranslationParameters,  # noqa: F401
+        TranslationTruncationStrategy,  # noqa: F401
+        TypeEnum,  # noqa: F401
+        VideoClassificationInput,  # noqa: F401
+        VideoClassificationOutputElement,  # noqa: F401
+        VideoClassificationOutputTransform,  # noqa: F401
+        VideoClassificationParameters,  # noqa: F401
+        VisualQuestionAnsweringInput,  # noqa: F401
+        VisualQuestionAnsweringInputData,  # noqa: F401
+        VisualQuestionAnsweringOutputElement,  # noqa: F401
+        VisualQuestionAnsweringParameters,  # noqa: F401
+        ZeroShotClassificationInput,  # noqa: F401
+        ZeroShotClassificationOutputElement,  # noqa: F401
+        ZeroShotClassificationParameters,  # noqa: F401
+        ZeroShotImageClassificationInput,  # noqa: F401
+        ZeroShotImageClassificationOutputElement,  # noqa: F401
+        ZeroShotImageClassificationParameters,  # noqa: F401
+        ZeroShotObjectDetectionBoundingBox,  # noqa: F401
+        ZeroShotObjectDetectionInput,  # noqa: F401
+        ZeroShotObjectDetectionOutputElement,  # noqa: F401
+        ZeroShotObjectDetectionParameters,  # noqa: F401
+    )
+    from .inference_api import InferenceApi  # noqa: F401
+    from .keras_mixin import (
+        KerasModelHubMixin,  # noqa: F401
+        from_pretrained_keras,  # noqa: F401
+        push_to_hub_keras,  # noqa: F401
+        save_pretrained_keras,  # noqa: F401
+    )
+    from .repocard import (
+        DatasetCard,  # noqa: F401
+        ModelCard,  # noqa: F401
+        RepoCard,  # noqa: F401
+        SpaceCard,  # noqa: F401
+        metadata_eval_result,  # noqa: F401
+        metadata_load,  # noqa: F401
+        metadata_save,  # noqa: F401
+        metadata_update,  # noqa: F401
+    )
+    from .repocard_data import (
+        CardData,  # noqa: F401
+        DatasetCardData,  # noqa: F401
+        EvalResult,  # noqa: F401
+        ModelCardData,  # noqa: F401
+        SpaceCardData,  # noqa: F401
+    )
+    from .repository import Repository  # noqa: F401
+    from .serialization import (
+        StateDictSplit,  # noqa: F401
+        get_tf_storage_size,  # noqa: F401
+        get_torch_storage_id,  # noqa: F401
+        get_torch_storage_size,  # noqa: F401
+        load_state_dict_from_file,  # noqa: F401
+        load_torch_model,  # noqa: F401
+        save_torch_model,  # noqa: F401
+        save_torch_state_dict,  # noqa: F401
+        split_state_dict_into_shards_factory,  # noqa: F401
+        split_tf_state_dict_into_shards,  # noqa: F401
+        split_torch_state_dict_into_shards,  # noqa: F401
+    )
+    from .serialization._dduf import (
+        DDUFEntry,  # noqa: F401
+        export_entries_as_dduf,  # noqa: F401
+        export_folder_as_dduf,  # noqa: F401
+        read_dduf_file,  # noqa: F401
+    )
+    from .utils import (
+        CachedFileInfo,  # noqa: F401
+        CachedRepoInfo,  # noqa: F401
+        CachedRevisionInfo,  # noqa: F401
+        CacheNotFound,  # noqa: F401
+        CorruptedCacheException,  # noqa: F401
+        DeleteCacheStrategy,  # noqa: F401
+        HFCacheInfo,  # noqa: F401
+        HfFolder,  # noqa: F401
+        cached_assets_path,  # noqa: F401
+        configure_http_backend,  # noqa: F401
+        dump_environment_info,  # noqa: F401
+        get_session,  # noqa: F401
+        get_token,  # noqa: F401
+        logging,  # noqa: F401
+        scan_cache_dir,  # noqa: F401
+    )

.venv/lib/python3.11/site-packages/huggingface_hub/_commit_api.py ADDED Viewed

	@@ -0,0 +1,758 @@

+"""
+Type definitions and utilities for the `create_commit` API
+"""
+import base64
+import io
+import os
+import warnings
+from collections import defaultdict
+from contextlib import contextmanager
+from dataclasses import dataclass, field
+from itertools import groupby
+from pathlib import Path, PurePosixPath
+from typing import TYPE_CHECKING, Any, BinaryIO, Dict, Iterable, Iterator, List, Literal, Optional, Tuple, Union
+from tqdm.contrib.concurrent import thread_map
+from . import constants
+from .errors import EntryNotFoundError
+from .file_download import hf_hub_url
+from .lfs import UploadInfo, lfs_upload, post_lfs_batch_info
+from .utils import (
+    FORBIDDEN_FOLDERS,
+    chunk_iterable,
+    get_session,
+    hf_raise_for_status,
+    logging,
+    sha,
+    tqdm_stream_file,
+    validate_hf_hub_args,
+)
+from .utils import tqdm as hf_tqdm
+if TYPE_CHECKING:
+    from .hf_api import RepoFile
+logger = logging.get_logger(__name__)
+UploadMode = Literal["lfs", "regular"]  # how a file is sent to the Hub: as a git LFS blob or as a regular git blob
+# Number of paths sent per `HfApi.get_paths_info` call. The Hub accepts at most 1,000 paths per request on that route.
+# When the payload is too large, the request fails with:
+# HfHubHTTPError: 413 Client Error: Payload Too Large for url: https://huggingface.co/api/datasets/xxx (Request ID: xxx)\n\ntoo many parameters
+# See https://github.com/huggingface/huggingface_hub/issues/1503
+FETCH_LFS_BATCH_SIZE = 500
+@dataclass
+class CommitOperationDelete:
+    """
+    Data structure holding necessary info to delete a file or a folder from a repository
+    on the Hub.
+    Args:
+        path_in_repo (`str`):
+            Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
+            for a file or `"checkpoints/1fec34a/"` for a folder.
+        is_folder (`bool` or `Literal["auto"]`, *optional*)
+            Whether the Delete Operation applies to a folder or not. If "auto", the path
+            type (file or folder) is guessed automatically by looking if path ends with
+            a "/" (folder) or not (file). To explicitly set the path type, you can set
+            `is_folder=True` or `is_folder=False`.
+    """
+    path_in_repo: str
+    is_folder: Union[bool, Literal["auto"]] = "auto"
+    def __post_init__(self):
+        self.path_in_repo = _validate_path_in_repo(self.path_in_repo)
+        if self.is_folder == "auto":
+            self.is_folder = self.path_in_repo.endswith("/")
+        if not isinstance(self.is_folder, bool):
+            raise ValueError(
+                f"Wrong value for `is_folder`. Must be one of [`True`, `False`, `'auto'`]. Got '{self.is_folder}'."
+            )
+@dataclass
+class CommitOperationCopy:
+    """
+    Data structure holding necessary info to copy a file in a repository on the Hub.
+    Limitations:
+      - Only LFS files can be copied. To copy a regular file, you need to download it locally and re-upload it
+      - Cross-repository copies are not supported.
+    Note: you can combine a [`CommitOperationCopy`] and a [`CommitOperationDelete`] to rename an LFS file on the Hub.
+    Args:
+        src_path_in_repo (`str`):
+            Relative filepath in the repo of the file to be copied, e.g. `"checkpoints/1fec34a/weights.bin"`.
+        path_in_repo (`str`):
+            Relative filepath in the repo where to copy the file, e.g. `"checkpoints/1fec34a/weights_copy.bin"`.
+        src_revision (`str`, *optional*):
+            The git revision of the file to be copied. Can be any valid git revision.
+            Default to the target commit revision.
+    """
+    src_path_in_repo: str
+    path_in_repo: str
+    src_revision: Optional[str] = None
+    # set to the OID of the file to be copied if it has already been uploaded
+    # useful to determine if a commit will be empty or not.
+    _src_oid: Optional[str] = None
+    # set to the OID of the file to copy to if it has already been uploaded
+    # useful to determine if a commit will be empty or not.
+    _dest_oid: Optional[str] = None
+    def __post_init__(self):
+        self.src_path_in_repo = _validate_path_in_repo(self.src_path_in_repo)
+        self.path_in_repo = _validate_path_in_repo(self.path_in_repo)
+@dataclass
+class CommitOperationAdd:
+    """
+    Data structure holding necessary info to upload a file to a repository on the Hub.
+    Args:
+        path_in_repo (`str`):
+            Relative filepath in the repo, for example: `"checkpoints/1fec34a/weights.bin"`
+        path_or_fileobj (`str`, `Path`, `bytes`, or `BinaryIO`):
+            Either:
+            - a path to a local file (as `str` or `pathlib.Path`) to upload
+            - a buffer of bytes (`bytes`) holding the content of the file to upload
+            - a "file object" (subclass of `io.BufferedIOBase`), typically obtained
+                with `open(path, "rb")`. It must support `seek()` and `tell()` methods.
+    Raises:
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+            If `path_or_fileobj` is not one of `str`, `Path`, `bytes` or `io.BufferedIOBase`.
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+            If `path_or_fileobj` is a `str` or `Path` but not a path to an existing file.
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+            If `path_or_fileobj` is a `io.BufferedIOBase` but it doesn't support both
+            `seek()` and `tell()`.
+    """
+    path_in_repo: str
+    path_or_fileobj: Union[str, Path, bytes, BinaryIO]
+    upload_info: UploadInfo = field(init=False, repr=False)
+    # Internal attributes
+    # set to "lfs" or "regular" once known
+    _upload_mode: Optional[UploadMode] = field(init=False, repr=False, default=None)
+    # set to True if .gitignore rules prevent the file from being uploaded as LFS
+    # (server-side check)
+    _should_ignore: Optional[bool] = field(init=False, repr=False, default=None)
+    # set to the remote OID of the file if it has already been uploaded
+    # useful to determine if a commit will be empty or not
+    _remote_oid: Optional[str] = field(init=False, repr=False, default=None)
+    # set to True once the file has been uploaded as LFS
+    _is_uploaded: bool = field(init=False, repr=False, default=False)
+    # set to True once the file has been committed
+    _is_committed: bool = field(init=False, repr=False, default=False)
+    def __post_init__(self) -> None:
+        """Validates `path_or_fileobj` and compute `upload_info`."""
+        self.path_in_repo = _validate_path_in_repo(self.path_in_repo)
+        # Validate `path_or_fileobj` value
+        if isinstance(self.path_or_fileobj, Path):
+            self.path_or_fileobj = str(self.path_or_fileobj)
+        if isinstance(self.path_or_fileobj, str):
+            path_or_fileobj = os.path.normpath(os.path.expanduser(self.path_or_fileobj))
+            if not os.path.isfile(path_or_fileobj):
+                raise ValueError(f"Provided path: '{path_or_fileobj}' is not a file on the local file system")
+        elif not isinstance(self.path_or_fileobj, (io.BufferedIOBase, bytes)):
+            # ^^ Inspired from: https://stackoverflow.com/questions/44584829/how-to-determine-if-file-is-opened-in-binary-or-text-mode
+            raise ValueError(
+                "path_or_fileobj must be either an instance of str, bytes or"
+                " io.BufferedIOBase. If you passed a file-like object, make sure it is"
+                " in binary mode."
+            )
+        if isinstance(self.path_or_fileobj, io.BufferedIOBase):
+            try:
+                self.path_or_fileobj.tell()
+                self.path_or_fileobj.seek(0, os.SEEK_CUR)
+            except (OSError, AttributeError) as exc:
+                raise ValueError(
+                    "path_or_fileobj is a file-like object but does not implement seek() and tell()"
+                ) from exc
+        # Compute "upload_info" attribute
+        if isinstance(self.path_or_fileobj, str):
+            self.upload_info = UploadInfo.from_path(self.path_or_fileobj)
+        elif isinstance(self.path_or_fileobj, bytes):
+            self.upload_info = UploadInfo.from_bytes(self.path_or_fileobj)
+        else:
+            self.upload_info = UploadInfo.from_fileobj(self.path_or_fileobj)
+    @contextmanager
+    def as_file(self, with_tqdm: bool = False) -> Iterator[BinaryIO]:
+        """
+        A context manager that yields a file-like object allowing to read the underlying
+        data behind `path_or_fileobj`.
+        Args:
+            with_tqdm (`bool`, *optional*, defaults to `False`):
+                If True, iterating over the file object will display a progress bar. Only
+                works if the file-like object is a path to a file. Pure bytes and buffers
+                are not supported.
+        Example:
+        ```python
+        >>> operation = CommitOperationAdd(
+        ...        path_in_repo="remote/dir/weights.h5",
+        ...        path_or_fileobj="./local/weights.h5",
+        ... )
+        CommitOperationAdd(path_in_repo='remote/dir/weights.h5', path_or_fileobj='./local/weights.h5')
+        >>> with operation.as_file() as file:
+        ...     content = file.read()
+        >>> with operation.as_file(with_tqdm=True) as file:
+        ...     while True:
+        ...         data = file.read(1024)
+        ...         if not data:
+        ...              break
+        config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]
+        >>> with operation.as_file(with_tqdm=True) as file:
+        ...     requests.put(..., data=file)
+        config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]
+        ```
+        """
+        if isinstance(self.path_or_fileobj, str) or isinstance(self.path_or_fileobj, Path):
+            if with_tqdm:
+                with tqdm_stream_file(self.path_or_fileobj) as file:
+                    yield file
+            else:
+                with open(self.path_or_fileobj, "rb") as file:
+                    yield file
+        elif isinstance(self.path_or_fileobj, bytes):
+            yield io.BytesIO(self.path_or_fileobj)
+        elif isinstance(self.path_or_fileobj, io.BufferedIOBase):
+            prev_pos = self.path_or_fileobj.tell()
+            yield self.path_or_fileobj
+            self.path_or_fileobj.seek(prev_pos, io.SEEK_SET)
+    def b64content(self) -> bytes:
+        """
+        The base64-encoded content of `path_or_fileobj`
+        Returns: `bytes`
+        """
+        with self.as_file() as file:
+            return base64.b64encode(file.read())
+    @property
+    def _local_oid(self) -> Optional[str]:
+        """Return the OID of the local file.
+        This OID is then compared to `self._remote_oid` to check if the file has changed compared to the remote one.
+        If the file did not change, we won't upload it again to prevent empty commits.
+        For LFS files, the OID corresponds to the SHA256 of the file content (used a LFS ref).
+        For regular files, the OID corresponds to the SHA1 of the file content.
+        Note: this is slightly different to git OID computation since the oid of an LFS file is usually the git-SHA1 of the
+              pointer file content (not the actual file content). However, using the SHA256 is enough to detect changes
+              and more convenient client-side.
+        """
+        if self._upload_mode is None:
+            return None
+        elif self._upload_mode == "lfs":
+            return self.upload_info.sha256.hex()
+        else:
+            # Regular file => compute sha1
+            # => no need to read by chunk since the file is guaranteed to be <=5MB.
+            with self.as_file() as file:
+                return sha.git_hash(file.read())
+def _validate_path_in_repo(path_in_repo: str) -> str:
+    # Validate `path_in_repo` value to prevent a server-side issue
+    if path_in_repo.startswith("/"):
+        path_in_repo = path_in_repo[1:]
+    if path_in_repo == "." or path_in_repo == ".." or path_in_repo.startswith("../"):
+        raise ValueError(f"Invalid `path_in_repo` in CommitOperation: '{path_in_repo}'")
+    if path_in_repo.startswith("./"):
+        path_in_repo = path_in_repo[2:]
+    for forbidden in FORBIDDEN_FOLDERS:
+        if any(part == forbidden for part in path_in_repo.split("/")):
+            raise ValueError(
+                f"Invalid `path_in_repo` in CommitOperation: cannot update files under a '{forbidden}/' folder (path:"
+                f" '{path_in_repo}')."
+            )
+    return path_in_repo
+CommitOperation = Union[CommitOperationAdd, CommitOperationCopy, CommitOperationDelete]  # any single operation of a commit
+def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None:
+    """
+    Warn user when a list of operations is expected to overwrite itself in a single
+    commit.
+    Rules:
+    - If a filepath is updated by multiple `CommitOperationAdd` operations, a warning
+      message is triggered.
+    - If a filepath is updated at least once by a `CommitOperationAdd` and then deleted
+      by a `CommitOperationDelete`, a warning is triggered.
+    - If a `CommitOperationDelete` deletes a filepath that is then updated by a
+      `CommitOperationAdd`, no warning is triggered. This is usually useless (no need to
+      delete before upload) but can happen if a user deletes an entire folder and then
+      add new files to it.
+    """
+    nb_additions_per_path: Dict[str, int] = defaultdict(int)
+    for operation in operations:
+        path_in_repo = operation.path_in_repo
+        if isinstance(operation, CommitOperationAdd):
+            if nb_additions_per_path[path_in_repo] > 0:
+                warnings.warn(
+                    "About to update multiple times the same file in the same commit:"
+                    f" '{path_in_repo}'. This can cause undesired inconsistencies in"
+                    " your repo."
+                )
+            nb_additions_per_path[path_in_repo] += 1
+            for parent in PurePosixPath(path_in_repo).parents:
+                # Also keep track of number of updated files per folder
+                # => warns if deleting a folder overwrite some contained files
+                nb_additions_per_path[str(parent)] += 1
+        if isinstance(operation, CommitOperationDelete):
+            if nb_additions_per_path[str(PurePosixPath(path_in_repo))] > 0:
+                if operation.is_folder:
+                    warnings.warn(
+                        "About to delete a folder containing files that have just been"
+                        f" updated within the same commit: '{path_in_repo}'. This can"
+                        " cause undesired inconsistencies in your repo."
+                    )
+                else:
+                    warnings.warn(
+                        "About to delete a file that have just been updated within the"
+                        f" same commit: '{path_in_repo}'. This can cause undesired"
+                        " inconsistencies in your repo."
+                    )
+@validate_hf_hub_args
+def _upload_lfs_files(
+    *,
+    additions: List[CommitOperationAdd],
+    repo_type: str,
+    repo_id: str,
+    headers: Dict[str, str],
+    endpoint: Optional[str] = None,
+    num_threads: int = 5,
+    revision: Optional[str] = None,
+):
+    """
+    Uploads the content of `additions` to the Hub using the large file storage protocol.
+    Relevant external documentation:
+        - LFS Batch API: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md
+    Args:
+        additions (`List` of `CommitOperationAdd`):
+            The files to be uploaded
+        repo_type (`str`):
+            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
+        repo_id (`str`):
+            A namespace (user or an organization) and a repo name separated
+            by a `/`.
+        headers (`Dict[str, str]`):
+            Headers to use for the request, including authorization headers and user agent.
+        num_threads (`int`, *optional*):
+            The number of concurrent threads to use when uploading. Defaults to 5.
+        revision (`str`, *optional*):
+            The git revision to upload to.
+    Raises:
+        [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
+            If an upload failed for any reason
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+            If the server returns malformed responses
+        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
+            If the LFS batch endpoint returned an HTTP error.
+    """
+    # Step 1: retrieve upload instructions from the LFS batch endpoint.
+    #         Upload instructions are retrieved by chunk of 256 files to avoid reaching
+    #         the payload limit.
+    batch_actions: List[Dict] = []
+    for chunk in chunk_iterable(additions, chunk_size=256):
+        batch_actions_chunk, batch_errors_chunk = post_lfs_batch_info(
+            upload_infos=[op.upload_info for op in chunk],
+            repo_id=repo_id,
+            repo_type=repo_type,
+            revision=revision,
+            endpoint=endpoint,
+            headers=headers,
+            token=None,  # already passed in 'headers'
+        )
+        # If at least 1 error, we do not retrieve information for other chunks
+        if batch_errors_chunk:
+            message = "\n".join(
+                [
+                    f"Encountered error for file with OID {err.get('oid')}: `{err.get('error', {}).get('message')}"
+                    for err in batch_errors_chunk
+                ]
+            )
+            raise ValueError(f"LFS batch endpoint returned errors:\n{message}")
+        batch_actions += batch_actions_chunk
+    oid2addop = {add_op.upload_info.sha256.hex(): add_op for add_op in additions}
+    # Step 2: ignore files that have already been uploaded
+    filtered_actions = []
+    for action in batch_actions:
+        if action.get("actions") is None:
+            logger.debug(
+                f"Content of file {oid2addop[action['oid']].path_in_repo} is already"
+                " present upstream - skipping upload."
+            )
+        else:
+            filtered_actions.append(action)
+    if len(filtered_actions) == 0:
+        logger.debug("No LFS files to upload.")
+        return
+    # Step 3: upload files concurrently according to these instructions
+    def _wrapped_lfs_upload(batch_action) -> None:
+        try:
+            operation = oid2addop[batch_action["oid"]]
+            lfs_upload(operation=operation, lfs_batch_action=batch_action, headers=headers, endpoint=endpoint)
+        except Exception as exc:
+            raise RuntimeError(f"Error while uploading '{operation.path_in_repo}' to the Hub.") from exc
+    if constants.HF_HUB_ENABLE_HF_TRANSFER:
+        logger.debug(f"Uploading {len(filtered_actions)} LFS files to the Hub using `hf_transfer`.")
+        for action in hf_tqdm(filtered_actions, name="huggingface_hub.lfs_upload"):
+            _wrapped_lfs_upload(action)
+    elif len(filtered_actions) == 1:
+        logger.debug("Uploading 1 LFS file to the Hub")
+        _wrapped_lfs_upload(filtered_actions[0])
+    else:
+        logger.debug(
+            f"Uploading {len(filtered_actions)} LFS files to the Hub using up to {num_threads} threads concurrently"
+        )
+        thread_map(
+            _wrapped_lfs_upload,
+            filtered_actions,
+            desc=f"Upload {len(filtered_actions)} LFS files",
+            max_workers=num_threads,
+            tqdm_class=hf_tqdm,
+        )
+def _validate_preupload_info(preupload_info: dict):
+    files = preupload_info.get("files")
+    if not isinstance(files, list):
+        raise ValueError("preupload_info is improperly formatted")
+    for file_info in files:
+        if not (
+            isinstance(file_info, dict)
+            and isinstance(file_info.get("path"), str)
+            and isinstance(file_info.get("uploadMode"), str)
+            and (file_info["uploadMode"] in ("lfs", "regular"))
+        ):
+            raise ValueError("preupload_info is improperly formatted:")
+    return preupload_info
+@validate_hf_hub_args
+def _fetch_upload_modes(
+    additions: Iterable[CommitOperationAdd],
+    repo_type: str,
+    repo_id: str,
+    headers: Dict[str, str],
+    revision: str,
+    endpoint: Optional[str] = None,
+    create_pr: bool = False,
+    gitignore_content: Optional[str] = None,
+) -> None:
+    """
+    Requests the Hub "preupload" endpoint to determine whether each input file should be uploaded as a regular git blob
+    or as git LFS blob. Input `additions` are mutated in-place with the upload mode.
+    Args:
+        additions (`Iterable` of :class:`CommitOperationAdd`):
+            Iterable of :class:`CommitOperationAdd` describing the files to
+            upload to the Hub.
+        repo_type (`str`):
+            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
+        repo_id (`str`):
+            A namespace (user or an organization) and a repo name separated
+            by a `/`.
+        headers (`Dict[str, str]`):
+            Headers to use for the request, including authorization headers and user agent.
+        revision (`str`):
+            The git revision to upload the files to. Can be any valid git revision.
+        gitignore_content (`str`, *optional*):
+            The content of the `.gitignore` file to know which files should be ignored. The order of priority
+            is to first check if `gitignore_content` is passed, then check if the `.gitignore` file is present
+            in the list of files to commit and finally default to the `.gitignore` file already hosted on the Hub
+            (if any).
+    Raises:
+        [`~utils.HfHubHTTPError`]
+            If the Hub API returned an error.
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+            If the Hub API response is improperly formatted.
+    """
+    endpoint = endpoint if endpoint is not None else constants.ENDPOINT
+    # Fetch upload mode (LFS or regular) chunk by chunk.
+    upload_modes: Dict[str, UploadMode] = {}
+    should_ignore_info: Dict[str, bool] = {}
+    oid_info: Dict[str, Optional[str]] = {}
+    for chunk in chunk_iterable(additions, 256):
+        payload: Dict = {
+            "files": [
+                {
+                    "path": op.path_in_repo,
+                    "sample": base64.b64encode(op.upload_info.sample).decode("ascii"),
+                    "size": op.upload_info.size,
+                }
+                for op in chunk
+            ]
+        }
+        if gitignore_content is not None:
+            payload["gitIgnore"] = gitignore_content
+        resp = get_session().post(
+            f"{endpoint}/api/{repo_type}s/{repo_id}/preupload/{revision}",
+            json=payload,
+            headers=headers,
+            params={"create_pr": "1"} if create_pr else None,
+        )
+        hf_raise_for_status(resp)
+        preupload_info = _validate_preupload_info(resp.json())
+        upload_modes.update(**{file["path"]: file["uploadMode"] for file in preupload_info["files"]})
+        should_ignore_info.update(**{file["path"]: file["shouldIgnore"] for file in preupload_info["files"]})
+        oid_info.update(**{file["path"]: file.get("oid") for file in preupload_info["files"]})
+    # Set upload mode for each addition operation
+    for addition in additions:
+        addition._upload_mode = upload_modes[addition.path_in_repo]
+        addition._should_ignore = should_ignore_info[addition.path_in_repo]
+        addition._remote_oid = oid_info[addition.path_in_repo]
+    # Empty files cannot be uploaded as LFS (S3 would fail with a 501 Not Implemented)
+    # => empty files are uploaded as "regular" to still allow users to commit them.
+    for addition in additions:
+        if addition.upload_info.size == 0:
+            addition._upload_mode = "regular"
+@validate_hf_hub_args
+def _fetch_files_to_copy(
+    copies: Iterable[CommitOperationCopy],
+    repo_type: str,
+    repo_id: str,
+    headers: Dict[str, str],
+    revision: str,
+    endpoint: Optional[str] = None,
+) -> Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]]:
+    """
+    Fetch information about the files to copy.
+    For LFS files, we only need their metadata (file size and sha256) while for regular files
+    we need to download the raw content from the Hub.
+    Args:
+        copies (`Iterable` of :class:`CommitOperationCopy`):
+            Iterable of :class:`CommitOperationCopy` describing the files to
+            copy on the Hub.
+        repo_type (`str`):
+            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
+        repo_id (`str`):
+            A namespace (user or an organization) and a repo name separated
+            by a `/`.
+        headers (`Dict[str, str]`):
+            Headers to use for the request, including authorization headers and user agent.
+        revision (`str`):
+            The git revision to upload the files to. Can be any valid git revision.
+    Returns: `Dict[Tuple[str, Optional[str]], Union[RepoFile, bytes]]]`
+        Key is the file path and revision of the file to copy.
+        Value is the raw content as bytes (for regular files) or the file information as a RepoFile (for LFS files).
+    Raises:
+        [`~utils.HfHubHTTPError`]
+            If the Hub API returned an error.
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+            If the Hub API response is improperly formatted.
+    """
+    from .hf_api import HfApi, RepoFolder
+    hf_api = HfApi(endpoint=endpoint, headers=headers)
+    files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]] = {}
+    # Store (path, revision) -> oid mapping
+    oid_info: Dict[Tuple[str, Optional[str]], Optional[str]] = {}
+    # 1. Fetch OIDs for destination paths in batches.
+    dest_paths = [op.path_in_repo for op in copies]
+    for offset in range(0, len(dest_paths), FETCH_LFS_BATCH_SIZE):
+        dest_repo_files = hf_api.get_paths_info(
+            repo_id=repo_id,
+            paths=dest_paths[offset : offset + FETCH_LFS_BATCH_SIZE],
+            revision=revision,
+            repo_type=repo_type,
+        )
+        for file in dest_repo_files:
+            if not isinstance(file, RepoFolder):
+                oid_info[(file.path, revision)] = file.blob_id
+    # 2. Group by source revision and fetch source file info in batches.
+    for src_revision, operations in groupby(copies, key=lambda op: op.src_revision):
+        operations = list(operations)  # type: ignore
+        src_paths = [op.src_path_in_repo for op in operations]
+        for offset in range(0, len(src_paths), FETCH_LFS_BATCH_SIZE):
+            src_repo_files = hf_api.get_paths_info(
+                repo_id=repo_id,
+                paths=src_paths[offset : offset + FETCH_LFS_BATCH_SIZE],
+                revision=src_revision or revision,
+                repo_type=repo_type,
+            )
+            for src_repo_file in src_repo_files:
+                if isinstance(src_repo_file, RepoFolder):
+                    raise NotImplementedError("Copying a folder is not implemented.")
+                oid_info[(src_repo_file.path, src_revision)] = src_repo_file.blob_id
+                # If it's an LFS file, store the RepoFile object. Otherwise, download raw bytes.
+                if src_repo_file.lfs:
+                    files_to_copy[(src_repo_file.path, src_revision)] = src_repo_file
+                else:
+                    # TODO: (optimization) download regular files to copy concurrently
+                    url = hf_hub_url(
+                        endpoint=endpoint,
+                        repo_type=repo_type,
+                        repo_id=repo_id,
+                        revision=src_revision or revision,
+                        filename=src_repo_file.path,
+                    )
+                    response = get_session().get(url, headers=headers)
+                    hf_raise_for_status(response)
+                    files_to_copy[(src_repo_file.path, src_revision)] = response.content
+        # 3. Ensure all operations found a corresponding file in the Hub
+        #  and track src/dest OIDs for each operation.
+        for operation in operations:
+            if (operation.src_path_in_repo, src_revision) not in files_to_copy:
+                raise EntryNotFoundError(
+                    f"Cannot copy {operation.src_path_in_repo} at revision "
+                    f"{src_revision or revision}: file is missing on repo."
+                )
+            operation._src_oid = oid_info.get((operation.src_path_in_repo, operation.src_revision))
+            operation._dest_oid = oid_info.get((operation.path_in_repo, revision))
+    return files_to_copy
+def _prepare_commit_payload(
+    operations: Iterable[CommitOperation],
+    files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]],
+    commit_message: str,
+    commit_description: Optional[str] = None,
+    parent_commit: Optional[str] = None,
+) -> Iterable[Dict[str, Any]]:
+    """
+    Builds the payload to POST to the `/commit` API of the Hub.
+    Payload is returned as an iterator so that it can be streamed as a ndjson in the
+    POST request.
+    For more information, see:
+        - https://github.com/huggingface/huggingface_hub/issues/1085#issuecomment-1265208073
+        - http://ndjson.org/
+    """
+    commit_description = commit_description if commit_description is not None else ""
+    # 1. Send a header item with the commit metadata
+    header_value = {"summary": commit_message, "description": commit_description}
+    if parent_commit is not None:
+        header_value["parentCommit"] = parent_commit
+    yield {"key": "header", "value": header_value}
+    nb_ignored_files = 0
+    # 2. Send operations, one per line
+    for operation in operations:
+        # Skip ignored files
+        if isinstance(operation, CommitOperationAdd) and operation._should_ignore:
+            logger.debug(f"Skipping file '{operation.path_in_repo}' in commit (ignored by gitignore file).")
+            nb_ignored_files += 1
+            continue
+        # 2.a. Case adding a regular file
+        if isinstance(operation, CommitOperationAdd) and operation._upload_mode == "regular":
+            yield {
+                "key": "file",
+                "value": {
+                    "content": operation.b64content().decode(),
+                    "path": operation.path_in_repo,
+                    "encoding": "base64",
+                },
+            }
+        # 2.b. Case adding an LFS file
+        elif isinstance(operation, CommitOperationAdd) and operation._upload_mode == "lfs":
+            yield {
+                "key": "lfsFile",
+                "value": {
+                    "path": operation.path_in_repo,
+                    "algo": "sha256",
+                    "oid": operation.upload_info.sha256.hex(),
+                    "size": operation.upload_info.size,
+                },
+            }
+        # 2.c. Case deleting a file or folder
+        elif isinstance(operation, CommitOperationDelete):
+            yield {
+                "key": "deletedFolder" if operation.is_folder else "deletedFile",
+                "value": {"path": operation.path_in_repo},
+            }
+        # 2.d. Case copying a file or folder
+        elif isinstance(operation, CommitOperationCopy):
+            file_to_copy = files_to_copy[(operation.src_path_in_repo, operation.src_revision)]
+            if isinstance(file_to_copy, bytes):
+                yield {
+                    "key": "file",
+                    "value": {
+                        "content": base64.b64encode(file_to_copy).decode(),
+                        "path": operation.path_in_repo,
+                        "encoding": "base64",
+                    },
+                }
+            elif file_to_copy.lfs:
+                yield {
+                    "key": "lfsFile",
+                    "value": {
+                        "path": operation.path_in_repo,
+                        "algo": "sha256",
+                        "oid": file_to_copy.lfs.sha256,
+                    },
+                }
+            else:
+                raise ValueError(
+                    "Malformed files_to_copy (should be raw file content as bytes or RepoFile objects with LFS info."
+                )
+        # 2.e. Never expected to happen
+        else:
+            raise ValueError(
+                f"Unknown operation to commit. Operation: {operation}. Upload mode:"
+                f" {getattr(operation, '_upload_mode', None)}"
+            )
+    if nb_ignored_files > 0:
+        logger.info(f"Skipped {nb_ignored_files} file(s) in commit (ignored by gitignore file).")

.venv/lib/python3.11/site-packages/huggingface_hub/_commit_scheduler.py ADDED Viewed

	@@ -0,0 +1,353 @@

+import atexit
+import logging
+import os
+import time
+from concurrent.futures import Future
+from dataclasses import dataclass
+from io import SEEK_END, SEEK_SET, BytesIO
+from pathlib import Path
+from threading import Lock, Thread
+from typing import Dict, List, Optional, Union
+from .hf_api import DEFAULT_IGNORE_PATTERNS, CommitInfo, CommitOperationAdd, HfApi
+from .utils import filter_repo_objects
+logger = logging.getLogger(__name__)
+@dataclass(frozen=True)
+class _FileToUpload:
+    """Temporary dataclass to store info about files to upload. Not meant to be used directly.
+    Instances are frozen. `CommitScheduler.push_to_hub` builds one per file selected for a scheduled commit, from
+    the result of a `stat()` call on the local file.
+    """
+    # Path of the file on the local disk.
+    local_path: Path
+    # Destination path of the file in the repo.
+    path_in_repo: str
+    # File size (`st_size`) recorded when the folder was listed. Passed to `PartialFileIO` as the upload cap.
+    size_limit: int
+    # Modification time (`st_mtime`) recorded when the folder was listed.
+    last_modified: float
+class CommitScheduler:
+    """
+    Scheduler to upload a local folder to the Hub at regular intervals (e.g. push to hub every 5 minutes).
+    The recommended way to use the scheduler is to use it as a context manager. This ensures that the scheduler is
+    properly stopped and the last commit is triggered when the script ends. The scheduler can also be stopped manually
+    with the `stop` method. Checkout the [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#scheduled-uploads)
+    to learn more about how to use it.
+    Args:
+        repo_id (`str`):
+            The id of the repo to commit to.
+        folder_path (`str` or `Path`):
+            Path to the local folder to upload regularly.
+        every (`int` or `float`, *optional*):
+            The number of minutes between each commit. Defaults to 5 minutes.
+        path_in_repo (`str`, *optional*):
+            Relative path of the directory in the repo, for example: `"checkpoints/"`. Defaults to the root folder
+            of the repository.
+        repo_type (`str`, *optional*):
+            The type of the repo to commit to. Defaults to `model`.
+        revision (`str`, *optional*):
+            The revision of the repo to commit to. Defaults to `main`.
+        private (`bool`, *optional*):
+            Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
+        token (`str`, *optional*):
+            The token to use to commit to the repo. Defaults to the token saved on the machine.
+        allow_patterns (`List[str]` or `str`, *optional*):
+            If provided, only files matching at least one pattern are uploaded.
+        ignore_patterns (`List[str]` or `str`, *optional*):
+            If provided, files matching any of the patterns are not uploaded.
+        squash_history (`bool`, *optional*):
+            Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is
+            useful to avoid degraded performances on the repo when it grows too large.
+        hf_api (`HfApi`, *optional*):
+            The [`HfApi`] client to use to commit to the Hub. Can be set with custom settings (user agent, token,...).
+    Example:
+    ```py
+    >>> from pathlib import Path
+    >>> from huggingface_hub import CommitScheduler
+    # Scheduler uploads every 10 minutes
+    >>> csv_path = Path("watched_folder/data.csv")
+    >>> CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path=csv_path.parent, every=10)
+    >>> with csv_path.open("a") as f:
+    ...     f.write("first line")
+    # Some time later (...)
+    >>> with csv_path.open("a") as f:
+    ...     f.write("second line")
+    ```
+    Example using a context manager:
+    ```py
+    >>> from pathlib import Path
+    >>> from huggingface_hub import CommitScheduler
+    >>> with CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path="watched_folder", every=10) as scheduler:
+    ...     csv_path = Path("watched_folder/data.csv")
+    ...     with csv_path.open("a") as f:
+    ...         f.write("first line")
+    ...     (...)
+    ...     with csv_path.open("a") as f:
+    ...         f.write("second line")
+    # Scheduler is now stopped and last commit have been triggered
+    ```
+    """
+    def __init__(
+        self,
+        *,
+        repo_id: str,
+        folder_path: Union[str, Path],
+        every: Union[int, float] = 5,
+        path_in_repo: Optional[str] = None,
+        repo_type: Optional[str] = None,
+        revision: Optional[str] = None,
+        private: Optional[bool] = None,
+        token: Optional[str] = None,
+        allow_patterns: Optional[Union[List[str], str]] = None,
+        ignore_patterns: Optional[Union[List[str], str]] = None,
+        squash_history: bool = False,
+        hf_api: Optional["HfApi"] = None,
+    ) -> None:
+        self.api = hf_api or HfApi(token=token)
+        # Folder
+        self.folder_path = Path(folder_path).expanduser().resolve()
+        self.path_in_repo = path_in_repo or ""
+        self.allow_patterns = allow_patterns
+        if ignore_patterns is None:
+            ignore_patterns = []
+        elif isinstance(ignore_patterns, str):
+            ignore_patterns = [ignore_patterns]
+        self.ignore_patterns = ignore_patterns + DEFAULT_IGNORE_PATTERNS
+        if self.folder_path.is_file():
+            raise ValueError(f"'folder_path' must be a directory, not a file: '{self.folder_path}'.")
+        self.folder_path.mkdir(parents=True, exist_ok=True)
+        # Repository
+        repo_url = self.api.create_repo(repo_id=repo_id, private=private, repo_type=repo_type, exist_ok=True)
+        self.repo_id = repo_url.repo_id
+        self.repo_type = repo_type
+        self.revision = revision
+        self.token = token
+        # Keep track of already uploaded files
+        self.last_uploaded: Dict[Path, float] = {}  # key is local path, value is timestamp
+        # Scheduler
+        if not every > 0:
+            raise ValueError(f"'every' must be a positive integer, not '{every}'.")
+        self.lock = Lock()
+        self.every = every
+        self.squash_history = squash_history
+        logger.info(f"Scheduled job to push '{self.folder_path}' to '{self.repo_id}' every {self.every} minutes.")
+        self._scheduler_thread = Thread(target=self._run_scheduler, daemon=True)
+        self._scheduler_thread.start()
+        atexit.register(self._push_to_hub)
+        self.__stopped = False
+    def stop(self) -> None:
+        """Stop the scheduler.
+        A stopped scheduler cannot be restarted. Mostly for tests purposes.
+        """
+        self.__stopped = True
+    def __enter__(self) -> "CommitScheduler":
+        return self
+    def __exit__(self, exc_type, exc_value, traceback) -> None:
+        # Upload last changes before exiting
+        self.trigger().result()
+        self.stop()
+        return
+    def _run_scheduler(self) -> None:
+        """Dumb thread waiting between each scheduled push to Hub."""
+        while True:
+            self.last_future = self.trigger()
+            time.sleep(self.every * 60)
+            if self.__stopped:
+                break
+    def trigger(self) -> Future:
+        """Trigger a `push_to_hub` and return a future.
+        This method is automatically called every `every` minutes. You can also call it manually to trigger a commit
+        immediately, without waiting for the next scheduled commit.
+        """
+        return self.api.run_as_future(self._push_to_hub)
+    def _push_to_hub(self) -> Optional[CommitInfo]:
+        if self.__stopped:  # If stopped, already scheduled commits are ignored
+            return None
+        logger.info("(Background) scheduled commit triggered.")
+        try:
+            value = self.push_to_hub()
+            if self.squash_history:
+                logger.info("(Background) squashing repo history.")
+                self.api.super_squash_history(repo_id=self.repo_id, repo_type=self.repo_type, branch=self.revision)
+            return value
+        except Exception as e:
+            logger.error(f"Error while pushing to Hub: {e}")  # Depending on the setup, error might be silenced
+            raise
+    def push_to_hub(self) -> Optional[CommitInfo]:
+        """
+        Push folder to the Hub and return the commit info.
+        <Tip warning={true}>
+        This method is not meant to be called directly. It is run in the background by the scheduler, respecting a
+        queue mechanism to avoid concurrent commits. Making a direct call to the method might lead to concurrency
+        issues.
+        </Tip>
+        The default behavior of `push_to_hub` is to assume an append-only folder. It lists all files in the folder and
+        uploads only changed files. If no changes are found, the method returns without committing anything. If you want
+        to change this behavior, you can inherit from [`CommitScheduler`] and override this method. This can be useful
+        for example to compress data together in a single file before committing. For more details and examples, check
+        out our [integration guide](https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#scheduled-uploads).
+        """
+        # Check files to upload (with lock)
+        with self.lock:
+            logger.debug("Listing files to upload for scheduled commit.")
+            # List files from folder (taken from `_prepare_upload_folder_additions`)
+            relpath_to_abspath = {
+                path.relative_to(self.folder_path).as_posix(): path
+                for path in sorted(self.folder_path.glob("**/*"))  # sorted to be deterministic
+                if path.is_file()
+            }
+            prefix = f"{self.path_in_repo.strip('/')}/" if self.path_in_repo else ""
+            # Filter with pattern + filter out unchanged files + retrieve current file size
+            files_to_upload: List[_FileToUpload] = []
+            for relpath in filter_repo_objects(
+                relpath_to_abspath.keys(), allow_patterns=self.allow_patterns, ignore_patterns=self.ignore_patterns
+            ):
+                local_path = relpath_to_abspath[relpath]
+                stat = local_path.stat()
+                if self.last_uploaded.get(local_path) is None or self.last_uploaded[local_path] != stat.st_mtime:
+                    files_to_upload.append(
+                        _FileToUpload(
+                            local_path=local_path,
+                            path_in_repo=prefix + relpath,
+                            size_limit=stat.st_size,
+                            last_modified=stat.st_mtime,
+                        )
+                    )
+        # Return if nothing to upload
+        if len(files_to_upload) == 0:
+            logger.debug("Dropping schedule commit: no changed file to upload.")
+            return None
+        # Convert `_FileToUpload` as `CommitOperationAdd` (=> compute file shas + limit to file size)
+        logger.debug("Removing unchanged files since previous scheduled commit.")
+        add_operations = [
+            CommitOperationAdd(
+                # Cap the file to its current size, even if the user append data to it while a scheduled commit is happening
+                path_or_fileobj=PartialFileIO(file_to_upload.local_path, size_limit=file_to_upload.size_limit),
+                path_in_repo=file_to_upload.path_in_repo,
+            )
+            for file_to_upload in files_to_upload
+        ]
+        # Upload files (append mode expected - no need for lock)
+        logger.debug("Uploading files for scheduled commit.")
+        commit_info = self.api.create_commit(
+            repo_id=self.repo_id,
+            repo_type=self.repo_type,
+            operations=add_operations,
+            commit_message="Scheduled Commit",
+            revision=self.revision,
+        )
+        # Successful commit: keep track of the latest "last_modified" for each file
+        for file in files_to_upload:
+            self.last_uploaded[file.local_path] = file.last_modified
+        return commit_info
+class PartialFileIO(BytesIO):
+    """A file-like object that reads only the first part of a file.
+    Useful to upload a file to the Hub when the user might still be appending data to it. Only the first part of the
+    file is uploaded (i.e. the part that was available when the filesystem was first scanned).
+    In practice, only used internally by the CommitScheduler to regularly push a folder to the Hub with minimal
+    disturbance for the user. The object is passed to `CommitOperationAdd`.
+    Only supports `read`, `tell` and `seek` methods.
+    Args:
+        file_path (`str` or `Path`):
+            Path to the file to read.
+        size_limit (`int`):
+            The maximum number of bytes to read from the file. If the file is larger than this, only the first part
+            will be read (and uploaded).
+    """
+    def __init__(self, file_path: Union[str, Path], size_limit: int) -> None:
+        self._file_path = Path(file_path)
+        self._file = self._file_path.open("rb")
+        self._size_limit = min(size_limit, os.fstat(self._file.fileno()).st_size)
+    def __del__(self) -> None:
+        self._file.close()
+        return super().__del__()
+    def __repr__(self) -> str:
+        return f"<PartialFileIO file_path={self._file_path} size_limit={self._size_limit}>"
+    def __len__(self) -> int:
+        return self._size_limit
+    def __getattribute__(self, name: str):
+        if name.startswith("_") or name in ("read", "tell", "seek"):  # only 3 public methods supported
+            return super().__getattribute__(name)
+        raise NotImplementedError(f"PartialFileIO does not support '{name}'.")
+    def tell(self) -> int:
+        """Return the current file position."""
+        return self._file.tell()
+    def seek(self, __offset: int, __whence: int = SEEK_SET) -> int:
+        """Change the stream position to the given offset.
+        Behavior is the same as a regular file, except that the position is capped to the size limit.
+        """
+        if __whence == SEEK_END:
+            # SEEK_END => set from the truncated end
+            __offset = len(self) + __offset
+            __whence = SEEK_SET
+        pos = self._file.seek(__offset, __whence)
+        if pos > self._size_limit:
+            return self._file.seek(self._size_limit)
+        return pos
+    def read(self, __size: Optional[int] = -1) -> bytes:
+        """Read at most `__size` bytes from the file.
+        Behavior is the same as a regular file, except that it is capped to the size limit.
+        """
+        current = self._file.tell()
+        if __size is None or __size < 0:
+            # Read until file limit
+            truncated_size = self._size_limit - current
+        else:
+            # Read until file limit or __size
+            truncated_size = min(__size, self._size_limit - current)
+        return self._file.read(truncated_size)

.venv/lib/python3.11/site-packages/huggingface_hub/_inference_endpoints.py ADDED Viewed

	@@ -0,0 +1,402 @@

+import time
+from dataclasses import dataclass, field
+from datetime import datetime
+from enum import Enum
+from typing import TYPE_CHECKING, Dict, Optional, Union
+from huggingface_hub.errors import InferenceEndpointError, InferenceEndpointTimeoutError
+from .inference._client import InferenceClient
+from .inference._generated._async_client import AsyncInferenceClient
+from .utils import get_session, logging, parse_datetime
+if TYPE_CHECKING:
+    from .hf_api import HfApi
+logger = logging.get_logger(__name__)
+class InferenceEndpointStatus(str, Enum):
+    """Possible values of the status of an Inference Endpoint.
+    Members are also `str` instances, so they compare equal to their raw string value.
+    """
+    PENDING = "pending"
+    INITIALIZING = "initializing"
+    UPDATING = "updating"
+    UPDATE_FAILED = "updateFailed"
+    RUNNING = "running"
+    PAUSED = "paused"
+    FAILED = "failed"
+    SCALED_TO_ZERO = "scaledToZero"
+class InferenceEndpointType(str, Enum):
+    PUBlIC = "public"
+    PROTECTED = "protected"
+    PRIVATE = "private"
+@dataclass
+class InferenceEndpoint:
+    """
+    Contains information about a deployed Inference Endpoint.
+    Args:
+        name (`str`):
+            The unique name of the Inference Endpoint.
+        namespace (`str`):
+            The namespace where the Inference Endpoint is located.
+        repository (`str`):
+            The name of the model repository deployed on this Inference Endpoint.
+        status ([`InferenceEndpointStatus`]):
+            The current status of the Inference Endpoint.
+        url (`str`, *optional*):
+            The URL of the Inference Endpoint, if available. Only a deployed Inference Endpoint will have a URL.
+        framework (`str`):
+            The machine learning framework used for the model.
+        revision (`str`):
+            The specific model revision deployed on the Inference Endpoint.
+        task (`str`):
+            The task associated with the deployed model.
+        created_at (`datetime.datetime`):
+            The timestamp when the Inference Endpoint was created.
+        updated_at (`datetime.datetime`):
+            The timestamp of the last update of the Inference Endpoint.
+        type ([`InferenceEndpointType`]):
+            The type of the Inference Endpoint (public, protected, private).
+        raw (`Dict`):
+            The raw dictionary data returned from the API.
+        token (`str` or `bool`, *optional*):
+            Authentication token for the Inference Endpoint, if set when requesting the API. Will default to the
+            locally saved token if not provided. Pass `token=False` if you don't want to send your token to the server.
+    Example:
+        ```python
+        >>> from huggingface_hub import get_inference_endpoint
+        >>> endpoint = get_inference_endpoint("my-text-to-image")
+        >>> endpoint
+        InferenceEndpoint(name='my-text-to-image', ...)
+        # Get status
+        >>> endpoint.status
+        'running'
+        >>> endpoint.url
+        'https://my-text-to-image.region.vendor.endpoints.huggingface.cloud'
+        # Run inference
+        >>> endpoint.client.text_to_image(...)
+        # Pause endpoint to save $$$
+        >>> endpoint.pause()
+        # ...
+        # Resume and wait for deployment
+        >>> endpoint.resume()
+        >>> endpoint.wait()
+        >>> endpoint.client.text_to_image(...)
+        ```
+    """
+    # Field in __repr__
+    name: str = field(init=False)
+    namespace: str
+    repository: str = field(init=False)
+    status: InferenceEndpointStatus = field(init=False)
+    url: Optional[str] = field(init=False)
+    # Other fields
+    framework: str = field(repr=False, init=False)
+    revision: str = field(repr=False, init=False)
+    task: str = field(repr=False, init=False)
+    created_at: datetime = field(repr=False, init=False)
+    updated_at: datetime = field(repr=False, init=False)
+    type: InferenceEndpointType = field(repr=False, init=False)
+    # Raw dict from the API
+    raw: Dict = field(repr=False)
+    # Internal fields
+    _token: Union[str, bool, None] = field(repr=False, compare=False)
+    _api: "HfApi" = field(repr=False, compare=False)
+    @classmethod
+    def from_raw(
+        cls, raw: Dict, namespace: str, token: Union[str, bool, None] = None, api: Optional["HfApi"] = None
+    ) -> "InferenceEndpoint":
+        """Initialize object from raw dictionary."""
+        if api is None:
+            from .hf_api import HfApi
+            api = HfApi()
+        if token is None:
+            token = api.token
+        # All other fields are populated in __post_init__
+        return cls(raw=raw, namespace=namespace, _token=token, _api=api)
+    def __post_init__(self) -> None:
+        """Populate fields from raw dictionary.
+        Runs right after the dataclass-generated `__init__` and delegates to `_populate_from_raw`.
+        """
+        self._populate_from_raw()
+    @property
+    def client(self) -> InferenceClient:
+        """Returns a client to make predictions on this Inference Endpoint.
+        Returns:
+            [`InferenceClient`]: an inference client pointing to the deployed endpoint.
+        Raises:
+            [`InferenceEndpointError`]: If the Inference Endpoint is not yet deployed.
+        """
+        if self.url is None:
+            raise InferenceEndpointError(
+                "Cannot create a client for this Inference Endpoint as it is not yet deployed. "
+                "Please wait for the Inference Endpoint to be deployed using `endpoint.wait()` and try again."
+            )
+        return InferenceClient(
+            model=self.url,
+            token=self._token,  # type: ignore[arg-type] # boolean token shouldn't be possible. In practice it's ok.
+        )
+    @property
+    def async_client(self) -> AsyncInferenceClient:
+        """Returns a client to make predictions on this Inference Endpoint.
+        Returns:
+            [`AsyncInferenceClient`]: an asyncio-compatible inference client pointing to the deployed endpoint.
+        Raises:
+            [`InferenceEndpointError`]: If the Inference Endpoint is not yet deployed.
+        """
+        if self.url is None:
+            raise InferenceEndpointError(
+                "Cannot create a client for this Inference Endpoint as it is not yet deployed. "
+                "Please wait for the Inference Endpoint to be deployed using `endpoint.wait()` and try again."
+            )
+        return AsyncInferenceClient(
+            model=self.url,
+            token=self._token,  # type: ignore[arg-type] # boolean token shouldn't be possible. In practice it's ok.
+        )
+    def wait(self, timeout: Optional[int] = None, refresh_every: int = 5) -> "InferenceEndpoint":
+        """Wait for the Inference Endpoint to be deployed.
+        Information from the server will be fetched every 1s. If the Inference Endpoint is not deployed after `timeout`
+        seconds, a [`InferenceEndpointTimeoutError`] will be raised. The [`InferenceEndpoint`] will be mutated in place with the latest
+        data.
+        Args:
+            timeout (`int`, *optional*):
+                The maximum time to wait for the Inference Endpoint to be deployed, in seconds. If `None`, will wait
+                indefinitely.
+            refresh_every (`int`, *optional*):
+                The time to wait between each fetch of the Inference Endpoint status, in seconds. Defaults to 5s.
+        Returns:
+            [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
+        Raises:
+            [`InferenceEndpointError`]
+                If the Inference Endpoint ended up in a failed state.
+            [`InferenceEndpointTimeoutError`]
+                If the Inference Endpoint is not deployed after `timeout` seconds.
+        """
+        if timeout is not None and timeout < 0:
+            raise ValueError("`timeout` cannot be negative.")
+        if refresh_every <= 0:
+            raise ValueError("`refresh_every` must be positive.")
+        start = time.time()
+        while True:
+            if self.url is not None:
+                # Means the URL is provisioned => check if the endpoint is reachable
+                response = get_session().get(self.url, headers=self._api._build_hf_headers(token=self._token))
+                if response.status_code == 200:
+                    logger.info("Inference Endpoint is ready to be used.")
+                    return self
+            if self.status == InferenceEndpointStatus.FAILED:
+                raise InferenceEndpointError(
+                    f"Inference Endpoint {self.name} failed to deploy. Please check the logs for more information."
+                )
+            if timeout is not None:
+                if time.time() - start > timeout:
+                    raise InferenceEndpointTimeoutError("Timeout while waiting for Inference Endpoint to be deployed.")
+            logger.info(f"Inference Endpoint is not deployed yet ({self.status}). Waiting {refresh_every}s...")
+            time.sleep(refresh_every)
+            self.fetch()
+    def fetch(self) -> "InferenceEndpoint":
+        """Fetch latest information about the Inference Endpoint.
+        Returns:
+            [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
+        """
+        obj = self._api.get_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token)  # type: ignore [arg-type]
+        self.raw = obj.raw
+        self._populate_from_raw()
+        return self
+    def update(
+        self,
+        *,
+        # Compute update
+        accelerator: Optional[str] = None,
+        instance_size: Optional[str] = None,
+        instance_type: Optional[str] = None,
+        min_replica: Optional[int] = None,
+        max_replica: Optional[int] = None,
+        scale_to_zero_timeout: Optional[int] = None,
+        # Model update
+        repository: Optional[str] = None,
+        framework: Optional[str] = None,
+        revision: Optional[str] = None,
+        task: Optional[str] = None,
+        custom_image: Optional[Dict] = None,
+        secrets: Optional[Dict[str, str]] = None,
+    ) -> "InferenceEndpoint":
+        """Update the Inference Endpoint.
+        This method allows the update of either the compute configuration, the deployed model, or both. All arguments are
+        optional but at least one must be provided.
+        This is an alias for [`HfApi.update_inference_endpoint`]. The current object is mutated in place with the
+        latest data from the server.
+        Args:
+            accelerator (`str`, *optional*):
+                The hardware accelerator to be used for inference (e.g. `"cpu"`).
+            instance_size (`str`, *optional*):
+                The size or type of the instance to be used for hosting the model (e.g. `"x4"`).
+            instance_type (`str`, *optional*):
+                The cloud instance type where the Inference Endpoint will be deployed (e.g. `"intel-icl"`).
+            min_replica (`int`, *optional*):
+                The minimum number of replicas (instances) to keep running for the Inference Endpoint.
+            max_replica (`int`, *optional*):
+                The maximum number of replicas (instances) to scale to for the Inference Endpoint.
+            scale_to_zero_timeout (`int`, *optional*):
+                The duration in minutes before an inactive endpoint is scaled to zero.
+            repository (`str`, *optional*):
+                The name of the model repository associated with the Inference Endpoint (e.g. `"gpt2"`).
+            framework (`str`, *optional*):
+                The machine learning framework used for the model (e.g. `"custom"`).
+            revision (`str`, *optional*):
+                The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`).
+            task (`str`, *optional*):
+                The task on which to deploy the model (e.g. `"text-classification"`).
+            custom_image (`Dict`, *optional*):
+                A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
+                Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
+            secrets (`Dict[str, str]`, *optional*):
+                Secret values to inject in the container environment.
+        Returns:
+            [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
+        """
+        # Make API call
+        obj = self._api.update_inference_endpoint(
+            name=self.name,
+            namespace=self.namespace,
+            accelerator=accelerator,
+            instance_size=instance_size,
+            instance_type=instance_type,
+            min_replica=min_replica,
+            max_replica=max_replica,
+            scale_to_zero_timeout=scale_to_zero_timeout,
+            repository=repository,
+            framework=framework,
+            revision=revision,
+            task=task,
+            custom_image=custom_image,
+            secrets=secrets,
+            token=self._token,  # type: ignore [arg-type]
+        )
+        # Mutate current object
+        self.raw = obj.raw
+        self._populate_from_raw()
+        return self
+    def pause(self) -> "InferenceEndpoint":
+        """Pause the Inference Endpoint.
+        A paused Inference Endpoint will not be charged. It can be resumed at any time using [`InferenceEndpoint.resume`].
+        This is different than scaling the Inference Endpoint to zero with [`InferenceEndpoint.scale_to_zero`], which
+        would be automatically restarted when a request is made to it.
+        This is an alias for [`HfApi.pause_inference_endpoint`]. The current object is mutated in place with the
+        latest data from the server.
+        Returns:
+            [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
+        """
+        obj = self._api.pause_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token)  # type: ignore [arg-type]
+        self.raw = obj.raw
+        self._populate_from_raw()
+        return self
+    def resume(self, running_ok: bool = True) -> "InferenceEndpoint":
+        """Resume the Inference Endpoint.
+        This is an alias for [`HfApi.resume_inference_endpoint`]. The current object is mutated in place with the
+        latest data from the server.
+        Args:
+            running_ok (`bool`, *optional*):
+                If `True`, the method will not raise an error if the Inference Endpoint is already running. Defaults to
+                `True`.
+        Returns:
+            [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
+        """
+        obj = self._api.resume_inference_endpoint(
+            name=self.name, namespace=self.namespace, running_ok=running_ok, token=self._token
+        )  # type: ignore [arg-type]
+        self.raw = obj.raw
+        self._populate_from_raw()
+        return self
+    def scale_to_zero(self) -> "InferenceEndpoint":
+        """Scale Inference Endpoint to zero.
+        An Inference Endpoint scaled to zero will not be charged. It will be resume on the next request to it, with a
+        cold start delay. This is different than pausing the Inference Endpoint with [`InferenceEndpoint.pause`], which
+        would require a manual resume with [`InferenceEndpoint.resume`].
+        This is an alias for [`HfApi.scale_to_zero_inference_endpoint`]. The current object is mutated in place with the
+        latest data from the server.
+        Returns:
+            [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
+        """
+        obj = self._api.scale_to_zero_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token)  # type: ignore [arg-type]
+        self.raw = obj.raw
+        self._populate_from_raw()
+        return self
+    def delete(self) -> None:
+        """Delete the Inference Endpoint.
+        This operation is not reversible. If you don't want to be charged for an Inference Endpoint, it is preferable
+        to pause it with [`InferenceEndpoint.pause`] or scale it to zero with [`InferenceEndpoint.scale_to_zero`].
+        This is an alias for [`HfApi.delete_inference_endpoint`].
+        """
+        self._api.delete_inference_endpoint(name=self.name, namespace=self.namespace, token=self._token)  # type: ignore [arg-type]
+    def _populate_from_raw(self) -> None:
+        """Populate fields from raw dictionary.
+        Called in __post_init__ + each time the Inference Endpoint is updated.
+        """
+        # Repr fields
+        self.name = self.raw["name"]
+        self.repository = self.raw["model"]["repository"]
+        self.status = self.raw["status"]["state"]
+        self.url = self.raw["status"].get("url")
+        # Other fields
+        self.framework = self.raw["model"]["framework"]
+        self.revision = self.raw["model"]["revision"]
+        self.task = self.raw["model"]["task"]
+        self.created_at = parse_datetime(self.raw["status"]["createdAt"])
+        self.updated_at = parse_datetime(self.raw["status"]["updatedAt"])
+        self.type = self.raw["type"]

.venv/lib/python3.11/site-packages/huggingface_hub/_local_folder.py ADDED Viewed

	@@ -0,0 +1,432 @@

+# coding=utf-8
+# Copyright 2024-present, the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains utilities to handle the `../.cache/huggingface` folder in local directories.
+First discussed in https://github.com/huggingface/huggingface_hub/issues/1738 to store
+download metadata when downloading files from the hub to a local directory (without
+using the cache).
+./.cache/huggingface folder structure:
+[4.0K]  data
+├── [4.0K]  .cache
+│   └── [4.0K]  huggingface
+│       └── [4.0K]  download
+│           ├── [  16]  file.parquet.metadata
+│           ├── [  16]  file.txt.metadata
+│           └── [4.0K]  folder
+│               └── [  16]  file.parquet.metadata
+│
+├── [6.5G]  file.parquet
+├── [1.5K]  file.txt
+└── [4.0K]  folder
+    └── [   16]  file.parquet
+Download metadata file structure:
+```
+# file.txt.metadata
+11c5a3d5811f50298f278a704980280950aedb10
+a16a55fda99d2f2e7b69cce5cf93ff4ad3049930
+1712656091.123
+# file.parquet.metadata
+11c5a3d5811f50298f278a704980280950aedb10
+7c5d3f4b8b76583b422fcb9189ad6c89d5d97a094541ce8932dce3ecabde1421
+1712656091.123
+```
+"""
+import base64
+import hashlib
+import logging
+import os
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional
+from .utils import WeakFileLock
+logger = logging.getLogger(__name__)
+@dataclass
+class LocalDownloadFilePaths:
+    """
+    Paths to the files related to a download process in a local dir.
+    Returned by [`get_local_download_paths`].
+    Attributes:
+        file_path (`Path`):
+            Path where the file will be saved.
+        lock_path (`Path`):
+            Path to the lock file used to ensure atomicity when reading/writing metadata.
+        metadata_path (`Path`):
+            Path to the metadata file.
+    """
+    file_path: Path
+    lock_path: Path
+    metadata_path: Path
+    def incomplete_path(self, etag: str) -> Path:
+        """Return the path where a file will be temporarily downloaded before being moved to `file_path`."""
+        return self.metadata_path.parent / f"{_short_hash(self.metadata_path.name)}.{etag}.incomplete"
+@dataclass(frozen=True)
+class LocalUploadFilePaths:
+    """
+    Paths to the files related to an upload process in a local dir.
+    Returned by [`get_local_upload_paths`]. The dataclass is frozen: fields cannot be reassigned once the
+    instance is created.
+    Attributes:
+        path_in_repo (`str`):
+            Path of the file in the repo.
+        file_path (`Path`):
+            Path of the file in the local directory.
+        lock_path (`Path`):
+            Path to the lock file used to ensure atomicity when reading/writing metadata.
+        metadata_path (`Path`):
+            Path to the metadata file.
+    """
+    path_in_repo: str
+    file_path: Path
+    lock_path: Path
+    metadata_path: Path
+@dataclass
+class LocalDownloadFileMetadata:
+    """
+    Metadata about a file in the local directory related to a download process.
+    Attributes:
+        filename (`str`):
+            Path of the file in the repo.
+        commit_hash (`str`):
+            Commit hash of the file in the repo.
+        etag (`str`):
+            ETag of the file in the repo. Used to check if the file has changed.
+            For LFS files, this is the sha256 of the file. For regular files, it corresponds to the git hash.
+        timestamp (`float`):
+            Unix timestamp of when the metadata was saved i.e. when the metadata was accurate.
+    """
+    filename: str
+    commit_hash: str
+    etag: str
+    timestamp: float
+@dataclass
+class LocalUploadFileMetadata:
+    """
+    Metadata about a file in the local directory related to an upload process.
+    """
+    size: int
+    # Default values correspond to "we don't know yet"
+    timestamp: Optional[float] = None
+    should_ignore: Optional[bool] = None
+    sha256: Optional[str] = None
+    upload_mode: Optional[str] = None
+    is_uploaded: bool = False
+    is_committed: bool = False
+    def save(self, paths: LocalUploadFilePaths) -> None:
+        """Save the metadata to disk."""
+        with WeakFileLock(paths.lock_path):
+            with paths.metadata_path.open("w") as f:
+                new_timestamp = time.time()
+                f.write(str(new_timestamp) + "\n")
+                f.write(str(self.size))  # never None
+                f.write("\n")
+                if self.should_ignore is not None:
+                    f.write(str(int(self.should_ignore)))
+                f.write("\n")
+                if self.sha256 is not None:
+                    f.write(self.sha256)
+                f.write("\n")
+                if self.upload_mode is not None:
+                    f.write(self.upload_mode)
+                f.write("\n")
+                f.write(str(int(self.is_uploaded)) + "\n")
+                f.write(str(int(self.is_committed)) + "\n")
+            self.timestamp = new_timestamp
+def get_local_download_paths(local_dir: Path, filename: str) -> LocalDownloadFilePaths:
+    """Compute paths to the files related to a download process.
+    Folders containing the paths are all guaranteed to exist.
+    Args:
+        local_dir (`Path`):
+            Path to the local directory in which files are downloaded.
+        filename (`str`):
+            Path of the file in the repo.
+    Return:
+        [`LocalDownloadFilePaths`]: the paths to the files (file_path, lock_path, metadata_path, incomplete_path).
+    """
+    # filename is the path in the Hub repository (separated by '/')
+    # make sure to have a cross platform transcription
+    sanitized_filename = os.path.join(*filename.split("/"))
+    if os.name == "nt":
+        if sanitized_filename.startswith("..\\") or "\\..\\" in sanitized_filename:
+            raise ValueError(
+                f"Invalid filename: cannot handle filename '{sanitized_filename}' on Windows. Please ask the repository"
+                " owner to rename this file."
+            )
+    file_path = local_dir / sanitized_filename
+    metadata_path = _huggingface_dir(local_dir) / "download" / f"{sanitized_filename}.metadata"
+    lock_path = metadata_path.with_suffix(".lock")
+    # Some Windows versions do not allow for paths longer than 255 characters.
+    # In this case, we must specify it as an extended path by using the "\\?\" prefix
+    if os.name == "nt":
+        if not str(local_dir).startswith("\\\\?\\") and len(os.path.abspath(lock_path)) > 255:
+            file_path = Path("\\\\?\\" + os.path.abspath(file_path))
+            lock_path = Path("\\\\?\\" + os.path.abspath(lock_path))
+            metadata_path = Path("\\\\?\\" + os.path.abspath(metadata_path))
+    file_path.parent.mkdir(parents=True, exist_ok=True)
+    metadata_path.parent.mkdir(parents=True, exist_ok=True)
+    return LocalDownloadFilePaths(file_path=file_path, lock_path=lock_path, metadata_path=metadata_path)
+def get_local_upload_paths(local_dir: Path, filename: str) -> LocalUploadFilePaths:
+    """Compute paths to the files related to an upload process.
+    Folders containing the paths are all guaranteed to exist.
+    Args:
+        local_dir (`Path`):
+            Path to the local directory that is uploaded.
+        filename (`str`):
+            Path of the file in the repo.
+    Return:
+        [`LocalUploadFilePaths`]: the paths to the files (file_path, lock_path, metadata_path).
+    """
+    # filename is the path in the Hub repository (separated by '/')
+    # make sure to have a cross platform transcription
+    sanitized_filename = os.path.join(*filename.split("/"))
+    if os.name == "nt":
+        if sanitized_filename.startswith("..\\") or "\\..\\" in sanitized_filename:
+            raise ValueError(
+                f"Invalid filename: cannot handle filename '{sanitized_filename}' on Windows. Please ask the repository"
+                " owner to rename this file."
+            )
+    file_path = local_dir / sanitized_filename
+    metadata_path = _huggingface_dir(local_dir) / "upload" / f"{sanitized_filename}.metadata"
+    lock_path = metadata_path.with_suffix(".lock")
+    # Some Windows versions do not allow for paths longer than 255 characters.
+    # In this case, we must specify it as an extended path by using the "\\?\" prefix
+    if os.name == "nt":
+        if not str(local_dir).startswith("\\\\?\\") and len(os.path.abspath(lock_path)) > 255:
+            file_path = Path("\\\\?\\" + os.path.abspath(file_path))
+            lock_path = Path("\\\\?\\" + os.path.abspath(lock_path))
+            metadata_path = Path("\\\\?\\" + os.path.abspath(metadata_path))
+    file_path.parent.mkdir(parents=True, exist_ok=True)
+    metadata_path.parent.mkdir(parents=True, exist_ok=True)
+    return LocalUploadFilePaths(
+        path_in_repo=filename, file_path=file_path, lock_path=lock_path, metadata_path=metadata_path
+    )
+def read_download_metadata(local_dir: Path, filename: str) -> Optional[LocalDownloadFileMetadata]:
+    """Read metadata about a file in the local directory related to a download process.
+    Args:
+        local_dir (`Path`):
+            Path to the local directory in which files are downloaded.
+        filename (`str`):
+            Path of the file in the repo.
+    Return:
+        `[LocalDownloadFileMetadata]` or `None`: the metadata if it exists, `None` otherwise.
+    """
+    paths = get_local_download_paths(local_dir, filename)
+    with WeakFileLock(paths.lock_path):
+        if paths.metadata_path.exists():
+            try:
+                with paths.metadata_path.open() as f:
+                    commit_hash = f.readline().strip()
+                    etag = f.readline().strip()
+                    timestamp = float(f.readline().strip())
+                    metadata = LocalDownloadFileMetadata(
+                        filename=filename,
+                        commit_hash=commit_hash,
+                        etag=etag,
+                        timestamp=timestamp,
+                    )
+            except Exception as e:
+                # remove the metadata file if it is corrupted / not the right format
+                logger.warning(
+                    f"Invalid metadata file {paths.metadata_path}: {e}. Removing it from disk and continue."
+                )
+                try:
+                    paths.metadata_path.unlink()
+                except Exception as e:
+                    logger.warning(f"Could not remove corrupted metadata file {paths.metadata_path}: {e}")
+            try:
+                # check if the file exists and hasn't been modified since the metadata was saved
+                stat = paths.file_path.stat()
+                if (
+                    stat.st_mtime - 1 <= metadata.timestamp
+                ):  # allow 1s difference as stat.st_mtime might not be precise
+                    return metadata
+                logger.info(f"Ignored metadata for '{filename}' (outdated). Will re-compute hash.")
+            except FileNotFoundError:
+                # file does not exist => metadata is outdated
+                return None
+    return None
+def read_upload_metadata(local_dir: Path, filename: str) -> LocalUploadFileMetadata:
+    """Read metadata about a file in the local directory related to an upload process.
+    TODO: factorize logic with `read_download_metadata`.
+    Args:
+        local_dir (`Path`):
+            Path to the local directory in which files are downloaded.
+        filename (`str`):
+            Path of the file in the repo.
+    Return:
+        `[LocalUploadFileMetadata]` or `None`: the metadata if it exists, `None` otherwise.
+    """
+    paths = get_local_upload_paths(local_dir, filename)
+    with WeakFileLock(paths.lock_path):
+        if paths.metadata_path.exists():
+            try:
+                with paths.metadata_path.open() as f:
+                    timestamp = float(f.readline().strip())
+                    size = int(f.readline().strip())  # never None
+                    _should_ignore = f.readline().strip()
+                    should_ignore = None if _should_ignore == "" else bool(int(_should_ignore))
+                    _sha256 = f.readline().strip()
+                    sha256 = None if _sha256 == "" else _sha256
+                    _upload_mode = f.readline().strip()
+                    upload_mode = None if _upload_mode == "" else _upload_mode
+                    if upload_mode not in (None, "regular", "lfs"):
+                        raise ValueError(f"Invalid upload mode in metadata {paths.path_in_repo}: {upload_mode}")
+                    is_uploaded = bool(int(f.readline().strip()))
+                    is_committed = bool(int(f.readline().strip()))
+                    metadata = LocalUploadFileMetadata(
+                        timestamp=timestamp,
+                        size=size,
+                        should_ignore=should_ignore,
+                        sha256=sha256,
+                        upload_mode=upload_mode,
+                        is_uploaded=is_uploaded,
+                        is_committed=is_committed,
+                    )
+            except Exception as e:
+                # remove the metadata file if it is corrupted / not the right format
+                logger.warning(
+                    f"Invalid metadata file {paths.metadata_path}: {e}. Removing it from disk and continue."
+                )
+                try:
+                    paths.metadata_path.unlink()
+                except Exception as e:
+                    logger.warning(f"Could not remove corrupted metadata file {paths.metadata_path}: {e}")
+            # TODO: can we do better?
+            if (
+                metadata.timestamp is not None
+                and metadata.is_uploaded  # file was uploaded
+                and not metadata.is_committed  # but not committed
+                and time.time() - metadata.timestamp > 20 * 3600  # and it's been more than 20 hours
+            ):  # => we consider it as garbage-collected by S3
+                metadata.is_uploaded = False
+            # check if the file exists and hasn't been modified since the metadata was saved
+            try:
+                if metadata.timestamp is not None and paths.file_path.stat().st_mtime <= metadata.timestamp:
+                    return metadata
+                logger.info(f"Ignored metadata for '{filename}' (outdated). Will re-compute hash.")
+            except FileNotFoundError:
+                # file does not exist => metadata is outdated
+                pass
+    # empty metadata => we don't know anything expect its size
+    return LocalUploadFileMetadata(size=paths.file_path.stat().st_size)
+def write_download_metadata(local_dir: Path, filename: str, commit_hash: str, etag: str) -> None:
+    """Write metadata about a file in the local directory related to a download process.
+    Args:
+        local_dir (`Path`):
+            Path to the local directory in which files are downloaded.
+    """
+    paths = get_local_download_paths(local_dir, filename)
+    with WeakFileLock(paths.lock_path):
+        with paths.metadata_path.open("w") as f:
+            f.write(f"{commit_hash}\n{etag}\n{time.time()}\n")
+def _huggingface_dir(local_dir: Path) -> Path:
+    """Return the path to the `.cache/huggingface` directory in a local directory."""
+    # Wrap in lru_cache to avoid overwriting the .gitignore file if called multiple times
+    path = local_dir / ".cache" / "huggingface"
+    path.mkdir(exist_ok=True, parents=True)
+    # Create a .gitignore file in the .cache/huggingface directory if it doesn't exist
+    # Should be thread-safe enough like this.
+    gitignore = path / ".gitignore"
+    gitignore_lock = path / ".gitignore.lock"
+    if not gitignore.exists():
+        try:
+            with WeakFileLock(gitignore_lock, timeout=0.1):
+                gitignore.write_text("*")
+        except IndexError:
+            pass
+        except OSError:  # TimeoutError, FileNotFoundError, PermissionError, etc.
+            pass
+        try:
+            gitignore_lock.unlink()
+        except OSError:
+            pass
+    return path
+def _short_hash(filename: str) -> str:
+    return base64.urlsafe_b64encode(hashlib.sha1(filename.encode()).digest()).decode()

.venv/lib/python3.11/site-packages/huggingface_hub/_login.py ADDED Viewed

	@@ -0,0 +1,520 @@

+# Copyright 2020 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains methods to log in to the Hub."""
+import os
+import subprocess
+from getpass import getpass
+from pathlib import Path
+from typing import Optional
+from . import constants
+from .commands._cli_utils import ANSI
+from .utils import (
+    capture_output,
+    get_token,
+    is_google_colab,
+    is_notebook,
+    list_credential_helpers,
+    logging,
+    run_subprocess,
+    set_git_credential,
+    unset_git_credential,
+)
+from .utils._auth import (
+    _get_token_by_name,
+    _get_token_from_environment,
+    _get_token_from_file,
+    _get_token_from_google_colab,
+    _save_stored_tokens,
+    _save_token,
+    get_stored_tokens,
+)
+from .utils._deprecation import _deprecate_arguments, _deprecate_positional_args
+logger = logging.get_logger(__name__)
+# ASCII-art banner printed by `interpreter_login` in the terminal.
+_HF_LOGO_ASCII = """
+    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
+    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
+    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
+    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
+    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|
+"""
+@_deprecate_arguments(
+    version="1.0",
+    deprecated_args="write_permission",
+    custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.",
+)
+@_deprecate_positional_args(version="1.0")
+def login(
+    token: Optional[str] = None,
+    *,
+    add_to_git_credential: bool = False,
+    new_session: bool = True,
+    write_permission: bool = False,
+) -> None:
+    """Login the machine to access the Hub.
+    The `token` is persisted in cache and set as a git credential. Once done, the machine
+    is logged in and the access token will be available across all `huggingface_hub`
+    components. If `token` is not provided, it will be prompted to the user either with
+    a widget (in a notebook) or via the terminal.
+    To log in from outside of a script, one can also use `huggingface-cli login` which is
+    a cli command that wraps [`login`].
+    <Tip>
+    [`login`] is a drop-in replacement method for [`notebook_login`] as it wraps and
+    extends its capabilities.
+    </Tip>
+    <Tip>
+    When the token is not passed, [`login`] will automatically detect if the script runs
+    in a notebook or not. However, this detection might not be accurate due to the
+    variety of notebooks that exists nowadays. If that is the case, you can always force
+    the UI by using [`notebook_login`] or [`interpreter_login`].
+    </Tip>
+    Args:
+        token (`str`, *optional*):
+            User access token to generate from https://huggingface.co/settings/token.
+        add_to_git_credential (`bool`, defaults to `False`):
+            If `True`, token will be set as git credential. If no git credential helper
+            is configured, a warning will be displayed to the user. If `token` is `None`,
+            the value of `add_to_git_credential` is ignored and will be prompted again
+            to the end user.
+        new_session (`bool`, defaults to `True`):
+            If `True`, will request a token even if one is already saved on the machine.
+        write_permission (`bool`):
+            Ignored and deprecated argument.
+    Raises:
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+            If an organization token is passed. Only personal account tokens are valid
+            to log in.
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+            If token is invalid.
+        [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError)
+            If running in a notebook but `ipywidgets` is not installed.
+    """
+    if token is not None:
+        if not add_to_git_credential:
+            logger.info(
+                "The token has not been saved to the git credentials helper. Pass "
+                "`add_to_git_credential=True` in this function directly or "
+                "`--add-to-git-credential` if using via `huggingface-cli` if "
+                "you want to set the git credential as well."
+            )
+        _login(token, add_to_git_credential=add_to_git_credential)
+    elif is_notebook():
+        notebook_login(new_session=new_session)
+    else:
+        interpreter_login(new_session=new_session)
+def logout(token_name: Optional[str] = None) -> None:
+    """Logout the machine from the Hub.
+    Token is deleted from the machine and removed from git credential.
+    Args:
+        token_name (`str`, *optional*):
+            Name of the access token to logout from. If `None`, will logout from all saved access tokens.
+    Raises:
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError):
+            If the access token name is not found.
+    """
+    if get_token() is None and not get_stored_tokens():  # No active token and no saved access tokens
+        logger.warning("Not logged in!")
+        return
+    if not token_name:
+        # Delete all saved access tokens and token
+        for file_path in (constants.HF_TOKEN_PATH, constants.HF_STORED_TOKENS_PATH):
+            try:
+                Path(file_path).unlink()
+            except FileNotFoundError:
+                pass
+        logger.info("Successfully logged out from all access tokens.")
+    else:
+        _logout_from_token(token_name)
+        logger.info(f"Successfully logged out from access token: {token_name}.")
+    unset_git_credential()
+    # Check if still logged in
+    if _get_token_from_google_colab() is not None:
+        raise EnvironmentError(
+            "You are automatically logged in using a Google Colab secret.\n"
+            "To log out, you must unset the `HF_TOKEN` secret in your Colab settings."
+        )
+    if _get_token_from_environment() is not None:
+        raise EnvironmentError(
+            "Token has been deleted from your machine but you are still logged in.\n"
+            "To log out, you must clear out both `HF_TOKEN` and `HUGGING_FACE_HUB_TOKEN` environment variables."
+        )
+def auth_switch(token_name: str, add_to_git_credential: bool = False) -> None:
+    """Switch to a different access token.
+    Args:
+        token_name (`str`):
+            Name of the access token to switch to.
+        add_to_git_credential (`bool`, defaults to `False`):
+            If `True`, token will be set as git credential. If no git credential helper
+            is configured, a warning will be displayed to the user. If `token` is `None`,
+            the value of `add_to_git_credential` is ignored and will be prompted again
+            to the end user.
+    Raises:
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError):
+            If the access token name is not found.
+    """
+    token = _get_token_by_name(token_name)
+    if not token:
+        raise ValueError(f"Access token {token_name} not found in {constants.HF_STORED_TOKENS_PATH}")
+    # Write token to HF_TOKEN_PATH
+    _set_active_token(token_name, add_to_git_credential)
+    logger.info(f"The current active token is: {token_name}")
+    token_from_environment = _get_token_from_environment()
+    if token_from_environment is not None and token_from_environment != token:
+        logger.warning(
+            "The environment variable `HF_TOKEN` is set and will override the access token you've just switched to."
+        )
+def auth_list() -> None:
+    """List all stored access tokens."""
+    tokens = get_stored_tokens()
+    if not tokens:
+        logger.info("No access tokens found.")
+        return
+    # Find current token
+    current_token = get_token()
+    current_token_name = None
+    for token_name in tokens:
+        if tokens.get(token_name) == current_token:
+            current_token_name = token_name
+    # Print header
+    max_offset = max(len("token"), max(len(token) for token in tokens)) + 2
+    print(f"  {{:<{max_offset}}}| {{:<15}}".format("name", "token"))
+    print("-" * (max_offset + 2) + "|" + "-" * 15)
+    # Print saved access tokens
+    for token_name in tokens:
+        token = tokens.get(token_name, "<not set>")
+        masked_token = f"{token[:3]}****{token[-4:]}" if token != "<not set>" else token
+        is_current = "*" if token == current_token else " "
+        print(f"{is_current} {{:<{max_offset}}}| {{:<15}}".format(token_name, masked_token))
+    if _get_token_from_environment():
+        logger.warning(
+            "\nNote: Environment variable `HF_TOKEN` is set and is the current active token independently from the stored tokens listed above."
+        )
+    elif current_token_name is None:
+        logger.warning(
+            "\nNote: No active token is set and no environment variable `HF_TOKEN` is found. Use `huggingface-cli login` to log in."
+        )
+###
+# Interpreter-based login (text)
+###
+@_deprecate_arguments(
+    version="1.0",
+    deprecated_args="write_permission",
+    custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.",
+)
+@_deprecate_positional_args(version="1.0")
+def interpreter_login(*, new_session: bool = True, write_permission: bool = False) -> None:
+    """
+    Displays a prompt to log in to the HF website and store the token.
+    This is equivalent to [`login`] without passing a token when not run in a notebook.
+    [`interpreter_login`] is useful if you want to force the use of the terminal prompt
+    instead of a notebook widget.
+    For more details, see [`login`].
+    The token is read with `getpass` (input is not echoed), the user is then asked
+    whether it should be added as a git credential, and both answers are passed to `_login`.
+    Args:
+        new_session (`bool`, defaults to `True`):
+            If `True`, will request a token even if one is already saved on the machine.
+            If `False` and `get_token()` returns a token, the function returns without prompting.
+        write_permission (`bool`):
+            Ignored and deprecated argument.
+    """
+    # Nothing to do when the caller does not want a new session and a token is already available.
+    if not new_session and get_token() is not None:
+        logger.info("User is already logged in.")
+        return
+    # Imported here rather than at module level (presumably to avoid an import cycle or cost -- confirm).
+    from .commands.delete_cache import _ask_for_confirmation_no_tui
+    print(_HF_LOGO_ASCII)
+    # Let the user know that going on will replace the token already in use.
+    if get_token() is not None:
+        logger.info(
+            "    A token is already saved on your machine. Run `huggingface-cli"
+            " whoami` to get more information or `huggingface-cli logout` if you want"
+            " to log out."
+        )
+        logger.info("    Setting a new token will erase the existing one.")
+    logger.info(
+        "    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens ."
+    )
+    # `os.name == "nt"` means Windows: give a hint on how to paste into the hidden prompt.
+    if os.name == "nt":
+        logger.info("Token can be pasted using 'Right-Click'.")
+    token = getpass("Enter your token (input will not be visible): ")
+    add_to_git_credential = _ask_for_confirmation_no_tui("Add token as git credential?")
+    _login(token=token, add_to_git_credential=add_to_git_credential)
+###
+# Notebook-based login (widget)
+###
+# HTML snippet for a password-based login form.
+# NOTE(review): not referenced by the token widget built in `notebook_login`; confirm whether it is still used.
+NOTEBOOK_LOGIN_PASSWORD_HTML = """<center> <img
+src=https://huggingface.co/front/assets/huggingface_logo-noborder.svg
+alt='Hugging Face'> <br> Immediately click login after typing your password or
+it might be stored in plain text in this notebook file. </center>"""
+# HTML placed as the first child of the login widget in `notebook_login` (before the token field).
+NOTEBOOK_LOGIN_TOKEN_HTML_START = """<center> <img
+src=https://huggingface.co/front/assets/huggingface_logo-noborder.svg
+alt='Hugging Face'> <br> Copy a token from <a
+href="https://huggingface.co/settings/tokens" target="_blank">your Hugging Face
+tokens page</a> and paste it below. <br> Immediately click login after copying
+your token or it might be stored in plain text in this notebook file. </center>"""
+# HTML placed as the last child of the login widget in `notebook_login` (after the login button).
+NOTEBOOK_LOGIN_TOKEN_HTML_END = """
+<b>Pro Tip:</b> If you don't already have one, you can create a dedicated
+'notebooks' token with 'write' access, that you can then easily reuse for all
+notebooks. </center>"""
+@_deprecate_arguments(
+    version="1.0",
+    deprecated_args="write_permission",
+    custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.",
+)
+@_deprecate_positional_args(version="1.0")
+def notebook_login(*, new_session: bool = True, write_permission: bool = False) -> None:
+    """
+    Displays a widget to log in to the HF website and store the token.
+    This is equivalent to [`login`] without passing a token when run in a notebook.
+    [`notebook_login`] is useful if you want to force the use of the notebook widget
+    instead of a prompt in the terminal.
+    For more details, see [`login`].
+    Args:
+        new_session (`bool`, defaults to `True`):
+            If `True`, will request a token even if one is already saved on the machine.
+        write_permission (`bool`):
+            Ignored and deprecated argument.
+    Raises:
+        [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError):
+            If `ipywidgets` or `IPython` cannot be imported.
+    """
+    # The widget libraries are optional dependencies: import them only when the function is called.
+    try:
+        import ipywidgets.widgets as widgets  # type: ignore
+        from IPython.display import display  # type: ignore
+    except ImportError:
+        raise ImportError(
+            "The `notebook_login` function can only be used in a notebook (Jupyter or"
+            " Colab) and you need the `ipywidgets` module: `pip install ipywidgets`."
+        )
+    # Nothing to do when the caller does not want a new session and a token is already available.
+    if not new_session and get_token() is not None:
+        logger.info("User is already logged in.")
+        return
+    # Build the form: intro HTML, token field, git-credential checkbox, login button, closing HTML.
+    box_layout = widgets.Layout(display="flex", flex_flow="column", align_items="center", width="50%")
+    token_widget = widgets.Password(description="Token:")
+    git_checkbox_widget = widgets.Checkbox(value=True, description="Add token as git credential?")
+    token_finish_button = widgets.Button(description="Login")
+    login_token_widget = widgets.VBox(
+        [
+            widgets.HTML(NOTEBOOK_LOGIN_TOKEN_HTML_START),
+            token_widget,
+            git_checkbox_widget,
+            token_finish_button,
+            widgets.HTML(NOTEBOOK_LOGIN_TOKEN_HTML_END),
+        ],
+        layout=box_layout,
+    )
+    display(login_token_widget)
+    # On click events
+    def login_token_event(t):
+        """Event handler for the login button."""
+        # Read the form values before the token field is wiped.
+        token = token_widget.value
+        add_to_git_credential = git_checkbox_widget.value
+        # Erase token and clear value to make sure it's not saved in the notebook.
+        token_widget.value = ""
+        # Hide inputs
+        login_token_widget.children = [widgets.Label("Connecting...")]
+        # Whatever `_login` outputs (or the error it raises) becomes the text shown in the widget.
+        try:
+            with capture_output() as captured:
+                _login(token, add_to_git_credential=add_to_git_credential)
+            message = captured.getvalue()
+        except Exception as error:
+            message = str(error)
+        # Print result (success message or error)
+        login_token_widget.children = [widgets.Label(line) for line in message.split("\n") if line.strip()]
+    token_finish_button.on_click(login_token_event)
+###
+# Login private helpers
+###
+def _login(
+    token: str,
+    add_to_git_credential: bool,
+) -> None:
+    from .hf_api import whoami  # avoid circular import
+    if token.startswith("api_org"):
+        raise ValueError("You must use your personal account token, not an organization token.")
+    token_info = whoami(token)
+    permission = token_info["auth"]["accessToken"]["role"]
+    logger.info(f"Token is valid (permission: {permission}).")
+    token_name = token_info["auth"]["accessToken"]["displayName"]
+    # Store token locally
+    _save_token(token=token, token_name=token_name)
+    # Set active token
+    _set_active_token(token_name=token_name, add_to_git_credential=add_to_git_credential)
+    logger.info("Login successful.")
+    if _get_token_from_environment():
+        logger.warning(
+            "Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured."
+        )
+    else:
+        logger.info(f"The current active token is: `{token_name}`")
+def _logout_from_token(token_name: str) -> None:
+    """Logout from a specific access token.
+    Args:
+        token_name (`str`):
+            The name of the access token to logout from.
+    Raises:
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError):
+            If the access token name is not found.
+    """
+    stored_tokens = get_stored_tokens()
+    # If there is no access tokens saved or the access token name is not found, do nothing
+    if not stored_tokens or token_name not in stored_tokens:
+        return
+    token = stored_tokens.pop(token_name)
+    _save_stored_tokens(stored_tokens)
+    if token == _get_token_from_file():
+        logger.warning(f"Active token '{token_name}' has been deleted.")
+        Path(constants.HF_TOKEN_PATH).unlink(missing_ok=True)
+def _set_active_token(
+    token_name: str,
+    add_to_git_credential: bool,
+) -> None:
+    """Set the active access token.
+    Args:
+        token_name (`str`):
+            The name of the token to set as active.
+    """
+    token = _get_token_by_name(token_name)
+    if not token:
+        raise ValueError(f"Token {token_name} not found in {constants.HF_STORED_TOKENS_PATH}")
+    if add_to_git_credential:
+        if _is_git_credential_helper_configured():
+            set_git_credential(token)
+            logger.info(
+                "Your token has been saved in your configured git credential helpers"
+                + f" ({','.join(list_credential_helpers())})."
+            )
+        else:
+            logger.warning("Token has not been saved to git credential helper.")
+    # Write token to HF_TOKEN_PATH
+    path = Path(constants.HF_TOKEN_PATH)
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(token)
+    logger.info(f"Your token has been saved to {constants.HF_TOKEN_PATH}")
+def _is_git_credential_helper_configured() -> bool:
+    """Check if a git credential helper is configured.
+    Warns user if not the case (except for Google Colab where "store" is set by default
+    by `huggingface_hub`).
+    """
+    helpers = list_credential_helpers()
+    if len(helpers) > 0:
+        return True  # Do not warn: at least 1 helper is set
+    # Only in Google Colab to avoid the warning message
+    # See https://github.com/huggingface/huggingface_hub/issues/1043#issuecomment-1247010710
+    if is_google_colab():
+        _set_store_as_git_credential_helper_globally()
+        return True  # Do not warn: "store" is used by default in Google Colab
+    # Otherwise, warn user
+    print(
+        ANSI.red(
+            "Cannot authenticate through git-credential as no helper is defined on your"
+            " machine.\nYou might have to re-authenticate when pushing to the Hugging"
+            " Face Hub.\nRun the following command in your terminal in case you want to"
+            " set the 'store' credential helper as default.\n\ngit config --global"
+            " credential.helper store\n\nRead"
+            " https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more"
+            " details."
+        )
+    )
+    return False
+def _set_store_as_git_credential_helper_globally() -> None:
+    """Set globally the credential.helper to `store`.
+    To be used only in Google Colab as we assume the user doesn't care about the git
+    credential config. It is the only particular case where we don't want to display the
+    warning message in [`notebook_login()`].
+    Related:
+    - https://github.com/huggingface/huggingface_hub/issues/1043
+    - https://github.com/huggingface/huggingface_hub/issues/1051
+    - https://git-scm.com/docs/git-credential-store
+    """
+    try:
+        run_subprocess("git config --global credential.helper store")
+    except subprocess.CalledProcessError as exc:
+        raise EnvironmentError(exc.stderr)

.venv/lib/python3.11/site-packages/huggingface_hub/_snapshot_download.py ADDED Viewed

	@@ -0,0 +1,307 @@

+import os
+from pathlib import Path
+from typing import Dict, List, Literal, Optional, Union
+import requests
+from tqdm.auto import tqdm as base_tqdm
+from tqdm.contrib.concurrent import thread_map
+from . import constants
+from .errors import GatedRepoError, LocalEntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError
+from .file_download import REGEX_COMMIT_HASH, hf_hub_download, repo_folder_name
+from .hf_api import DatasetInfo, HfApi, ModelInfo, SpaceInfo
+from .utils import OfflineModeIsEnabled, filter_repo_objects, logging, validate_hf_hub_args
+from .utils import tqdm as hf_tqdm
+logger = logging.get_logger(__name__)
+@validate_hf_hub_args
+def snapshot_download(
+    repo_id: str,
+    *,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    user_agent: Optional[Union[Dict, str]] = None,
+    proxies: Optional[Dict] = None,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    force_download: bool = False,
+    token: Optional[Union[bool, str]] = None,
+    local_files_only: bool = False,
+    allow_patterns: Optional[Union[List[str], str]] = None,
+    ignore_patterns: Optional[Union[List[str], str]] = None,
+    max_workers: int = 8,
+    tqdm_class: Optional[base_tqdm] = None,
+    headers: Optional[Dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    # Deprecated args
+    local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
+    resume_download: Optional[bool] = None,
+) -> str:
+    """Download repo files.
+    Download a whole snapshot of a repo's files at the specified revision. This is useful when you want all files from
+    a repo, because you don't know which ones you will need a priori. All files are nested inside a folder in order
+    to keep their actual filename relative to that folder. You can also filter which files to download using
+    `allow_patterns` and `ignore_patterns`.
+    If `local_dir` is provided, the file structure from the repo will be replicated in this location. When using this
+    option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at the root of `local_dir`
+    to store some metadata related to the downloaded files. While this mechanism is not as robust as the main
+    cache-system, it's optimized for regularly pulling the latest version of a repository.
+    An alternative would be to clone the repo but this requires git and git-lfs to be installed and properly
+    configured. It is also not possible to filter which files to download when cloning a repository using git.
+    When the repo info cannot be fetched (`local_files_only=True`, offline mode, connection error, timeout or HTTP
+    error), the function falls back to what is on disk: the cached snapshot folder for the revision if it exists,
+    otherwise `local_dir` if it is an existing non-empty directory. If neither is found, an error is raised.
+    Args:
+        repo_id (`str`):
+            A user or an organization name and a repo name separated by a `/`.
+        repo_type (`str`, *optional*):
+            Set to `"dataset"` or `"space"` if downloading from a dataset or space,
+            `None` or `"model"` if downloading from a model. Default is `None`.
+        revision (`str`, *optional*):
+            An optional Git revision id which can be a branch name, a tag, or a
+            commit hash.
+        cache_dir (`str`, `Path`, *optional*):
+            Path to the folder where cached files are stored.
+        local_dir (`str` or `Path`, *optional*):
+            If provided, the downloaded files will be placed under this directory.
+        library_name (`str`, *optional*):
+            The name of the library to which the object corresponds.
+        library_version (`str`, *optional*):
+            The version of the library.
+        user_agent (`str`, `dict`, *optional*):
+            The user-agent info in the form of a dictionary or a string.
+        proxies (`dict`, *optional*):
+            Dictionary mapping protocol to the URL of the proxy passed to
+            `requests.request`.
+        etag_timeout (`float`, *optional*, defaults to `10`):
+            When fetching ETag, how many seconds to wait for the server to send
+            data before giving up which is passed to `requests.request`.
+        force_download (`bool`, *optional*, defaults to `False`):
+            Whether the file should be downloaded even if it already exists in the local cache.
+        token (`str`, `bool`, *optional*):
+            A token to be used for the download.
+                - If `True`, the token is read from the HuggingFace config
+                  folder.
+                - If a string, it's used as the authentication token.
+        headers (`dict`, *optional*):
+            Additional headers to include in the request. Those headers take precedence over the others.
+        local_files_only (`bool`, *optional*, defaults to `False`):
+            If `True`, avoid downloading the file and return the path to the
+            local cached file if it exists.
+        allow_patterns (`List[str]` or `str`, *optional*):
+            If provided, only files matching at least one pattern are downloaded.
+        ignore_patterns (`List[str]` or `str`, *optional*):
+            If provided, files matching any of the patterns are not downloaded.
+        max_workers (`int`, *optional*):
+            Number of concurrent threads to download files (1 thread = 1 file download).
+            Defaults to 8.
+        tqdm_class (`tqdm`, *optional*):
+            If provided, overwrites the default behavior for the progress bar. Passed
+            argument must inherit from `tqdm.auto.tqdm` or at least mimic its behavior.
+            Note that the `tqdm_class` is not passed to each individual download.
+            Defaults to the custom HF progress bar that can be disabled by setting
+            `HF_HUB_DISABLE_PROGRESS_BARS` environment variable.
+        endpoint (`str`, *optional*):
+            Forwarded as-is to `HfApi` and to each `hf_hub_download` call.
+        local_dir_use_symlinks (`bool` or `"auto"`, defaults to `"auto"`):
+            Deprecated argument. Only forwarded to `hf_hub_download`.
+        resume_download (`bool`, *optional*):
+            Deprecated argument. Only forwarded to `hf_hub_download`.
+    Returns:
+        `str`: folder path of the repo snapshot. If `local_dir` is provided and the files were
+        downloaded, this is the real path of `local_dir`.
+    Raises:
+        [`~utils.RepositoryNotFoundError`]
+            If the repository to download from cannot be found. This may be because it doesn't exist,
+            or because it is set to `private` and you do not have access.
+        [`~utils.RevisionNotFoundError`]
+            If the revision to download from cannot be found.
+        [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
+            If `token=True` and the token cannot be found.
+        [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if
+            ETag cannot be determined.
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+            if some parameter value is invalid.
+    """
+    # Normalize arguments and fill in defaults.
+    if cache_dir is None:
+        cache_dir = constants.HF_HUB_CACHE
+    if revision is None:
+        revision = constants.DEFAULT_REVISION
+    if isinstance(cache_dir, Path):
+        cache_dir = str(cache_dir)
+    if repo_type is None:
+        repo_type = "model"
+    if repo_type not in constants.REPO_TYPES:
+        raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}")
+    # Folder of this repo inside the cache; "refs" and "snapshots" are looked up under it.
+    storage_folder = os.path.join(cache_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type))
+    repo_info: Union[ModelInfo, DatasetInfo, SpaceInfo, None] = None
+    # Keeps the error of the failed API call (if any) so it can be reported or re-raised later.
+    api_call_error: Optional[Exception] = None
+    if not local_files_only:
+        # try/except logic to handle different errors => taken from `hf_hub_download`
+        try:
+            # if we have internet connection we want to list files to download
+            api = HfApi(
+                library_name=library_name,
+                library_version=library_version,
+                user_agent=user_agent,
+                endpoint=endpoint,
+                headers=headers,
+            )
+            repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type, revision=revision, token=token)
+        except (requests.exceptions.SSLError, requests.exceptions.ProxyError):
+            # Actually raise for those subclasses of ConnectionError
+            raise
+        except (
+            requests.exceptions.ConnectionError,
+            requests.exceptions.Timeout,
+            OfflineModeIsEnabled,
+        ) as error:
+            # Internet connection is down
+            # => will try to use local files only
+            api_call_error = error
+            pass
+        except RevisionNotFoundError:
+            # The repo was found but the revision doesn't exist on the Hub (never existed or got deleted)
+            raise
+        except requests.HTTPError as error:
+            # Multiple reasons for an http error:
+            # - Repository is private and invalid/missing token sent
+            # - Repository is gated and invalid/missing token sent
+            # - Hub is down (error 500 or 504)
+            # => let's switch to 'local_files_only=True' to check if the files are already cached.
+            #    (if it's not the case, the error will be re-raised)
+            api_call_error = error
+            pass
+    # At this stage, if `repo_info` is None it means either:
+    # - internet connection is down
+    # - internet connection is deactivated (local_files_only=True or HF_HUB_OFFLINE=True)
+    # - repo is private/gated and invalid/missing token sent
+    # - Hub is down
+    # => let's look if we can find the appropriate folder in the cache:
+    #    - if the specified revision is a commit hash, look inside "snapshots".
+    #    - if the specified revision is a branch or tag, look inside "refs".
+    # => if local_dir is not None, we will return the path to the local folder if it exists.
+    if repo_info is None:
+        # Try to get which commit hash corresponds to the specified revision
+        commit_hash = None
+        if REGEX_COMMIT_HASH.match(revision):
+            commit_hash = revision
+        else:
+            ref_path = os.path.join(storage_folder, "refs", revision)
+            if os.path.exists(ref_path):
+                # retrieve commit_hash from refs file
+                with open(ref_path) as f:
+                    commit_hash = f.read()
+        # Try to locate snapshot folder for this commit hash
+        if commit_hash is not None:
+            snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash)
+            if os.path.exists(snapshot_folder):
+                # Snapshot folder exists => let's return it
+                # (but we can't check if all the files are actually there)
+                return snapshot_folder
+        # If local_dir is not None, return it if it exists and is not empty
+        if local_dir is not None:
+            local_dir = Path(local_dir)
+            if local_dir.is_dir() and any(local_dir.iterdir()):
+                logger.warning(
+                    f"Returning existing local_dir `{local_dir}` as remote repo cannot be accessed in `snapshot_download` ({api_call_error})."
+                )
+                return str(local_dir.resolve())
+        # If we couldn't find the appropriate folder on disk, raise an error.
+        if local_files_only:
+            raise LocalEntryNotFoundError(
+                "Cannot find an appropriate cached snapshot folder for the specified revision on the local disk and "
+                "outgoing traffic has been disabled. To enable repo look-ups and downloads online, pass "
+                "'local_files_only=False' as input."
+            )
+        elif isinstance(api_call_error, OfflineModeIsEnabled):
+            raise LocalEntryNotFoundError(
+                "Cannot find an appropriate cached snapshot folder for the specified revision on the local disk and "
+                "outgoing traffic has been disabled. To enable repo look-ups and downloads online, set "
+                "'HF_HUB_OFFLINE=0' as environment variable."
+            ) from api_call_error
+        elif isinstance(api_call_error, RepositoryNotFoundError) or isinstance(api_call_error, GatedRepoError):
+            # Repo not found => let's raise the actual error
+            raise api_call_error
+        else:
+            # Otherwise: most likely a connection issue or Hub downtime => let's warn the user
+            raise LocalEntryNotFoundError(
+                "An error happened while trying to locate the files on the Hub and we cannot find the appropriate"
+                " snapshot folder for the specified revision on the local disk. Please check your internet connection"
+                " and try again."
+            ) from api_call_error
+    # At this stage, internet connection is up and running
+    # => let's download the files!
+    assert repo_info.sha is not None, "Repo info returned from server must have a revision sha."
+    assert repo_info.siblings is not None, "Repo info returned from server must have a siblings list."
+    # Apply the allow/ignore patterns to the list of file names in the repo.
+    filtered_repo_files = list(
+        filter_repo_objects(
+            items=[f.rfilename for f in repo_info.siblings],
+            allow_patterns=allow_patterns,
+            ignore_patterns=ignore_patterns,
+        )
+    )
+    commit_hash = repo_info.sha
+    snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash)
+    # if passed revision is not identical to commit_hash
+    # then revision has to be a branch name or tag name.
+    # In that case store a ref.
+    if revision != commit_hash:
+        ref_path = os.path.join(storage_folder, "refs", revision)
+        try:
+            os.makedirs(os.path.dirname(ref_path), exist_ok=True)
+            with open(ref_path, "w") as f:
+                f.write(commit_hash)
+        except OSError as e:
+            # Best effort: failing to write the ref must not prevent the download.
+            logger.warning(f"Ignored error while writing commit hash to {ref_path}: {e}.")
+    # we pass the commit_hash to hf_hub_download
+    # so no network call happens if we already
+    # have the file locally.
+    def _inner_hf_hub_download(repo_file: str):
+        return hf_hub_download(
+            repo_id,
+            filename=repo_file,
+            repo_type=repo_type,
+            revision=commit_hash,
+            endpoint=endpoint,
+            cache_dir=cache_dir,
+            local_dir=local_dir,
+            local_dir_use_symlinks=local_dir_use_symlinks,
+            library_name=library_name,
+            library_version=library_version,
+            user_agent=user_agent,
+            proxies=proxies,
+            etag_timeout=etag_timeout,
+            resume_download=resume_download,
+            force_download=force_download,
+            token=token,
+            headers=headers,
+        )
+    if constants.HF_HUB_ENABLE_HF_TRANSFER:
+        # when using hf_transfer we don't want extra parallelism
+        # from the one hf_transfer provides
+        for file in filtered_repo_files:
+            _inner_hf_hub_download(file)
+    else:
+        thread_map(
+            _inner_hf_hub_download,
+            filtered_repo_files,
+            desc=f"Fetching {len(filtered_repo_files)} files",
+            max_workers=max_workers,
+            # User can use its own tqdm class or the default one from `huggingface_hub.utils`
+            tqdm_class=tqdm_class or hf_tqdm,
+        )
+    # With `local_dir`, files were placed there: return that folder instead of the cache snapshot.
+    if local_dir is not None:
+        return str(os.path.realpath(local_dir))
+    return snapshot_folder

.venv/lib/python3.11/site-packages/huggingface_hub/_space_api.py ADDED Viewed

	@@ -0,0 +1,160 @@

+# coding=utf-8
+# Copyright 2019-present, the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from dataclasses import dataclass
+from datetime import datetime
+from enum import Enum
+from typing import Dict, Optional
+from huggingface_hub.utils import parse_datetime
+class SpaceStage(str, Enum):
+    """
+    Enumeration of the possible stages of a Space on the Hub.
+    Members are also `str` instances, so a value can be compared to a string:
+    ```py
+    assert SpaceStage.BUILDING == "BUILDING"
+    ```
+    Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceInfo.ts#L61 (private url).
+    """
+    # Copied from moon-landing > server > repo_types > SpaceInfo.ts (private repo)
+    NO_APP_FILE = "NO_APP_FILE"
+    CONFIG_ERROR = "CONFIG_ERROR"
+    BUILDING = "BUILDING"
+    BUILD_ERROR = "BUILD_ERROR"
+    RUNNING = "RUNNING"
+    RUNNING_BUILDING = "RUNNING_BUILDING"
+    RUNTIME_ERROR = "RUNTIME_ERROR"
+    DELETING = "DELETING"
+    STOPPED = "STOPPED"
+    PAUSED = "PAUSED"
+class SpaceHardware(str, Enum):
+    """
+    Enumeration of the hardware flavors available to run your Space on the Hub.
+    Members are also `str` instances, so a value can be compared to a string:
+    ```py
+    assert SpaceHardware.CPU_BASIC == "cpu-basic"
+    ```
+    Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceInfo.ts#L73 (private url).
+    """
+    # Member names are the upper-cased values with "-" replaced by "_".
+    CPU_BASIC = "cpu-basic"
+    CPU_UPGRADE = "cpu-upgrade"
+    T4_SMALL = "t4-small"
+    T4_MEDIUM = "t4-medium"
+    L4X1 = "l4x1"
+    L4X4 = "l4x4"
+    ZERO_A10G = "zero-a10g"
+    A10G_SMALL = "a10g-small"
+    A10G_LARGE = "a10g-large"
+    A10G_LARGEX2 = "a10g-largex2"
+    A10G_LARGEX4 = "a10g-largex4"
+    A100_LARGE = "a100-large"
+    V5E_1X1 = "v5e-1x1"
+    V5E_2X2 = "v5e-2x2"
+    V5E_2X4 = "v5e-2x4"
+class SpaceStorage(str, Enum):
+    """
+    Enumeration of the persistent storage tiers available for your Space on the Hub.
+    Members are also `str` instances, so a value can be compared to a string:
+    ```py
+    assert SpaceStorage.SMALL == "small"
+    ```
+    Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceHardwareFlavor.ts#L24 (private url).
+    """
+    SMALL = "small"
+    MEDIUM = "medium"
+    LARGE = "large"
+@dataclass
+class SpaceRuntime:
+    """
+    Contains information about the current runtime of a Space.
+    Args:
+        stage (`str`):
+            Current stage of the space. Example: RUNNING.
+        hardware (`str` or `None`):
+            Current hardware of the space. Example: "cpu-basic". Can be `None` if Space
+            is `BUILDING` for the first time.
+        requested_hardware (`str` or `None`):
+            Requested hardware. Can be different than `hardware` especially if the request
+            has just been made. Example: "t4-medium". Can be `None` if no hardware has
+            been requested yet.
+        sleep_time (`int` or `None`):
+            Number of seconds the Space will be kept alive after the last request. By default (if value is `None`), the
+            Space will never go to sleep if it's running on an upgraded hardware, while it will go to sleep after 48
+            hours on a free 'cpu-basic' hardware. For more details, see https://huggingface.co/docs/hub/spaces-gpus#sleep-time.
+        raw (`dict`):
+            Raw response from the server. Contains more information about the Space
+            runtime like number of replicas, number of cpu, memory size,...
+    """
+    stage: SpaceStage
+    hardware: Optional[SpaceHardware]
+    requested_hardware: Optional[SpaceHardware]
+    sleep_time: Optional[int]
+    storage: Optional[SpaceStorage]
+    raw: Dict
+    def __init__(self, data: Dict) -> None:
+        self.stage = data["stage"]
+        self.hardware = data.get("hardware", {}).get("current")
+        self.requested_hardware = data.get("hardware", {}).get("requested")
+        self.sleep_time = data.get("gcTimeout")
+        self.storage = data.get("storage")
+        self.raw = data
+@dataclass
+class SpaceVariable:
+    """
+    Contains information about the current variables of a Space.
+    Args:
+        key (`str`):
+            Variable key. Example: `"MODEL_REPO_ID"`
+        value (`str`):
+            Variable value. Example: `"the_model_repo_id"`.
+        description (`str` or None):
+            Description of the variable. Example: `"Model Repo ID of the implemented model"`.
+        updatedAt (`datetime` or None):
+            datetime of the last update of the variable (if the variable has been updated at least once).
+    """
+    key: str
+    value: str
+    description: Optional[str]
+    updated_at: Optional[datetime]
+    def __init__(self, key: str, values: Dict) -> None:
+        self.key = key
+        self.value = values["value"]
+        self.description = values.get("description")
+        updated_at = values.get("updatedAt")
+        self.updated_at = parse_datetime(updated_at) if updated_at is not None else None

.venv/lib/python3.11/site-packages/huggingface_hub/_tensorboard_logger.py ADDED Viewed

	@@ -0,0 +1,194 @@

+# Copyright 2023 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains a logger to push training logs to the Hub, using Tensorboard."""
+from pathlib import Path
+from typing import TYPE_CHECKING, List, Optional, Union
+from ._commit_scheduler import CommitScheduler
+from .errors import EntryNotFoundError
+from .repocard import ModelCard
+from .utils import experimental
+# Depending on user's setup, SummaryWriter can come either from 'tensorboardX'
+# or from 'torch.utils.tensorboard'. Both are compatible so let's try to load
+# from either of them.
+# Resolve the `SummaryWriter` base class at import time. `is_summary_writer_available` is
+# checked by `HFSummaryWriter.__new__`, which raises an `ImportError` when it is `False`.
+try:
+    from tensorboardX import SummaryWriter
+    is_summary_writer_available = True
+except ImportError:
+    try:
+        from torch.utils.tensorboard import SummaryWriter
+        # NOTE(review): the flag stays `False` even though this import succeeded, so
+        # `HFSummaryWriter` still refuses to instantiate when only the torch writer is
+        # installed. Confirm whether this is intended before changing it.
+        is_summary_writer_available = False
+    except ImportError:
+        # Dummy class to avoid failing at import. Will raise on instance creation.
+        SummaryWriter = object
+        is_summary_writer_available = False
+if TYPE_CHECKING:
+    # Static type checkers always see the `tensorboardX` class, whatever was resolved at runtime.
+    from tensorboardX import SummaryWriter
+class HFSummaryWriter(SummaryWriter):
+    """
+    Wrapper around the tensorboard's `SummaryWriter` to push training logs to the Hub.
+    Data is logged locally and then pushed to the Hub asynchronously. Pushing data to the Hub is done in a separate
+    thread to avoid blocking the training script. In particular, if the upload fails for any reason (e.g. a connection
+    issue), the main script will not be interrupted. Data is automatically pushed to the Hub every `commit_every`
+    minutes (default to every 5 minutes).
+    <Tip warning={true}>
+    `HFSummaryWriter` is experimental. Its API is subject to change in the future without prior notice.
+    </Tip>
+    Args:
+        repo_id (`str`):
+            The id of the repo to which the logs will be pushed.
+        logdir (`str`, *optional*):
+            The directory where the logs will be written. If not specified, a local directory will be created by the
+            underlying `SummaryWriter` object.
+        commit_every (`int` or `float`, *optional*):
+            The frequency (in minutes) at which the logs will be pushed to the Hub. Defaults to 5 minutes.
+        squash_history (`bool`, *optional*):
+            Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is
+            useful to avoid degraded performances on the repo when it grows too large.
+        repo_type (`str`, *optional*):
+            The type of the repo to which the logs will be pushed. Defaults to "model".
+        repo_revision (`str`, *optional*):
+            The revision of the repo to which the logs will be pushed. Defaults to "main".
+        repo_private (`bool`, *optional*):
+            Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
+        path_in_repo (`str`, *optional*):
+            The path to the folder in the repo where the logs will be pushed. Defaults to "tensorboard/".
+        repo_allow_patterns (`List[str]` or `str`, *optional*):
+            A list of patterns to include in the upload. Defaults to `"*.tfevents.*"`. Check out the
+            [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details.
+        repo_ignore_patterns (`List[str]` or `str`, *optional*):
+            A list of patterns to exclude in the upload. Check out the
+            [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details.
+        token (`str`, *optional*):
+            Authentication token. Will default to the stored token. See https://huggingface.co/settings/token for more
+            details
+        kwargs:
+            Additional keyword arguments passed to `SummaryWriter`.
+    Examples:
+    ```diff
+    # Taken from https://pytorch.org/docs/stable/tensorboard.html
+    - from torch.utils.tensorboard import SummaryWriter
+    + from huggingface_hub import HFSummaryWriter
+    import numpy as np
+    - writer = SummaryWriter()
+    + writer = HFSummaryWriter(repo_id="username/my-trained-model")
+    for n_iter in range(100):
+        writer.add_scalar('Loss/train', np.random.random(), n_iter)
+        writer.add_scalar('Loss/test', np.random.random(), n_iter)
+        writer.add_scalar('Accuracy/train', np.random.random(), n_iter)
+        writer.add_scalar('Accuracy/test', np.random.random(), n_iter)
+    ```
+    ```py
+    >>> from huggingface_hub import HFSummaryWriter
+    # Logs are automatically pushed every 15 minutes (5 by default) + when exiting the context manager
+    >>> with HFSummaryWriter(repo_id="test_hf_logger", commit_every=15) as logger:
+    ...     logger.add_scalar("a", 1)
+    ...     logger.add_scalar("b", 2)
+    ```
+    """
+    @experimental
+    def __new__(cls, *args, **kwargs) -> "HFSummaryWriter":
+        if not is_summary_writer_available:
+            raise ImportError(
+                "You must have `tensorboard` installed to use `HFSummaryWriter`. Please run `pip install --upgrade"
+                " tensorboardX` first."
+            )
+        return super().__new__(cls)
+    def __init__(
+        self,
+        repo_id: str,
+        *,
+        logdir: Optional[str] = None,
+        commit_every: Union[int, float] = 5,
+        squash_history: bool = False,
+        repo_type: Optional[str] = None,
+        repo_revision: Optional[str] = None,
+        repo_private: Optional[bool] = None,
+        path_in_repo: Optional[str] = "tensorboard",
+        repo_allow_patterns: Optional[Union[List[str], str]] = "*.tfevents.*",
+        repo_ignore_patterns: Optional[Union[List[str], str]] = None,
+        token: Optional[str] = None,
+        **kwargs,
+    ):
+        # Initialize SummaryWriter
+        super().__init__(logdir=logdir, **kwargs)
+        # Check logdir has been correctly initialized and fail early otherwise. In practice, SummaryWriter takes care of it.
+        if not isinstance(self.logdir, str):
+            raise ValueError(f"`self.logdir` must be a string. Got '{self.logdir}' of type {type(self.logdir)}.")
+        # Append logdir name to `path_in_repo`
+        if path_in_repo is None or path_in_repo == "":
+            path_in_repo = Path(self.logdir).name
+        else:
+            path_in_repo = path_in_repo.strip("/") + "/" + Path(self.logdir).name
+        # Initialize scheduler
+        self.scheduler = CommitScheduler(
+            folder_path=self.logdir,
+            path_in_repo=path_in_repo,
+            repo_id=repo_id,
+            repo_type=repo_type,
+            revision=repo_revision,
+            private=repo_private,
+            token=token,
+            allow_patterns=repo_allow_patterns,
+            ignore_patterns=repo_ignore_patterns,
+            every=commit_every,
+            squash_history=squash_history,
+        )
+        # Exposing some high-level info at root level
+        self.repo_id = self.scheduler.repo_id
+        self.repo_type = self.scheduler.repo_type
+        self.repo_revision = self.scheduler.revision
+        # Add `hf-summary-writer` tag to the model card metadata
+        try:
+            card = ModelCard.load(repo_id_or_path=self.repo_id, repo_type=self.repo_type)
+        except EntryNotFoundError:
+            card = ModelCard("")
+        tags = card.data.get("tags", [])
+        if "hf-summary-writer" not in tags:
+            tags.append("hf-summary-writer")
+            card.data["tags"] = tags
+            card.push_to_hub(repo_id=self.repo_id, repo_type=self.repo_type)
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Push to hub in a non-blocking way when exiting the logger's context manager."""
+        super().__exit__(exc_type, exc_val, exc_tb)
+        future = self.scheduler.trigger()
+        future.result()

.venv/lib/python3.11/site-packages/huggingface_hub/_upload_large_folder.py ADDED Viewed

	@@ -0,0 +1,621 @@

+# coding=utf-8
+# Copyright 2024-present, the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import enum
+import logging
+import os
+import queue
+import shutil
+import sys
+import threading
+import time
+import traceback
+from datetime import datetime
+from pathlib import Path
+from threading import Lock
+from typing import TYPE_CHECKING, List, Optional, Tuple, Union
+from . import constants
+from ._commit_api import CommitOperationAdd, UploadInfo, _fetch_upload_modes
+from ._local_folder import LocalUploadFileMetadata, LocalUploadFilePaths, get_local_upload_paths, read_upload_metadata
+from .constants import DEFAULT_REVISION, REPO_TYPES
+from .utils import DEFAULT_IGNORE_PATTERNS, filter_repo_objects, tqdm
+from .utils._cache_manager import _format_size
+from .utils.sha import sha_fileobj
+if TYPE_CHECKING:
+    from .hf_api import HfApi
+logger = logging.getLogger(__name__)
+# How long a worker sleeps when it is handed a `WorkerJob.WAIT` job.
+WAITING_TIME_IF_NO_TASKS = 10  # seconds
+# Caps used by `_get_items_to_commit`: a commit batch stops growing once either limit is reached.
+MAX_NB_REGULAR_FILES_PER_COMMIT = 75
+MAX_NB_LFS_FILES_PER_COMMIT = 150
+def upload_large_folder_internal(
+    api: "HfApi",
+    repo_id: str,
+    folder_path: Union[str, Path],
+    *,
+    repo_type: str,  # Repo type is required!
+    revision: Optional[str] = None,
+    private: Optional[bool] = None,
+    allow_patterns: Optional[Union[List[str], str]] = None,
+    ignore_patterns: Optional[Union[List[str], str]] = None,
+    num_workers: Optional[int] = None,
+    print_report: bool = True,
+    print_report_every: int = 60,
+):
+    """Upload a large folder to the Hub in the most resilient way possible.
+    See [`HfApi.upload_large_folder`] for the full documentation.
+    """
+    # 1. Check args and setup
+    if repo_type is None:
+        raise ValueError(
+            "For large uploads, `repo_type` is explicitly required. Please set it to `model`, `dataset` or `space`."
+            " If you are using the CLI, pass it as `--repo-type=model`."
+        )
+    if repo_type not in REPO_TYPES:
+        raise ValueError(f"Invalid repo type, must be one of {REPO_TYPES}")
+    if revision is None:
+        revision = DEFAULT_REVISION
+    folder_path = Path(folder_path).expanduser().resolve()
+    if not folder_path.is_dir():
+        raise ValueError(f"Provided path: '{folder_path}' is not a directory")
+    if ignore_patterns is None:
+        ignore_patterns = []
+    elif isinstance(ignore_patterns, str):
+        ignore_patterns = [ignore_patterns]
+    ignore_patterns += DEFAULT_IGNORE_PATTERNS
+    if num_workers is None:
+        nb_cores = os.cpu_count() or 1
+        num_workers = max(nb_cores - 2, 2)  # Use all but 2 cores, or at least 2 cores
+    # 2. Create repo if missing
+    repo_url = api.create_repo(repo_id=repo_id, repo_type=repo_type, private=private, exist_ok=True)
+    logger.info(f"Repo created: {repo_url}")
+    repo_id = repo_url.repo_id
+    # 3. List files to upload
+    filtered_paths_list = filter_repo_objects(
+        (path.relative_to(folder_path).as_posix() for path in folder_path.glob("**/*") if path.is_file()),
+        allow_patterns=allow_patterns,
+        ignore_patterns=ignore_patterns,
+    )
+    paths_list = [get_local_upload_paths(folder_path, relpath) for relpath in filtered_paths_list]
+    logger.info(f"Found {len(paths_list)} candidate files to upload")
+    # Read metadata for each file
+    items = [
+        (paths, read_upload_metadata(folder_path, paths.path_in_repo))
+        for paths in tqdm(paths_list, desc="Recovering from metadata files")
+    ]
+    # 4. Start workers
+    status = LargeUploadStatus(items)
+    threads = [
+        threading.Thread(
+            target=_worker_job,
+            kwargs={
+                "status": status,
+                "api": api,
+                "repo_id": repo_id,
+                "repo_type": repo_type,
+                "revision": revision,
+            },
+        )
+        for _ in range(num_workers)
+    ]
+    for thread in threads:
+        thread.start()
+    # 5. Print regular reports
+    if print_report:
+        print("\n\n" + status.current_report())
+    last_report_ts = time.time()
+    while True:
+        time.sleep(1)
+        if time.time() - last_report_ts >= print_report_every:
+            if print_report:
+                _print_overwrite(status.current_report())
+            last_report_ts = time.time()
+        if status.is_done():
+            logging.info("Is done: exiting main loop")
+            break
+    for thread in threads:
+        thread.join()
+    logger.info(status.current_report())
+    logging.info("Upload is complete!")
+####################
+# Logic to manage workers and synchronize tasks
+####################
+class WorkerJob(enum.Enum):
+    SHA256 = enum.auto()
+    GET_UPLOAD_MODE = enum.auto()
+    PREUPLOAD_LFS = enum.auto()
+    COMMIT = enum.auto()
+    WAIT = enum.auto()  # if no tasks are available but we don't want to exit
+# A job item pairs a file's local paths with its upload metadata; it is unpacked as `paths, metadata`.
+JOB_ITEM_T = Tuple[LocalUploadFilePaths, LocalUploadFileMetadata]
+class LargeUploadStatus:
+    """Contains information, queues and tasks for a large upload process."""
+    def __init__(self, items: List[JOB_ITEM_T]):
+        self.items = items
+        self.queue_sha256: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
+        self.queue_get_upload_mode: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
+        self.queue_preupload_lfs: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
+        self.queue_commit: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
+        self.lock = Lock()
+        self.nb_workers_sha256: int = 0
+        self.nb_workers_get_upload_mode: int = 0
+        self.nb_workers_preupload_lfs: int = 0
+        self.nb_workers_commit: int = 0
+        self.nb_workers_waiting: int = 0
+        self.last_commit_attempt: Optional[float] = None
+        self._started_at = datetime.now()
+        # Setup queues
+        for item in self.items:
+            paths, metadata = item
+            if metadata.sha256 is None:
+                self.queue_sha256.put(item)
+            elif metadata.upload_mode is None:
+                self.queue_get_upload_mode.put(item)
+            elif metadata.upload_mode == "lfs" and not metadata.is_uploaded:
+                self.queue_preupload_lfs.put(item)
+            elif not metadata.is_committed:
+                self.queue_commit.put(item)
+            else:
+                logger.debug(f"Skipping file {paths.path_in_repo} (already uploaded and committed)")
+    def current_report(self) -> str:
+        """Generate a report of the current status of the large upload."""
+        nb_hashed = 0
+        size_hashed = 0
+        nb_preuploaded = 0
+        nb_lfs = 0
+        nb_lfs_unsure = 0
+        size_preuploaded = 0
+        nb_committed = 0
+        size_committed = 0
+        total_size = 0
+        ignored_files = 0
+        total_files = 0
+        with self.lock:
+            for _, metadata in self.items:
+                if metadata.should_ignore:
+                    ignored_files += 1
+                    continue
+                total_size += metadata.size
+                total_files += 1
+                if metadata.sha256 is not None:
+                    nb_hashed += 1
+                    size_hashed += metadata.size
+                if metadata.upload_mode == "lfs":
+                    nb_lfs += 1
+                if metadata.upload_mode is None:
+                    nb_lfs_unsure += 1
+                if metadata.is_uploaded:
+                    nb_preuploaded += 1
+                    size_preuploaded += metadata.size
+                if metadata.is_committed:
+                    nb_committed += 1
+                    size_committed += metadata.size
+            total_size_str = _format_size(total_size)
+            now = datetime.now()
+            now_str = now.strftime("%Y-%m-%d %H:%M:%S")
+            elapsed = now - self._started_at
+            elapsed_str = str(elapsed).split(".")[0]  # remove milliseconds
+            message = "\n" + "-" * 10
+            message += f" {now_str} ({elapsed_str}) "
+            message += "-" * 10 + "\n"
+            message += "Files:   "
+            message += f"hashed {nb_hashed}/{total_files} ({_format_size(size_hashed)}/{total_size_str}) | "
+            message += f"pre-uploaded: {nb_preuploaded}/{nb_lfs} ({_format_size(size_preuploaded)}/{total_size_str})"
+            if nb_lfs_unsure > 0:
+                message += f" (+{nb_lfs_unsure} unsure)"
+            message += f" | committed: {nb_committed}/{total_files} ({_format_size(size_committed)}/{total_size_str})"
+            message += f" | ignored: {ignored_files}\n"
+            message += "Workers: "
+            message += f"hashing: {self.nb_workers_sha256} | "
+            message += f"get upload mode: {self.nb_workers_get_upload_mode} | "
+            message += f"pre-uploading: {self.nb_workers_preupload_lfs} | "
+            message += f"committing: {self.nb_workers_commit} | "
+            message += f"waiting: {self.nb_workers_waiting}\n"
+            message += "-" * 51
+            return message
+    def is_done(self) -> bool:
+        with self.lock:
+            return all(metadata.is_committed or metadata.should_ignore for _, metadata in self.items)
+def _worker_job(
+    status: LargeUploadStatus,
+    api: "HfApi",
+    repo_id: str,
+    repo_type: str,
+    revision: str,
+):
+    """
+    Main process for a worker. The worker will perform tasks based on the priority list until all files are uploaded
+    and committed. If no tasks are available, the worker will wait for 10 seconds before checking again.
+    If a task fails for any reason, the item(s) are put back in the queue for another worker to pick up.
+    Read `upload_large_folder` docstring for more information on how tasks are prioritized.
+    """
+    while True:
+        next_job: Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]] = None
+        # Determine next task
+        next_job = _determine_next_job(status)
+        if next_job is None:
+            return
+        job, items = next_job
+        # Perform task
+        if job == WorkerJob.SHA256:
+            item = items[0]  # single item
+            try:
+                _compute_sha256(item)
+                status.queue_get_upload_mode.put(item)
+            except KeyboardInterrupt:
+                raise
+            except Exception as e:
+                logger.error(f"Failed to compute sha256: {e}")
+                traceback.format_exc()
+                status.queue_sha256.put(item)
+            with status.lock:
+                status.nb_workers_sha256 -= 1
+        elif job == WorkerJob.GET_UPLOAD_MODE:
+            try:
+                _get_upload_mode(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
+            except KeyboardInterrupt:
+                raise
+            except Exception as e:
+                logger.error(f"Failed to get upload mode: {e}")
+                traceback.format_exc()
+            # Items are either:
+            # - dropped (if should_ignore)
+            # - put in LFS queue (if LFS)
+            # - put in commit queue (if regular)
+            # - or put back (if error occurred).
+            for item in items:
+                _, metadata = item
+                if metadata.should_ignore:
+                    continue
+                if metadata.upload_mode == "lfs":
+                    status.queue_preupload_lfs.put(item)
+                elif metadata.upload_mode == "regular":
+                    status.queue_commit.put(item)
+                else:
+                    status.queue_get_upload_mode.put(item)
+            with status.lock:
+                status.nb_workers_get_upload_mode -= 1
+        elif job == WorkerJob.PREUPLOAD_LFS:
+            item = items[0]  # single item
+            try:
+                _preupload_lfs(item, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
+                status.queue_commit.put(item)
+            except KeyboardInterrupt:
+                raise
+            except Exception as e:
+                logger.error(f"Failed to preupload LFS: {e}")
+                traceback.format_exc()
+                status.queue_preupload_lfs.put(item)
+            with status.lock:
+                status.nb_workers_preupload_lfs -= 1
+        elif job == WorkerJob.COMMIT:
+            try:
+                _commit(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
+            except KeyboardInterrupt:
+                raise
+            except Exception as e:
+                logger.error(f"Failed to commit: {e}")
+                traceback.format_exc()
+                for item in items:
+                    status.queue_commit.put(item)
+            with status.lock:
+                status.last_commit_attempt = time.time()
+                status.nb_workers_commit -= 1
+        elif job == WorkerJob.WAIT:
+            time.sleep(WAITING_TIME_IF_NO_TASKS)
+            with status.lock:
+                status.nb_workers_waiting -= 1
+def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]]:
+    """Pick the next job for a worker, following a fixed priority order.
+    The whole decision runs under `status.lock`. The matching worker counter is incremented before
+    returning, and the items of the job are pulled from the relevant queue.
+    Returns:
+        A `(job, items)` tuple, or `None` when every item is committed or ignored (the worker should exit).
+        For `WorkerJob.WAIT`, `items` is an empty list.
+    """
+    with status.lock:
+        # 1. Commit if more than 5 minutes since last commit attempt (and at least 1 file)
+        if (
+            status.nb_workers_commit == 0
+            and status.queue_commit.qsize() > 0
+            and status.last_commit_attempt is not None
+            and time.time() - status.last_commit_attempt > 5 * 60
+        ):
+            status.nb_workers_commit += 1
+            logger.debug("Job: commit (more than 5 minutes since last commit attempt)")
+            return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+        # 2. Commit if at least 150 files are ready to commit
+        #    (the debug message below still reads ">100 files"; the condition is what applies)
+        elif status.nb_workers_commit == 0 and status.queue_commit.qsize() >= 150:
+            status.nb_workers_commit += 1
+            logger.debug("Job: commit (>100 files ready)")
+            return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+        # 3. Get upload mode if at least 10 files (up to 50 are taken at once)
+        elif status.queue_get_upload_mode.qsize() >= 10:
+            status.nb_workers_get_upload_mode += 1
+            logger.debug("Job: get upload mode (>10 files ready)")
+            return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+        # 4. Preupload LFS file if at least 1 file and no worker is preuploading LFS
+        elif status.queue_preupload_lfs.qsize() > 0 and status.nb_workers_preupload_lfs == 0:
+            status.nb_workers_preupload_lfs += 1
+            logger.debug("Job: preupload LFS (no other worker preuploading LFS)")
+            return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
+        # 5. Compute sha256 if at least 1 file and no worker is computing sha256
+        elif status.queue_sha256.qsize() > 0 and status.nb_workers_sha256 == 0:
+            status.nb_workers_sha256 += 1
+            logger.debug("Job: sha256 (no other worker computing sha256)")
+            return (WorkerJob.SHA256, _get_one(status.queue_sha256))
+        # 6. Get upload mode if at least 1 file and no worker is getting upload mode
+        elif status.queue_get_upload_mode.qsize() > 0 and status.nb_workers_get_upload_mode == 0:
+            status.nb_workers_get_upload_mode += 1
+            logger.debug("Job: get upload mode (no other worker getting upload mode)")
+            return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+        # 7. Preupload LFS file if at least 1 file
+        #    Skip if hf_transfer is enabled and there is already a worker preuploading LFS
+        elif status.queue_preupload_lfs.qsize() > 0 and (
+            status.nb_workers_preupload_lfs == 0 or not constants.HF_HUB_ENABLE_HF_TRANSFER
+        ):
+            status.nb_workers_preupload_lfs += 1
+            logger.debug("Job: preupload LFS")
+            return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
+        # 8. Compute sha256 if at least 1 file
+        elif status.queue_sha256.qsize() > 0:
+            status.nb_workers_sha256 += 1
+            logger.debug("Job: sha256")
+            return (WorkerJob.SHA256, _get_one(status.queue_sha256))
+        # 9. Get upload mode if at least 1 file
+        elif status.queue_get_upload_mode.qsize() > 0:
+            status.nb_workers_get_upload_mode += 1
+            logger.debug("Job: get upload mode")
+            return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+        # 10. Commit if at least 1 file and 1 min since last commit attempt
+        elif (
+            status.nb_workers_commit == 0
+            and status.queue_commit.qsize() > 0
+            and status.last_commit_attempt is not None
+            and time.time() - status.last_commit_attempt > 1 * 60
+        ):
+            status.nb_workers_commit += 1
+            logger.debug("Job: commit (1 min since last commit attempt)")
+            return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+        # 11. Commit if at least 1 file, all other queues are empty and no worker is busy with
+        #     sha256 / upload mode / LFS preupload (e.g. when it's the last commit)
+        elif (
+            status.nb_workers_commit == 0
+            and status.queue_commit.qsize() > 0
+            and status.queue_sha256.qsize() == 0
+            and status.queue_get_upload_mode.qsize() == 0
+            and status.queue_preupload_lfs.qsize() == 0
+            and status.nb_workers_sha256 == 0
+            and status.nb_workers_get_upload_mode == 0
+            and status.nb_workers_preupload_lfs == 0
+        ):
+            status.nb_workers_commit += 1
+            logger.debug("Job: commit")
+            return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+        # 12. If every item is either committed or ignored, exit
+        elif all(metadata.is_committed or metadata.should_ignore for _, metadata in status.items):
+            logger.info("All files have been processed! Exiting worker.")
+            return None
+        # 13. If no task is available, wait
+        else:
+            status.nb_workers_waiting += 1
+            logger.debug(f"No task available, waiting... ({WAITING_TIME_IF_NO_TASKS}s)")
+            return (WorkerJob.WAIT, [])
+####################
+# Atomic jobs (sha256, get_upload_mode, preupload_lfs, commit)
+####################
+def _compute_sha256(item: JOB_ITEM_T) -> None:
+    """Compute sha256 of a file and save it in metadata."""
+    paths, metadata = item
+    if metadata.sha256 is None:
+        with paths.file_path.open("rb") as f:
+            metadata.sha256 = sha_fileobj(f).hex()
+    metadata.save(paths)
+def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+    """Get upload mode for each file and update metadata.
+    Also receive info if the file should be ignored.
+    """
+    additions = [_build_hacky_operation(item) for item in items]
+    _fetch_upload_modes(
+        additions=additions,
+        repo_type=repo_type,
+        repo_id=repo_id,
+        headers=api._build_hf_headers(),
+        revision=revision,
+    )
+    for item, addition in zip(items, additions):
+        paths, metadata = item
+        metadata.upload_mode = addition._upload_mode
+        metadata.should_ignore = addition._should_ignore
+        metadata.save(paths)
+def _preupload_lfs(item: JOB_ITEM_T, api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+    """Preupload LFS file and update metadata."""
+    paths, metadata = item
+    addition = _build_hacky_operation(item)
+    api.preupload_lfs_files(
+        repo_id=repo_id,
+        repo_type=repo_type,
+        revision=revision,
+        additions=[addition],
+    )
+    metadata.is_uploaded = True
+    metadata.save(paths)
+def _commit(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+    """Commit files to the repo."""
+    additions = [_build_hacky_operation(item) for item in items]
+    api.create_commit(
+        repo_id=repo_id,
+        repo_type=repo_type,
+        revision=revision,
+        operations=additions,
+        commit_message="Add files using upload-large-folder tool",
+    )
+    for paths, metadata in items:
+        metadata.is_committed = True
+        metadata.save(paths)
+####################
+# Hacks with CommitOperationAdd to bypass checks/sha256 calculation
+####################
+class HackyCommitOperationAdd(CommitOperationAdd):
+    def __post_init__(self) -> None:
+        if isinstance(self.path_or_fileobj, Path):
+            self.path_or_fileobj = str(self.path_or_fileobj)
+def _build_hacky_operation(item: JOB_ITEM_T) -> HackyCommitOperationAdd:
+    paths, metadata = item
+    operation = HackyCommitOperationAdd(path_in_repo=paths.path_in_repo, path_or_fileobj=paths.file_path)
+    with paths.file_path.open("rb") as file:
+        sample = file.peek(512)[:512]
+    if metadata.sha256 is None:
+        raise ValueError("sha256 must have been computed by now!")
+    operation.upload_info = UploadInfo(sha256=bytes.fromhex(metadata.sha256), size=metadata.size, sample=sample)
+    return operation
+####################
+# Misc helpers
+####################
+def _get_one(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]:
+    return [queue.get()]
+def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> List[JOB_ITEM_T]:
+    return [queue.get() for _ in range(min(queue.qsize(), n))]
+def _get_items_to_commit(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]:
+    """Special case for commit job: the number of items to commit depends on the type of files."""
+    # Can take at most 50 regular files and/or 100 LFS files in a single commit
+    items: List[JOB_ITEM_T] = []
+    nb_lfs, nb_regular = 0, 0
+    while True:
+        # If empty queue => commit everything
+        if queue.qsize() == 0:
+            return items
+        # If we have enough items => commit them
+        if nb_lfs >= MAX_NB_LFS_FILES_PER_COMMIT or nb_regular >= MAX_NB_REGULAR_FILES_PER_COMMIT:
+            return items
+        # Else, get a new item and increase counter
+        item = queue.get()
+        items.append(item)
+        _, metadata = item
+        if metadata.upload_mode == "lfs":
+            nb_lfs += 1
+        else:
+            nb_regular += 1
+def _print_overwrite(report: str) -> None:
+    """Print a report, overwriting the previous lines.
+    Since tqdm in using `sys.stderr` to (re-)write progress bars, we need to use `sys.stdout`
+    to print the report.
+    Note: works well only if no other process is writing to `sys.stdout`!
+    """
+    report += "\n"
+    # Get terminal width
+    terminal_width = shutil.get_terminal_size().columns
+    # Count number of lines that should be cleared
+    nb_lines = sum(len(line) // terminal_width + 1 for line in report.splitlines())
+    # Clear previous lines based on the number of lines in the report
+    for _ in range(nb_lines):
+        sys.stdout.write("\r\033[K")  # Clear line
+        sys.stdout.write("\033[F")  # Move cursor up one line
+    # Print the new report, filling remaining space with whitespace
+    sys.stdout.write(report)
+    sys.stdout.write(" " * (terminal_width - len(report.splitlines()[-1])))
+    sys.stdout.flush()

.venv/lib/python3.11/site-packages/huggingface_hub/_webhooks_payload.py ADDED Viewed

	@@ -0,0 +1,137 @@

+# coding=utf-8
+# Copyright 2023-present, the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains data structures to parse the webhooks payload."""
+from typing import List, Literal, Optional
+from .utils import is_pydantic_available
+if is_pydantic_available():
+    from pydantic import BaseModel
+else:
+    # Define a dummy BaseModel to avoid import errors when pydantic is not installed
+    # Import error will be raised when trying to use the class
+    class BaseModel:  # type: ignore [no-redef]
+        # Accepts any arguments so that every instantiation attempt reaches the ImportError below.
+        def __init__(self, *args, **kwargs) -> None:
+            raise ImportError(
+                "You must have `pydantic` installed to use `WebhookPayload`. This is an optional dependency that"
+                " should be installed separately. Please run `pip install --upgrade pydantic` and retry."
+            )
+# This is an adaptation of the ReportV3 interface implemented in moon-landing. V0, V1 and V2 have been ignored as they
+# are not in use anymore. To be kept in sync when the format is updated in
+# https://github.com/huggingface/moon-landing/blob/main/server/lib/HFWebhooks.ts (internal link).
+# Values accepted for `WebhookPayloadEvent.action`.
+WebhookEvent_T = Literal[
+    "create",
+    "delete",
+    "move",
+    "update",
+]
+# Repo change event names. Not referenced by the models defined in this module.
+RepoChangeEvent_T = Literal[
+    "add",
+    "move",
+    "remove",
+    "update",
+]
+# Repo types. `WebhookPayloadRepo.type` spells out the same three values inline.
+RepoType_T = Literal[
+    "dataset",
+    "model",
+    "space",
+]
+# Values accepted for `WebhookPayloadDiscussion.status`.
+DiscussionStatus_T = Literal[
+    "closed",
+    "draft",
+    "open",
+    "merged",
+]
+# Only version 3 validates for `WebhookPayloadWebhook.version`.
+SupportedWebhookVersion = Literal[3]
+# Base model for anything identified by a string `id`; several payload models subclass it.
+class ObjectId(BaseModel):
+    id: str
+# Pair of URLs for an entity: `web` is required, `api` is optional (defaults to None).
+class WebhookPayloadUrl(BaseModel):
+    web: str
+    api: Optional[str] = None
+# Type of the optional `WebhookPayload.movedTo` field: a `name` and an `owner` id.
+class WebhookPayloadMovedTo(BaseModel):
+    name: str
+    owner: ObjectId
+# Webhook descriptor: inherits `id` from `ObjectId` and adds the payload `version` (restricted to 3).
+class WebhookPayloadWebhook(ObjectId):
+    version: SupportedWebhookVersion
+# Event descriptor: `action` is one of the `WebhookEvent_T` values, `scope` is a free-form string.
+class WebhookPayloadEvent(BaseModel):
+    action: WebhookEvent_T
+    scope: str
+# Type of the optional `WebhookPayloadDiscussion.changes` field; `mergeCommitId` may be absent.
+class WebhookPayloadDiscussionChanges(BaseModel):
+    base: str
+    mergeCommitId: Optional[str] = None
+# Comment descriptor (inherits `id`). `content` is optional and defaults to None.
+class WebhookPayloadComment(ObjectId):
+    author: ObjectId
+    hidden: bool
+    content: Optional[str] = None
+    url: WebhookPayloadUrl
+# Discussion descriptor (inherits `id`). `isPullRequest` flags pull requests; `changes` and `pinned`
+# are optional and default to None.
+class WebhookPayloadDiscussion(ObjectId):
+    num: int
+    author: ObjectId
+    url: WebhookPayloadUrl
+    title: str
+    isPullRequest: bool
+    status: DiscussionStatus_T
+    changes: Optional[WebhookPayloadDiscussionChanges] = None
+    pinned: Optional[bool] = None
+# Repo descriptor (inherits `id`). `head_sha`, `subdomain` and `tags` are optional and default to None.
+# NOTE(review): `type` repeats the values of `RepoType_T` inline; the alias could be used instead.
+class WebhookPayloadRepo(ObjectId):
+    owner: ObjectId
+    head_sha: Optional[str] = None
+    name: str
+    private: bool
+    subdomain: Optional[str] = None
+    tags: Optional[List[str]] = None
+    type: Literal["dataset", "model", "space"]
+    url: WebhookPayloadUrl
+# One entry of `WebhookPayload.updatedRefs`: a `ref` with optional old and new SHAs.
+class WebhookPayloadUpdatedRef(BaseModel):
+    ref: str
+    oldSha: Optional[str] = None
+    newSha: Optional[str] = None
+# Top-level webhook payload. `event`, `repo` and `webhook` are required; `discussion`, `comment`,
+# `movedTo` and `updatedRefs` are optional and default to None.
+class WebhookPayload(BaseModel):
+    event: WebhookPayloadEvent
+    repo: WebhookPayloadRepo
+    discussion: Optional[WebhookPayloadDiscussion] = None
+    comment: Optional[WebhookPayloadComment] = None
+    webhook: WebhookPayloadWebhook
+    movedTo: Optional[WebhookPayloadMovedTo] = None
+    updatedRefs: Optional[List[WebhookPayloadUpdatedRef]] = None

.venv/lib/python3.11/site-packages/huggingface_hub/_webhooks_server.py ADDED Viewed

	@@ -0,0 +1,386 @@

+# coding=utf-8
+# Copyright 2023-present, the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains `WebhooksServer` and `webhook_endpoint` to create a webhook server easily."""
+import atexit
+import inspect
+import os
+from functools import wraps
+from typing import TYPE_CHECKING, Any, Callable, Dict, Optional
+from .utils import experimental, is_fastapi_available, is_gradio_available
+if TYPE_CHECKING:
+    import gradio as gr
+    from fastapi import Request
+if is_fastapi_available():
+    from fastapi import FastAPI, Request
+    from fastapi.responses import JSONResponse
+else:
+    # Will fail at runtime if FastAPI is not available
+    FastAPI = Request = JSONResponse = None  # type: ignore [misc, assignment]
+# Module-wide server instance, created on first call to `_get_global_app()`.
+_global_app: Optional["WebhooksServer"] = None
+# True when the `SPACE_ID` environment variable is not set.
+_is_local = os.environ.get("SPACE_ID") is None
+@experimental
+class WebhooksServer:
+    """
+    The [`WebhooksServer`] class lets you create an instance of a Gradio app that can receive Huggingface webhooks.
+    These webhooks can be registered using the [`~WebhooksServer.add_webhook`] decorator. Webhook endpoints are added to
+    the app as a POST endpoint to the FastAPI router. Once all the webhooks are registered, the `launch` method has to be
+    called to start the app.
+    It is recommended to accept [`WebhookPayload`] as the first argument of the webhook function. It is a Pydantic
+    model that contains all the information about the webhook event. The data will be parsed automatically for you.
+    Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to setup your
+    WebhooksServer and deploy it on a Space.
+    <Tip warning={true}>
+    `WebhooksServer` is experimental. Its API is subject to change in the future.
+    </Tip>
+    <Tip warning={true}>
+    You must have `gradio` installed to use `WebhooksServer` (`pip install --upgrade gradio`).
+    </Tip>
+    Args:
+        ui (`gradio.Blocks`, optional):
+            A Gradio UI instance to be used as the Space landing page. If `None`, a UI displaying instructions
+            about the configured webhooks is created.
+        webhook_secret (`str`, optional):
+            A secret key to verify incoming webhook requests. You can set this value to any secret you want as long as
+            you also configure it in your [webhooks settings panel](https://huggingface.co/settings/webhooks). You
+            can also set this value as the `WEBHOOK_SECRET` environment variable. If no secret is provided, the
+            webhook endpoints are opened without any security.
+    Example:
+        ```python
+        import gradio as gr
+        from huggingface_hub import WebhooksServer, WebhookPayload
+        with gr.Blocks() as ui:
+            ...
+        app = WebhooksServer(ui=ui, webhook_secret="my_secret_key")
+        @app.add_webhook("/say_hello")
+        async def hello(payload: WebhookPayload):
+            return {"message": "hello"}
+        app.launch()
+        ```
+    """
+    def __new__(cls, *args, **kwargs) -> "WebhooksServer":
+        if not is_gradio_available():
+            raise ImportError(
+                "You must have `gradio` installed to use `WebhooksServer`. Please run `pip install --upgrade gradio`"
+                " first."
+            )
+        if not is_fastapi_available():
+            raise ImportError(
+                "You must have `fastapi` installed to use `WebhooksServer`. Please run `pip install --upgrade fastapi`"
+                " first."
+            )
+        return super().__new__(cls)
+    def __init__(
+        self,
+        ui: Optional["gr.Blocks"] = None,
+        webhook_secret: Optional[str] = None,
+    ) -> None:
+        self._ui = ui
+        self.webhook_secret = webhook_secret or os.getenv("WEBHOOK_SECRET")
+        self.registered_webhooks: Dict[str, Callable] = {}
+        _warn_on_empty_secret(self.webhook_secret)
+    def add_webhook(self, path: Optional[str] = None) -> Callable:
+        """
+        Decorator to add a webhook to the [`WebhooksServer`] server.
+        Args:
+            path (`str`, optional):
+                The URL path to register the webhook function. If not provided, the function name will be used as the
+                path. In any case, all webhooks are registered under `/webhooks`.
+        Raises:
+            ValueError: If the provided path is already registered as a webhook.
+        Example:
+            ```python
+            from huggingface_hub import WebhooksServer, WebhookPayload
+            app = WebhooksServer()
+            @app.add_webhook
+            async def trigger_training(payload: WebhookPayload):
+                if payload.repo.type == "dataset" and payload.event.action == "update":
+                    # Trigger a training job if a dataset is updated
+                    ...
+            app.launch()
+        ```
+        """
+        # Usage: directly as decorator. Example: `@app.add_webhook`
+        if callable(path):
+            # If path is a function, it means it was used as a decorator without arguments
+            return self.add_webhook()(path)
+        # Usage: provide a path. Example: `@app.add_webhook(...)`
+        @wraps(FastAPI.post)
+        def _inner_post(*args, **kwargs):
+            func = args[0]
+            abs_path = f"/webhooks/{(path or func.__name__).strip('/')}"
+            if abs_path in self.registered_webhooks:
+                raise ValueError(f"Webhook {abs_path} already exists.")
+            self.registered_webhooks[abs_path] = func
+        return _inner_post
+    def launch(self, prevent_thread_lock: bool = False, **launch_kwargs: Any) -> None:
+        """Launch the Gradio app and register webhooks to the underlying FastAPI server.
+        Input parameters are forwarded to Gradio when launching the app.
+        """
+        ui = self._ui or self._get_default_ui()
+        # Start Gradio App
+        #   - as non-blocking so that webhooks can be added afterwards
+        #   - as shared if launch locally (to debug webhooks)
+        launch_kwargs.setdefault("share", _is_local)
+        self.fastapi_app, _, _ = ui.launch(prevent_thread_lock=True, **launch_kwargs)
+        # Register webhooks to FastAPI app
+        for path, func in self.registered_webhooks.items():
+            # Add secret check if required
+            if self.webhook_secret is not None:
+                func = _wrap_webhook_to_check_secret(func, webhook_secret=self.webhook_secret)
+            # Add route to FastAPI app
+            self.fastapi_app.post(path)(func)
+        # Print instructions and block main thread
+        space_host = os.environ.get("SPACE_HOST")
+        url = "https://" + space_host if space_host is not None else (ui.share_url or ui.local_url)
+        url = url.strip("/")
+        message = "\nWebhooks are correctly setup and ready to use:"
+        message += "\n" + "\n".join(f"  - POST {url}{webhook}" for webhook in self.registered_webhooks)
+        message += "\nGo to https://huggingface.co/settings/webhooks to setup your webhooks."
+        print(message)
+        if not prevent_thread_lock:
+            ui.block_thread()
+    def _get_default_ui(self) -> "gr.Blocks":
+        """Default UI if not provided (lists webhooks and provides basic instructions)."""
+        import gradio as gr
+        with gr.Blocks() as ui:
+            gr.Markdown("# This is an app to process 🤗 Webhooks")
+            gr.Markdown(
+                "Webhooks are a foundation for MLOps-related features. They allow you to listen for new changes on"
+                " specific repos or to all repos belonging to particular set of users/organizations (not just your"
+                " repos, but any repo). Check out this [guide](https://huggingface.co/docs/hub/webhooks) to get to"
+                " know more about webhooks on the Huggingface Hub."
+            )
+            gr.Markdown(
+                f"{len(self.registered_webhooks)} webhook(s) are registered:"
+                + "\n\n"
+                + "\n ".join(
+                    f"- [{webhook_path}]({_get_webhook_doc_url(webhook.__name__, webhook_path)})"
+                    for webhook_path, webhook in self.registered_webhooks.items()
+                )
+            )
+            gr.Markdown(
+                "Go to https://huggingface.co/settings/webhooks to setup your webhooks."
+                + "\nYou app is running locally. Please look at the logs to check the full URL you need to set."
+                if _is_local
+                else (
+                    "\nThis app is running on a Space. You can find the corresponding URL in the options menu"
+                    " (top-right) > 'Embed the Space'. The URL looks like 'https://{username}-{repo_name}.hf.space'."
+                )
+            )
+        return ui
+@experimental
+def webhook_endpoint(path: Optional[str] = None) -> Callable:
+    """Decorator to start a [`WebhooksServer`] and register the decorated function as a webhook endpoint.
+    This is a helper to get started quickly. If you need more flexibility (custom landing page or webhook secret),
+    you can use [`WebhooksServer`] directly. You can register multiple webhook endpoints (to the same server) by using
+    this decorator multiple times.
+    Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to setup your
+    server and deploy it on a Space.
+    <Tip warning={true}>
+    `webhook_endpoint` is experimental. Its API is subject to change in the future.
+    </Tip>
+    <Tip warning={true}>
+    You must have `gradio` installed to use `webhook_endpoint` (`pip install --upgrade gradio`).
+    </Tip>
+    Args:
+        path (`str`, optional):
+            The URL path to register the webhook function. If not provided, the function name will be used as the path.
+            In any case, all webhooks are registered under `/webhooks`.
+    Returns:
+        The decorated function itself, with an extra `launch` attribute that starts the server immediately.
+    Examples:
+        The default usage is to register a function as a webhook endpoint. The function name will be used as the path.
+        The server will be started automatically at exit (i.e. at the end of the script).
+        ```python
+        from huggingface_hub import webhook_endpoint, WebhookPayload
+        @webhook_endpoint
+        async def trigger_training(payload: WebhookPayload):
+            if payload.repo.type == "dataset" and payload.event.action == "update":
+                # Trigger a training job if a dataset is updated
+                ...
+        # Server is automatically started at the end of the script.
+        ```
+        Advanced usage: register a function as a webhook endpoint and start the server manually. This is useful if you
+        are running it in a notebook.
+        ```python
+        from huggingface_hub import webhook_endpoint, WebhookPayload
+        @webhook_endpoint
+        async def trigger_training(payload: WebhookPayload):
+            if payload.repo.type == "dataset" and payload.event.action == "update":
+                # Trigger a training job if a dataset is updated
+                ...
+        # Start the server manually
+        trigger_training.launch()
+        ```
+    """
+    if callable(path):
+        # If path is a function, it means it was used as a decorator without arguments
+        return webhook_endpoint()(path)
+    @wraps(WebhooksServer.add_webhook)
+    def _inner(func: Callable) -> Callable:
+        # All endpoints declared through this decorator share the module-wide server.
+        app = _get_global_app()
+        app.add_webhook(path)(func)
+        if len(app.registered_webhooks) == 1:
+            # Register `app.launch` to run at exit (only once)
+            atexit.register(app.launch)
+        @wraps(app.launch)
+        def _launch_now():
+            # Run the app directly (without waiting atexit)
+            atexit.unregister(app.launch)
+            app.launch()
+        # Expose the manual launcher on the decorated function (`func.launch()`).
+        func.launch = _launch_now  # type: ignore
+        return func
+    return _inner
+def _get_global_app() -> WebhooksServer:
+    global _global_app
+    if _global_app is None:
+        _global_app = WebhooksServer()
+    return _global_app
+def _warn_on_empty_secret(webhook_secret: Optional[str]) -> None:
+    if webhook_secret is None:
+        print("Webhook secret is not defined. This means your webhook endpoints will be open to everyone.")
+        print(
+            "To add a secret, set `WEBHOOK_SECRET` as environment variable or pass it at initialization: "
+            "\n\t`app = WebhooksServer(webhook_secret='my_secret', ...)`"
+        )
+        print(
+            "For more details about webhook secrets, please refer to"
+            " https://huggingface.co/docs/hub/webhooks#webhook-secret."
+        )
+    else:
+        print("Webhook secret is correctly defined.")
+def _get_webhook_doc_url(webhook_name: str, webhook_path: str) -> str:
+    """Returns the anchor to a given webhook in the docs (experimental)"""
+    return "/docs#/default/" + webhook_name + webhook_path.replace("/", "_") + "_post"
+def _wrap_webhook_to_check_secret(func: Callable, webhook_secret: str) -> Callable:
+    """Wraps a webhook function to check the webhook secret before calling the function.
+    This is a hacky way to add the `request` parameter to the function signature. Since FastAPI based itself on route
+    parameters to inject the values to the function, we need to hack the function signature to retrieve the `Request`
+    object (and hence the headers). A far cleaner solution would be to use a middleware. However, since
+    `fastapi==0.90.1`, a middleware cannot be added once the app has started. And since the FastAPI app is started by
+    Gradio internals (and not by us), we cannot add a middleware.
+    This method is called only when a secret has been defined by the user. If a request is sent without the
+    "x-webhook-secret", the function will return a 401 error (unauthorized). If the header is sent but is incorrect,
+    the function will return a 403 error (forbidden).
+    Inspired by https://stackoverflow.com/a/33112180.
+    """
+    initial_sig = inspect.signature(func)
+    @wraps(func)
+    async def _protected_func(request: Request, **kwargs):
+        request_secret = request.headers.get("x-webhook-secret")
+        if request_secret is None:
+            return JSONResponse({"error": "x-webhook-secret header not set."}, status_code=401)
+        if request_secret != webhook_secret:
+            return JSONResponse({"error": "Invalid webhook secret."}, status_code=403)
+        # Inject `request` in kwargs if required
+        if "request" in initial_sig.parameters:
+            kwargs["request"] = request
+        # Handle both sync and async routes
+        if inspect.iscoroutinefunction(func):
+            return await func(**kwargs)
+        else:
+            return func(**kwargs)
+    # Update signature to include request
+    if "request" not in initial_sig.parameters:
+        _protected_func.__signature__ = initial_sig.replace(  # type: ignore
+            parameters=(
+                inspect.Parameter(name="request", kind=inspect.Parameter.POSITIONAL_OR_KEYWORD, annotation=Request),
+            )
+            + tuple(initial_sig.parameters.values())
+        )
+    # Return protected route
+    return _protected_func

.venv/lib/python3.11/site-packages/huggingface_hub/community.py ADDED Viewed

	@@ -0,0 +1,355 @@

+"""
+Data structures to interact with Discussions and Pull Requests on the Hub.
+See [the Discussions and Pull Requests guide](https://huggingface.co/docs/hub/repositories-pull-requests-discussions)
+for more information on Pull Requests, Discussions, and the community tab.
+"""
+from dataclasses import dataclass
+from datetime import datetime
+from typing import List, Literal, Optional, Union
+from . import constants
+from .utils import parse_datetime
+# Status values of a Discussion / Pull Request; used as the type of `Discussion.status`.
+DiscussionStatus = Literal["open", "closed", "merged", "draft"]
+@dataclass
+class Discussion:
+    """
+    A Discussion or Pull Request on the Hub.
+    This dataclass is not intended to be instantiated directly.
+    Attributes:
+        title (`str`):
+            The title of the Discussion / Pull Request
+        status (`str`):
+            The status of the Discussion / Pull Request.
+            It must be one of:
+                * `"open"`
+                * `"closed"`
+                * `"merged"` (only for Pull Requests)
+                * `"draft"` (only for Pull Requests)
+        num (`int`):
+            The number of the Discussion / Pull Request.
+        repo_id (`str`):
+            The id (`"{namespace}/{repo_name}"`) of the repo on which
+            the Discussion / Pull Request was open.
+        repo_type (`str`):
+            The type of the repo on which the Discussion / Pull Request was open.
+            Possible values are: `"model"`, `"dataset"`, `"space"`.
+        author (`str`):
+            The username of the Discussion / Pull Request author.
+            Can be `"deleted"` if the user has been deleted since.
+        is_pull_request (`bool`):
+            Whether or not this is a Pull Request.
+        created_at (`datetime`):
+            The `datetime` of creation of the Discussion / Pull Request.
+        endpoint (`str`):
+            Endpoint of the Hub. Default is https://huggingface.co.
+        git_reference (`str`, *optional*):
+            (property) Git reference to which changes can be pushed if this is a Pull Request, `None` otherwise.
+        url (`str`):
+            (property) URL of the discussion on the Hub.
+    """
+    title: str
+    status: DiscussionStatus
+    num: int
+    repo_id: str
+    repo_type: str
+    author: str
+    is_pull_request: bool
+    created_at: datetime
+    endpoint: str
+    @property
+    def git_reference(self) -> Optional[str]:
+        """
+        If this is a Pull Request, returns the git reference to which changes can be pushed
+        (`"refs/pr/{num}"`). Returns `None` otherwise.
+        """
+        if self.is_pull_request:
+            return f"refs/pr/{self.num}"
+        return None
+    @property
+    def url(self) -> str:
+        """Returns the URL of the discussion on the Hub."""
+        # Model repos (and a `None` repo type) have no type prefix in the URL;
+        # other repo types are prefixed with the pluralized type (e.g. "datasets/").
+        if self.repo_type is None or self.repo_type == constants.REPO_TYPE_MODEL:
+            return f"{self.endpoint}/{self.repo_id}/discussions/{self.num}"
+        return f"{self.endpoint}/{self.repo_type}s/{self.repo_id}/discussions/{self.num}"
+@dataclass
+class DiscussionWithDetails(Discussion):
+    """
+    Subclass of [`Discussion`] that adds the events and the Pull Request details.
+    Attributes:
+        title (`str`):
+            The title of the Discussion / Pull Request
+        status (`str`):
+            The status of the Discussion / Pull Request.
+            It can be one of:
+                * `"open"`
+                * `"closed"`
+                * `"merged"` (only for Pull Requests)
+                * `"draft"` (only for Pull Requests)
+        num (`int`):
+            The number of the Discussion / Pull Request.
+        repo_id (`str`):
+            The id (`"{namespace}/{repo_name}"`) of the repo on which
+            the Discussion / Pull Request was open.
+        repo_type (`str`):
+            The type of the repo on which the Discussion / Pull Request was open.
+            Possible values are: `"model"`, `"dataset"`, `"space"`.
+        author (`str`):
+            The username of the Discussion / Pull Request author.
+            Can be `"deleted"` if the user has been deleted since.
+        is_pull_request (`bool`):
+            Whether or not this is a Pull Request.
+        created_at (`datetime`):
+            The `datetime` of creation of the Discussion / Pull Request.
+        events (`list` of [`DiscussionEvent`]):
+            The list of [`DiscussionEvent`] in this Discussion or Pull Request.
+        conflicting_files (`Union[List[str], bool, None]`, *optional*):
+            A list of conflicting files if this is a Pull Request.
+            `None` if `self.is_pull_request` is `False`.
+            `True` if there are conflicting files but the list can't be retrieved.
+        target_branch (`str`, *optional*):
+            The branch into which changes are to be merged if this is a
+            Pull Request. `None` if `self.is_pull_request` is `False`.
+        merge_commit_oid (`str`, *optional*):
+            If this is a merged Pull Request, this is set to the OID / SHA of
+            the merge commit, `None` otherwise.
+        diff (`str`, *optional*):
+            The git diff if this is a Pull Request, `None` otherwise.
+        endpoint (`str`):
+            Endpoint of the Hub. Default is https://huggingface.co.
+        git_reference (`str`, *optional*):
+            (property) Git reference to which changes can be pushed if this is a Pull Request, `None` otherwise.
+        url (`str`):
+            (property) URL of the discussion on the Hub.
+    """
+    events: List["DiscussionEvent"]
+    conflicting_files: Union[List[str], bool, None]
+    target_branch: Optional[str]
+    merge_commit_oid: Optional[str]
+    diff: Optional[str]
+@dataclass
+class DiscussionEvent:
+    """
+    An event in a Discussion or Pull Request.
+    Use concrete classes:
+        * [`DiscussionComment`]
+        * [`DiscussionStatusChange`]
+        * [`DiscussionCommit`]
+        * [`DiscussionTitleChange`]
+    Attributes:
+        id (`str`):
+            The ID of the event. A hexadecimal string.
+        type (`str`):
+            The type of the event.
+        created_at (`datetime`):
+            A [`datetime`](https://docs.python.org/3/library/datetime.html?highlight=datetime#datetime.datetime)
+            object holding the creation timestamp for the event.
+        author (`str`):
+            The username of the author of the event.
+            Can be `"deleted"` if the user has been deleted since.
+    """
+    id: str
+    type: str
+    created_at: datetime
+    author: str
+    # Raw event dict, kept so that subclasses can read extra data from it.
+    _event: dict
+    """Stores the original event data, in case we need to access it later."""
+@dataclass
+class DiscussionComment(DiscussionEvent):
+    """A comment in a Discussion / Pull Request.
+    Subclass of [`DiscussionEvent`].
+    Attributes:
+        id (`str`):
+            The ID of the event. A hexadecimal string.
+        type (`str`):
+            The type of the event.
+        created_at (`datetime`):
+            A [`datetime`](https://docs.python.org/3/library/datetime.html?highlight=datetime#datetime.datetime)
+            object holding the creation timestamp for the event.
+        author (`str`):
+            The username of the author of the comment.
+            Can be `"deleted"` if the user has been deleted since.
+        content (`str`):
+            The raw markdown content of the comment. Mentions, links and images are not rendered.
+        edited (`bool`):
+            Whether or not this comment has been edited.
+        hidden (`bool`):
+            Whether or not this comment has been hidden.
+    """
+    content: str
+    edited: bool
+    hidden: bool
+    # The properties below read from the raw event dict stored in `_event`.
+    @property
+    def rendered(self) -> str:
+        """The rendered comment, as a HTML string"""
+        return self._event["data"]["latest"]["html"]
+    @property
+    def last_edited_at(self) -> datetime:
+        """The last edit time, as a `datetime` object."""
+        return parse_datetime(self._event["data"]["latest"]["updatedAt"])
+    @property
+    def last_edited_by(self) -> str:
+        """The username of the author of the latest version, or `"deleted"` if it is missing."""
+        return self._event["data"]["latest"].get("author", {}).get("name", "deleted")
+    @property
+    def edit_history(self) -> List[dict]:
+        """The edit history of the comment"""
+        return self._event["data"]["history"]
+    @property
+    def number_of_edits(self) -> int:
+        """The number of entries in the edit history."""
+        return len(self.edit_history)
+@dataclass
+class DiscussionStatusChange(DiscussionEvent):
+    """A change of status in a Discussion / Pull Request.
+    Subclass of [`DiscussionEvent`].
+    Attributes:
+        id (`str`):
+            The ID of the event. A hexadecimal string.
+        type (`str`):
+            The type of the event.
+        created_at (`datetime`):
+            A [`datetime`](https://docs.python.org/3/library/datetime.html?highlight=datetime#datetime.datetime)
+            object holding the creation timestamp for the event.
+        author (`str`):
+            The username of the author of the status change.
+            Can be `"deleted"` if the user has been deleted since.
+        new_status (`str`):
+            The status of the Discussion / Pull Request after the change.
+            It can be one of:
+                * `"open"`
+                * `"closed"`
+                * `"merged"` (only for Pull Requests)
+    """
+    new_status: str
+@dataclass
+class DiscussionCommit(DiscussionEvent):
+    """A commit in a Pull Request.
+    Subclass of [`DiscussionEvent`].
+    Attributes:
+        id (`str`):
+            The ID of the event. A hexadecimal string.
+        type (`str`):
+            The type of the event.
+        created_at (`datetime`):
+            A [`datetime`](https://docs.python.org/3/library/datetime.html?highlight=datetime#datetime.datetime)
+            object holding the creation timestamp for the event.
+        author (`str`):
+            The username of the author of the commit event.
+            Can be `"deleted"` if the user has been deleted since.
+        summary (`str`):
+            The summary of the commit.
+        oid (`str`):
+            The OID / SHA of the commit, as a hexadecimal string.
+    """
+    summary: str
+    oid: str
+@dataclass
+class DiscussionTitleChange(DiscussionEvent):
+    """A rename event in a Discussion / Pull Request.
+    Subclass of [`DiscussionEvent`].
+    Attributes:
+        id (`str`):
+            The ID of the event. A hexadecimal string.
+        type (`str`):
+            The type of the event.
+        created_at (`datetime`):
+            A [`datetime`](https://docs.python.org/3/library/datetime.html?highlight=datetime#datetime.datetime)
+            object holding the creation timestamp for the event.
+        author (`str`):
+            The username of the author of the title change.
+            Can be `"deleted"` if the user has been deleted since.
+        old_title (`str`):
+            The previous title for the Discussion / Pull Request.
+        new_title (`str`):
+            The new title.
+    """
+    old_title: str
+    new_title: str
+def deserialize_event(event: dict) -> DiscussionEvent:
+    """Build the most specific [`DiscussionEvent`] subclass for a raw event payload.
+    Args:
+        event (`dict`):
+            Raw event. The `"id"`, `"type"` and `"createdAt"` keys are always read; the typed events
+            (`"comment"`, `"status-change"`, `"commit"`, `"title-change"`) additionally read `event["data"]`.
+    Returns:
+        [`DiscussionEvent`]: a `DiscussionComment`, `DiscussionStatusChange`, `DiscussionCommit` or
+        `DiscussionTitleChange` according to `event["type"]`, or a plain `DiscussionEvent` for any other type.
+    """
+    identifier: str = event["id"]
+    kind: str = event["type"]
+    timestamp = parse_datetime(event["createdAt"])
+    # Fields shared by every event class.
+    shared = {
+        "id": identifier,
+        "type": kind,
+        "created_at": timestamp,
+        # An event without an author entry (or without a name) is attributed to "deleted".
+        "author": event.get("author", {}).get("name", "deleted"),
+        "_event": event,
+    }
+    # `event["data"]` is only looked up inside the branches: unknown event types are not required to carry it.
+    if kind == "comment":
+        payload = event["data"]
+        return DiscussionComment(
+            **shared,
+            edited=payload["edited"],
+            hidden=payload["hidden"],
+            content=payload["latest"]["raw"],
+        )
+    elif kind == "status-change":
+        return DiscussionStatusChange(**shared, new_status=event["data"]["status"])
+    elif kind == "commit":
+        payload = event["data"]
+        return DiscussionCommit(**shared, summary=payload["subject"], oid=payload["oid"])
+    elif kind == "title-change":
+        payload = event["data"]
+        return DiscussionTitleChange(**shared, old_title=payload["from"], new_title=payload["to"])
+    return DiscussionEvent(**shared)

.venv/lib/python3.11/site-packages/huggingface_hub/constants.py ADDED Viewed

	@@ -0,0 +1,229 @@

+import os
+import re
+import typing
+from typing import Literal, Optional, Tuple
+# Possible values for env variables
+ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"}
+ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES | {"AUTO"}
+def _is_true(value: Optional[str]) -> bool:
+    """Tell whether `value` is one of `ENV_VARS_TRUE_VALUES`, ignoring case. `None` is never true."""
+    return value is not None and value.upper() in ENV_VARS_TRUE_VALUES
+def _as_int(value: Optional[str]) -> Optional[int]:
+    """Convert `value` with `int()`, letting `None` pass through unchanged.
+    A string that `int()` cannot parse raises `ValueError`.
+    """
+    return None if value is None else int(value)
+# Constants for file downloads
+PYTORCH_WEIGHTS_NAME = "pytorch_model.bin"
+TF2_WEIGHTS_NAME = "tf_model.h5"
+TF_WEIGHTS_NAME = "model.ckpt"
+FLAX_WEIGHTS_NAME = "flax_model.msgpack"
+CONFIG_NAME = "config.json"
+REPOCARD_NAME = "README.md"
+DEFAULT_ETAG_TIMEOUT = 10
+DEFAULT_DOWNLOAD_TIMEOUT = 10
+DEFAULT_REQUEST_TIMEOUT = 10
+DOWNLOAD_CHUNK_SIZE = 10 * 1024 * 1024
+HF_TRANSFER_CONCURRENCY = 100
+# Constants for serialization
+PYTORCH_WEIGHTS_FILE_PATTERN = "pytorch_model{suffix}.bin"  # Unsafe pickle: use safetensors instead
+SAFETENSORS_WEIGHTS_FILE_PATTERN = "model{suffix}.safetensors"
+TF2_WEIGHTS_FILE_PATTERN = "tf_model{suffix}.h5"
+# Constants for safetensors repos
+SAFETENSORS_SINGLE_FILE = "model.safetensors"
+SAFETENSORS_INDEX_FILE = "model.safetensors.index.json"
+SAFETENSORS_MAX_HEADER_LENGTH = 25_000_000
+# Timeout of acquiring file lock and logging the attempt
+FILELOCK_LOG_EVERY_SECONDS = 10
+# Git-related constants
+DEFAULT_REVISION = "main"
+REGEX_COMMIT_OID = re.compile(r"[A-Fa-f0-9]{5,40}")
+HUGGINGFACE_CO_URL_HOME = "https://huggingface.co/"
+_staging_mode = _is_true(os.environ.get("HUGGINGFACE_CO_STAGING"))
+_HF_DEFAULT_ENDPOINT = "https://huggingface.co"
+_HF_DEFAULT_STAGING_ENDPOINT = "https://hub-ci.huggingface.co"
+ENDPOINT = os.getenv("HF_ENDPOINT", "").rstrip("/") or (
+    _HF_DEFAULT_STAGING_ENDPOINT if _staging_mode else _HF_DEFAULT_ENDPOINT
+)
+HUGGINGFACE_CO_URL_TEMPLATE = ENDPOINT + "/{repo_id}/resolve/{revision}/{filename}"
+HUGGINGFACE_HEADER_X_REPO_COMMIT = "X-Repo-Commit"
+HUGGINGFACE_HEADER_X_LINKED_ETAG = "X-Linked-Etag"
+HUGGINGFACE_HEADER_X_LINKED_SIZE = "X-Linked-Size"
+INFERENCE_ENDPOINT = os.environ.get("HF_INFERENCE_ENDPOINT", "https://api-inference.huggingface.co")
+# See https://huggingface.co/docs/inference-endpoints/index
+INFERENCE_ENDPOINTS_ENDPOINT = "https://api.endpoints.huggingface.cloud/v2"
+# Proxy for third-party providers
+INFERENCE_PROXY_TEMPLATE = ENDPOINT + "/api/inference-proxy/{provider}"
+REPO_ID_SEPARATOR = "--"
+# ^ this substring is not allowed in repo_ids on hf.co
+# and is the canonical one we use for serialization of repo ids elsewhere.
+REPO_TYPE_DATASET = "dataset"
+REPO_TYPE_SPACE = "space"
+REPO_TYPE_MODEL = "model"
+REPO_TYPES = [None, REPO_TYPE_MODEL, REPO_TYPE_DATASET, REPO_TYPE_SPACE]
+SPACES_SDK_TYPES = ["gradio", "streamlit", "docker", "static"]
+REPO_TYPES_URL_PREFIXES = {
+    REPO_TYPE_DATASET: "datasets/",
+    REPO_TYPE_SPACE: "spaces/",
+}
+REPO_TYPES_MAPPING = {
+    "datasets": REPO_TYPE_DATASET,
+    "spaces": REPO_TYPE_SPACE,
+    "models": REPO_TYPE_MODEL,
+}
+# Allowed values when filtering discussions by type, plus the same values as a runtime tuple.
+DiscussionTypeFilter = Literal["all", "discussion", "pull_request"]
+DISCUSSION_TYPES: Tuple[DiscussionTypeFilter, ...] = typing.get_args(DiscussionTypeFilter)
+# Allowed values when filtering discussions by status, plus the same values as a runtime tuple.
+# The tuple is annotated with the *status* literal: its elements come from `DiscussionStatusFilter`.
+DiscussionStatusFilter = Literal["all", "open", "closed"]
+DISCUSSION_STATUS: Tuple[DiscussionStatusFilter, ...] = typing.get_args(DiscussionStatusFilter)
+# Webhook subscription types
+WEBHOOK_DOMAIN_T = Literal["repo", "discussions"]
+# default cache
+default_home = os.path.join(os.path.expanduser("~"), ".cache")
+HF_HOME = os.path.expanduser(
+    os.getenv(
+        "HF_HOME",
+        os.path.join(os.getenv("XDG_CACHE_HOME", default_home), "huggingface"),
+    )
+)
+hf_cache_home = HF_HOME  # for backward compatibility. TODO: remove this in 1.0.0
+default_cache_path = os.path.join(HF_HOME, "hub")
+default_assets_cache_path = os.path.join(HF_HOME, "assets")
+# Legacy env variables
+HUGGINGFACE_HUB_CACHE = os.getenv("HUGGINGFACE_HUB_CACHE", default_cache_path)
+HUGGINGFACE_ASSETS_CACHE = os.getenv("HUGGINGFACE_ASSETS_CACHE", default_assets_cache_path)
+# New env variables
+HF_HUB_CACHE = os.getenv("HF_HUB_CACHE", HUGGINGFACE_HUB_CACHE)
+HF_ASSETS_CACHE = os.getenv("HF_ASSETS_CACHE", HUGGINGFACE_ASSETS_CACHE)
+HF_HUB_OFFLINE = _is_true(os.environ.get("HF_HUB_OFFLINE") or os.environ.get("TRANSFORMERS_OFFLINE"))
+# Opt-out from telemetry requests
+HF_HUB_DISABLE_TELEMETRY = (
+    _is_true(os.environ.get("HF_HUB_DISABLE_TELEMETRY"))  # HF-specific env variable
+    or _is_true(os.environ.get("DISABLE_TELEMETRY"))
+    or _is_true(os.environ.get("DO_NOT_TRACK"))  # https://consoledonottrack.com/
+)
+# In the past, token was stored in a hardcoded location
+# `_OLD_HF_TOKEN_PATH` is deprecated and will be removed "at some point".
+# See https://github.com/huggingface/huggingface_hub/issues/1232
+_OLD_HF_TOKEN_PATH = os.path.expanduser("~/.huggingface/token")
+HF_TOKEN_PATH = os.environ.get("HF_TOKEN_PATH", os.path.join(HF_HOME, "token"))
+HF_STORED_TOKENS_PATH = os.path.join(os.path.dirname(HF_TOKEN_PATH), "stored_tokens")
+if _staging_mode:
+    # In staging mode, we use a different cache to ensure we don't mix up production and staging data or tokens
+    # NOTE(review): `HF_HUB_CACHE` and `HF_STORED_TOKENS_PATH` were already computed above from the
+    # pre-override values of `HUGGINGFACE_HUB_CACHE` and `HF_TOKEN_PATH`, so they are not redirected to
+    # `_staging_home` by the assignments below. Confirm whether that is intended.
+    _staging_home = os.path.join(os.path.expanduser("~"), ".cache", "huggingface_staging")
+    HUGGINGFACE_HUB_CACHE = os.path.join(_staging_home, "hub")
+    _OLD_HF_TOKEN_PATH = os.path.join(_staging_home, "_old_token")
+    HF_TOKEN_PATH = os.path.join(_staging_home, "token")
+# Here, `True` will disable progress bars globally without possibility of enabling it
+# programmatically. `False` will enable them without possibility of disabling them.
+# If environment variable is not set (None), then the user is free to enable/disable
+# them programmatically.
+# TL;DR: env variable has priority over code
+__HF_HUB_DISABLE_PROGRESS_BARS = os.environ.get("HF_HUB_DISABLE_PROGRESS_BARS")
+HF_HUB_DISABLE_PROGRESS_BARS: Optional[bool] = (
+    _is_true(__HF_HUB_DISABLE_PROGRESS_BARS) if __HF_HUB_DISABLE_PROGRESS_BARS is not None else None
+)
+# Disable warning on machines that do not support symlinks (e.g. Windows non-developer)
+HF_HUB_DISABLE_SYMLINKS_WARNING: bool = _is_true(os.environ.get("HF_HUB_DISABLE_SYMLINKS_WARNING"))
+# Disable warning when using experimental features
+HF_HUB_DISABLE_EXPERIMENTAL_WARNING: bool = _is_true(os.environ.get("HF_HUB_DISABLE_EXPERIMENTAL_WARNING"))
+# Disable sending the cached token by default in all HTTP requests to the Hub
+HF_HUB_DISABLE_IMPLICIT_TOKEN: bool = _is_true(os.environ.get("HF_HUB_DISABLE_IMPLICIT_TOKEN"))
+# Enable fast-download using external dependency "hf_transfer"
+# See:
+# - https://pypi.org/project/hf-transfer/
+# - https://github.com/huggingface/hf_transfer (private)
+HF_HUB_ENABLE_HF_TRANSFER: bool = _is_true(os.environ.get("HF_HUB_ENABLE_HF_TRANSFER"))
+# UNUSED
+# We don't use symlinks in local dir anymore.
+# NOTE: in the three settings below, `_as_int(...) or <default>` falls back to the default both when the
+# variable is unset (`None`) and when it is explicitly set to `0`, since `0` is falsy. A value that is not
+# an integer literal makes `int()` raise `ValueError` at import time.
+HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD: int = (
+    _as_int(os.environ.get("HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD")) or 5 * 1024 * 1024
+)
+# Used to override the etag timeout on a system level
+HF_HUB_ETAG_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_ETAG_TIMEOUT")) or DEFAULT_ETAG_TIMEOUT
+# Used to override the get request timeout on a system level
+HF_HUB_DOWNLOAD_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_DOWNLOAD_TIMEOUT")) or DEFAULT_DOWNLOAD_TIMEOUT
+# List frameworks that are handled by the InferenceAPI service. Useful to scan endpoints and check which models are
+# deployed and running. Since 95% of the models are using the top 4 frameworks listed below, we scan only those by
+# default. We still keep the full list of supported frameworks in case we want to scan all of them.
+MAIN_INFERENCE_API_FRAMEWORKS = [
+    "diffusers",
+    "sentence-transformers",
+    "text-generation-inference",
+    "transformers",
+]
+ALL_INFERENCE_API_FRAMEWORKS = MAIN_INFERENCE_API_FRAMEWORKS + [
+    "adapter-transformers",
+    "allennlp",
+    "asteroid",
+    "bertopic",
+    "doctr",
+    "espnet",
+    "fairseq",
+    "fastai",
+    "fasttext",
+    "flair",
+    "k2",
+    "keras",
+    "mindspore",
+    "nemo",
+    "open_clip",
+    "paddlenlp",
+    "peft",
+    "pyannote-audio",
+    "sklearn",
+    "spacy",
+    "span-marker",
+    "speechbrain",
+    "stanza",
+    "timm",
+]

.venv/lib/python3.11/site-packages/huggingface_hub/errors.py ADDED Viewed

	@@ -0,0 +1,329 @@

+"""Contains all custom errors."""
+from pathlib import Path
+from typing import Optional, Union
+from requests import HTTPError, Response
+# CACHE ERRORS
+class CacheNotFound(Exception):
+    """Exception thrown when the Huggingface cache is not found.
+    Attributes:
+        cache_dir (`str` or `Path`):
+            The `cache_dir` value given to the constructor.
+    """
+    cache_dir: Union[str, Path]
+    def __init__(self, msg: str, cache_dir: Union[str, Path], *args, **kwargs):
+        # `msg` and any extra arguments are forwarded as-is to `Exception.__init__`.
+        super().__init__(msg, *args, **kwargs)
+        self.cache_dir = cache_dir
+class CorruptedCacheException(Exception):
+    """Exception for any unexpected structure in the Huggingface cache-system."""
+# HEADERS ERRORS
+class LocalTokenNotFoundError(EnvironmentError):
+    """Raised if local token is required but not found."""
+# HTTP ERRORS
+class OfflineModeIsEnabled(ConnectionError):
+    """Raised when a request is made but `HF_HUB_OFFLINE=1` is set as environment variable."""
+class HfHubHTTPError(HTTPError):
+    """
+    Base class to inherit from for any custom HTTP error raised in HF Hub.
+    Any HTTPError is converted at least into a `HfHubHTTPError`. If some information is
+    sent back by the server, it will be added to the error message.
+    Added details:
+    - Request id from "X-Request-Id" header if exists. If not, fallback to "X-Amzn-Trace-Id" header if exists.
+    - Server error message from the header "X-Error-Message".
+    - Server error message if we can find one in the response body.
+    Attributes:
+        request_id (`str`, *optional*):
+            Value of the "x-request-id" header of `response`, else of its "X-Amzn-Trace-Id" header.
+            `None` when no response is given.
+        server_message (`str`, *optional*):
+            The `server_message` value given to the constructor.
+    Example:
+    ```py
+        import requests
+        from huggingface_hub.utils import get_session, hf_raise_for_status, HfHubHTTPError
+        response = get_session().post(...)
+        try:
+            hf_raise_for_status(response)
+        except HfHubHTTPError as e:
+            print(str(e)) # formatted message
+            e.request_id, e.server_message # details returned by server
+            # Complete the error message with additional information once it's raised
+            e.append_to_message("\n`create_commit` expects the repository to exist.")
+            raise
+    ```
+    """
+    def __init__(self, message: str, response: Optional[Response] = None, *, server_message: Optional[str] = None):
+        if response is None:
+            self.request_id = None
+        else:
+            headers = response.headers
+            self.request_id = headers.get("x-request-id") or headers.get("X-Amzn-Trace-Id")
+        self.server_message = server_message
+        # The originating request is only known when a response is available.
+        request = None if response is None else response.request
+        super().__init__(
+            message,
+            response=response,  # type: ignore [arg-type]
+            request=request,  # type: ignore [arg-type]
+        )
+    def append_to_message(self, additional_message: str) -> None:
+        """Extend the first exception argument (the message) with `additional_message`, keeping the other args."""
+        head, tail = self.args[0], self.args[1:]
+        self.args = (head + additional_message,) + tail
+# INFERENCE CLIENT ERRORS
+class InferenceTimeoutError(HTTPError, TimeoutError):
+    """Error raised when a model is unavailable or the request times out."""
+# INFERENCE ENDPOINT ERRORS
+class InferenceEndpointError(Exception):
+    """Generic exception when dealing with Inference Endpoints."""
+class InferenceEndpointTimeoutError(InferenceEndpointError, TimeoutError):
+    """Exception for timeouts while waiting for Inference Endpoint."""
+# SAFETENSORS ERRORS
+class SafetensorsParsingError(Exception):
+    """Raised when failing to parse a safetensors file metadata.
+    This can be the case if the file is not a safetensors file or does not respect the specification.
+    """
+class NotASafetensorsRepoError(Exception):
+    """Raised when a repo is not a Safetensors repo i.e. doesn't have either a `model.safetensors` or a
+    `model.safetensors.index.json` file.
+    """
+# TEXT GENERATION ERRORS
+class TextGenerationError(HTTPError):
+    """Generic error raised if text-generation went wrong."""
+# Text Generation Inference Errors
+class ValidationError(TextGenerationError):
+    """Server-side validation error."""
+class GenerationError(TextGenerationError):
+    pass
+class OverloadedError(TextGenerationError):
+    pass
+class IncompleteGenerationError(TextGenerationError):
+    pass
+class UnknownError(TextGenerationError):
+    pass
+# VALIDATION ERRORS
+class HFValidationError(ValueError):
+    """Generic exception thrown by `huggingface_hub` validators.
+    Inherits from [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError).
+    """
+# FILE METADATA ERRORS
+class FileMetadataError(OSError):
+    """Error triggered when the metadata of a file on the Hub cannot be retrieved (missing ETag or commit_hash).
+    Inherits from `OSError` for backward compatibility.
+    """
+# REPOSITORY ERRORS
+class RepositoryNotFoundError(HfHubHTTPError):
+    """
+    Raised when trying to access a hf.co URL with an invalid repository name, or
+    with a private repo name the user does not have access to.
+    Example:
+    ```py
+    >>> from huggingface_hub import model_info
+    >>> model_info("<non_existent_repository>")
+    (...)
+    huggingface_hub.errors.RepositoryNotFoundError: 401 Client Error. (Request ID: PvMw_VjBMjVdMz53WKIzP)
+    Repository Not Found for url: https://huggingface.co/api/models/%3Cnon_existent_repository%3E.
+    Please make sure you specified the correct `repo_id` and `repo_type`.
+    If the repo is private, make sure you are authenticated.
+    Invalid username or password.
+    ```
+    """
+class GatedRepoError(RepositoryNotFoundError):
+    """
+    Raised when trying to access a gated repository for which the user is not on the
+    authorized list.
+    Note: derives from `RepositoryNotFoundError` to ensure backward compatibility.
+    Example:
+    ```py
+    >>> from huggingface_hub import model_info
+    >>> model_info("<gated_repository>")
+    (...)
+    huggingface_hub.errors.GatedRepoError: 403 Client Error. (Request ID: ViT1Bf7O_026LGSQuVqfa)
+    Cannot access gated repo for url https://huggingface.co/api/models/ardent-figment/gated-model.
+    Access to model ardent-figment/gated-model is restricted and you are not in the authorized list.
+    Visit https://huggingface.co/ardent-figment/gated-model to ask for access.
+    ```
+    """
+class DisabledRepoError(HfHubHTTPError):
+    """
+    Raised when trying to access a repository that has been disabled by its author.
+    Example:
+    ```py
+    >>> from huggingface_hub import dataset_info
+    >>> dataset_info("laion/laion-art")
+    (...)
+    huggingface_hub.errors.DisabledRepoError: 403 Client Error. (Request ID: Root=1-659fc3fa-3031673e0f92c71a2260dbe2;bc6f4dfb-b30a-4862-af0a-5cfe827610d8)
+    Cannot access repository for url https://huggingface.co/api/datasets/laion/laion-art.
+    Access to this resource is disabled.
+    ```
+    """
+# REVISION ERROR
+class RevisionNotFoundError(HfHubHTTPError):
+    """
+    Raised when trying to access a hf.co URL with a valid repository but an invalid
+    revision.
+    Example:
+    ```py
+    >>> from huggingface_hub import hf_hub_download
+    >>> hf_hub_download('bert-base-cased', 'config.json', revision='<non-existent-revision>')
+    (...)
+    huggingface_hub.errors.RevisionNotFoundError: 404 Client Error. (Request ID: Mwhe_c3Kt650GcdKEFomX)
+    Revision Not Found for url: https://huggingface.co/bert-base-cased/resolve/%3Cnon-existent-revision%3E/config.json.
+    ```
+    """
+# ENTRY ERRORS
+class EntryNotFoundError(HfHubHTTPError):
+    """
+    Raised when trying to access a hf.co URL with a valid repository and revision
+    but an invalid filename.
+    Example:
+    ```py
+    >>> from huggingface_hub import hf_hub_download
+    >>> hf_hub_download('bert-base-cased', '<non-existent-file>')
+    (...)
+    huggingface_hub.errors.EntryNotFoundError: 404 Client Error. (Request ID: 53pNl6M0MxsnG5Sw8JA6x)
+    Entry Not Found for url: https://huggingface.co/bert-base-cased/resolve/main/%3Cnon-existent-file%3E.
+    ```
+    """
+class LocalEntryNotFoundError(EntryNotFoundError, FileNotFoundError, ValueError):
+    """
+    Raised when trying to access a file or snapshot that is not on the disk when network is
+    disabled or unavailable (connection issue). The entry may exist on the Hub.
+    Note: `ValueError` type is to ensure backward compatibility.
+    Note: `LocalEntryNotFoundError` derives from `HTTPError` because of `EntryNotFoundError`
+          even when it is not a network issue.
+    Example:
+    ```py
+    >>> from huggingface_hub import hf_hub_download
+    >>> hf_hub_download('bert-base-cased', '<non-cached-file>',  local_files_only=True)
+    (...)
+    huggingface_hub.errors.LocalEntryNotFoundError: Cannot find the requested files in the disk cache and outgoing traffic has been disabled. To enable hf.co look-ups and downloads online, set 'local_files_only' to False.
+    ```
+    """
+    def __init__(self, message: str):
+        # There is never an HTTP response attached to this error: it is built from the message alone.
+        super().__init__(message, response=None)
+# REQUEST ERROR
+class BadRequestError(HfHubHTTPError, ValueError):
+    """
+    Raised by `hf_raise_for_status` when the server returns a HTTP 400 error.
+    Example:
+    ```py
+    >>> resp = requests.post("hf.co/api/check", ...)
+    >>> hf_raise_for_status(resp, endpoint_name="check")
+    huggingface_hub.errors.BadRequestError: Bad request for check endpoint: {details} (Request ID: XXX)
+    ```
+    """
+# DDUF file format ERROR
+class DDUFError(Exception):
+    """Base exception for errors related to the DDUF format."""
+class DDUFCorruptedFileError(DDUFError):
+    """Exception thrown when the DDUF file is corrupted."""
+class DDUFExportError(DDUFError):
+    """Base exception for errors during DDUF export."""
+class DDUFInvalidEntryNameError(DDUFExportError):
+    """Exception thrown when the entry name is invalid."""

.venv/lib/python3.11/site-packages/huggingface_hub/fastai_utils.py ADDED Viewed

	@@ -0,0 +1,425 @@

+import json
+import os
+from pathlib import Path
+from pickle import DEFAULT_PROTOCOL, PicklingError
+from typing import Any, Dict, List, Optional, Union
+from packaging import version
+from huggingface_hub import constants, snapshot_download
+from huggingface_hub.hf_api import HfApi
+from huggingface_hub.utils import (
+    SoftTemporaryDirectory,
+    get_fastai_version,
+    get_fastcore_version,
+    get_python_version,
+)
+from .utils import logging, validate_hf_hub_args
+from .utils._runtime import _PY_VERSION  # noqa: F401 # for backward compatibility...
+logger = logging.get_logger(__name__)
+def _check_fastai_fastcore_versions(
+    fastai_min_version: str = "2.4",
+    fastcore_min_version: str = "1.3.27",
+):
+    """
+    Checks that the installed fastai and fastcore versions are compatible for pickle serialization.
+    Args:
+        fastai_min_version (`str`, *optional*):
+            The minimum fastai version supported.
+        fastcore_min_version (`str`, *optional*):
+            The minimum fastcore version supported.
+    <Tip>
+    Raises the following error:
+        - [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError)
+          if the fastai or fastcore libraries are not available or are of an invalid version.
+    </Tip>
+    """
+    installed_fastai = get_fastai_version()
+    installed_fastcore = get_fastcore_version()
+    # "N/A" is treated as "package not available". Each package has to be tested on its own:
+    # `(fastcore or fastai) == "N/A"` only ever compares the first operand, so a missing fastai
+    # would slip through and crash in `version.Version("N/A")` instead of raising `ImportError`.
+    if "N/A" in (installed_fastcore, installed_fastai):
+        raise ImportError(
+            f"fastai>={fastai_min_version} and fastcore>={fastcore_min_version} are"
+            f" required. Currently using fastai=={installed_fastai} and"
+            f" fastcore=={installed_fastcore}."
+        )
+    if version.Version(installed_fastai) < version.Version(fastai_min_version):
+        raise ImportError(
+            "`push_to_hub_fastai` and `from_pretrained_fastai` require a"
+            f" fastai>={fastai_min_version} version, but you are using fastai version"
+            f" {installed_fastai} which is incompatible. Upgrade with `pip install"
+            " fastai==2.5.6`."
+        )
+    if version.Version(installed_fastcore) < version.Version(fastcore_min_version):
+        raise ImportError(
+            "`push_to_hub_fastai` and `from_pretrained_fastai` require a"
+            f" fastcore>={fastcore_min_version} version, but you are using fastcore"
+            f" version {installed_fastcore} which is incompatible. Upgrade with"
+            " `pip install fastcore==1.3.27`."
+        )
+def _check_pyproject_min_version(package_versions: List[str], package: str, min_version: str) -> None:
+    """Warn if `package` is not listed in `package_versions`; raise `ImportError` if it is pinned below `min_version`."""
+    matching = [pck for pck in package_versions if pck.startswith(package)]
+    if not matching:
+        logger.warning(f"The repository does not have a {package} version specified in the `pyproject.toml`.")
+        return
+    # Keep what follows the first "=" and drop a directly following one, so that "pkg=1.0", "pkg==1.0"
+    # and "pkg>=1.0" all yield "1.0". The result is an empty string when no version is specified,
+    # in which case there is nothing to compare.
+    pinned_version = str(matching[0]).partition("=")[2].lstrip("=")
+    if pinned_version != "" and version.Version(pinned_version) < version.Version(min_version):
+        raise ImportError(
+            f"`from_pretrained_fastai` requires {package}>={min_version} version but the model to load uses"
+            f" {pinned_version} which is incompatible."
+        )
+def _check_fastai_fastcore_pyproject_versions(
+    storage_folder: str,
+    fastai_min_version: str = "2.4",
+    fastcore_min_version: str = "1.3.27",
+):
+    """
+    Checks that the `pyproject.toml` file in the directory `storage_folder` has fastai and fastcore versions
+    that are compatible with `from_pretrained_fastai` and `push_to_hub_fastai`. If `pyproject.toml` does not exist
+    or does not contain versions for fastai and fastcore, then it logs a warning.
+    Args:
+        storage_folder (`str`):
+            Folder to look for the `pyproject.toml` file.
+        fastai_min_version (`str`, *optional*):
+            The minimum fastai version supported.
+        fastcore_min_version (`str`, *optional*):
+            The minimum fastcore version supported.
+    <Tip>
+    Raises the following errors:
+        - [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError)
+          if the `toml` module is not installed.
+        - [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError)
+          if the `pyproject.toml` indicates a lower than minimum supported version of fastai or fastcore.
+    </Tip>
+    """
+    try:
+        import toml
+    except ModuleNotFoundError as err:
+        raise ImportError(
+            "`push_to_hub_fastai` and `from_pretrained_fastai` require the toml module."
+            " Install it with `pip install toml`."
+        ) from err
+    # Checks that a `pyproject.toml`, with `build-system` and `requires` sections, exists in the repository.
+    # If so, get the list of required packages. Each missing piece only logs a warning and stops the check.
+    pyproject_path = f"{storage_folder}/pyproject.toml"
+    if not os.path.isfile(pyproject_path):
+        logger.warning(
+            "There is no `pyproject.toml` in the repository that contains the fastai"
+            " `Learner`. The `pyproject.toml` would allow us to verify that your fastai"
+            " and fastcore versions are compatible with those of the model you want to"
+            " load."
+        )
+        return
+    pyproject_toml = toml.load(pyproject_path)
+    if "build-system" not in pyproject_toml:
+        logger.warning(
+            "There is no `build-system` section in the pyproject.toml of the repository"
+            " that contains the fastai `Learner`. The `build-system` would allow us to"
+            " verify that your fastai and fastcore versions are compatible with those"
+            " of the model you want to load."
+        )
+        return
+    build_system_toml = pyproject_toml["build-system"]
+    if "requires" not in build_system_toml:
+        logger.warning(
+            "There is no `requires` section in the pyproject.toml of the repository"
+            " that contains the fastai `Learner`. The `requires` would allow us to"
+            " verify that your fastai and fastcore versions are compatible with those"
+            " of the model you want to load."
+        )
+        return
+    package_versions = build_system_toml["requires"]
+    # Compare the fastai and fastcore versions recorded with the model (if any) against the supported minimums.
+    _check_pyproject_min_version(package_versions, "fastai", fastai_min_version)
+    _check_pyproject_min_version(package_versions, "fastcore", fastcore_min_version)
+README_TEMPLATE = """---
+tags:
+- fastai
+---
+# Amazing!
+🥳 Congratulations on hosting your fastai model on the Hugging Face Hub!
+# Some next steps
+1. Fill out this model card with more information (see the template below and the [documentation here](https://huggingface.co/docs/hub/model-repos))!
+2. Create a demo in Gradio or Streamlit using 🤗 Spaces ([documentation here](https://huggingface.co/docs/hub/spaces)).
+3. Join the fastai community on the [Fastai Discord](https://discord.com/invite/YKrxeNn)!
+Greetings fellow fastlearner 🤝! Don't forget to delete this content from your model card.
+---
+# Model card
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+"""
+# Content of the `pyproject.toml` written next to a saved model. This is an f-string evaluated once, when the
+# module is imported, so it records the python / fastai / fastcore versions reported at that moment.
+PYPROJECT_TEMPLATE = f"""[build-system]
+requires = ["setuptools>=40.8.0", "wheel", "python={get_python_version()}", "fastai={get_fastai_version()}", "fastcore={get_fastcore_version()}"]
+build-backend = "setuptools.build_meta:__legacy__"
+"""
+def _create_model_card(repo_dir: Path):
+    """
+    Writes the default model card (`README_TEMPLATE`) to `README.md`, unless that file already exists.
+    Args:
+        repo_dir (`Path`):
+            Directory where model card is created.
+    """
+    card_path = repo_dir / "README.md"
+    if card_path.exists():
+        # Never overwrite an existing model card.
+        return
+    card_path.write_text(README_TEMPLATE, encoding="utf-8")
+def _create_model_pyproject(repo_dir: Path):
+    """
+    Writes `PYPROJECT_TEMPLATE` to `pyproject.toml`, unless that file already exists.
+    Args:
+        repo_dir (`Path`):
+            Directory where `pyproject.toml` is created.
+    """
+    target = repo_dir / "pyproject.toml"
+    if target.exists():
+        # Never overwrite an existing `pyproject.toml`.
+        return
+    target.write_text(PYPROJECT_TEMPLATE, encoding="utf-8")
+def _save_pretrained_fastai(
+    learner,
+    save_directory: Union[str, Path],
+    config: Optional[Dict[str, Any]] = None,
+):
+    """
+    Saves a fastai learner to `save_directory` in pickle format using the default pickle protocol for the version of python used.
+    Besides `model.pkl`, a `README.md` and a `pyproject.toml` are created in `save_directory` when missing.
+    Args:
+        learner (`Learner`):
+            The `fastai.Learner` you'd like to save.
+        save_directory (`str` or `Path`):
+            Specific directory in which you want to save the fastai learner.
+        config (`dict`, *optional*):
+            Configuration object. Will be uploaded as a .json file. Example: 'https://huggingface.co/espejelomar/fastai-pet-breeds-classification/blob/main/config.json'.
+    <Tip>
+    Raises the following errors:
+        - [`RuntimeError`](https://docs.python.org/3/library/exceptions.html#RuntimeError)
+          if the config file provided is not a dictionary.
+        - `pickle.PicklingError` if the learner cannot be pickled.
+    </Tip>
+    """
+    _check_fastai_fastcore_versions()
+    save_path = Path(save_directory)
+    os.makedirs(save_path, exist_ok=True)
+    # If the user provides a config, it is written as-is to the config file.
+    if config is not None:
+        if not isinstance(config, dict):
+            raise RuntimeError(f"Provided config should be a dict. Got: '{type(config)}'")
+        # Explicit encoding, like the other files written in this module.
+        with open(save_path / constants.CONFIG_NAME, "w", encoding="utf-8") as f:
+            json.dump(config, f)
+    _create_model_card(save_path)
+    _create_model_pyproject(save_path)
+    # learner.export saves the model in `self.path`.
+    learner.path = save_path
+    try:
+        learner.export(
+            fname="model.pkl",
+            pickle_protocol=DEFAULT_PROTOCOL,
+        )
+    except PicklingError as err:
+        # Re-raise with a hint, keeping the original error as the cause.
+        raise PicklingError(
+            "You are using a lambda function, i.e., an anonymous function. `pickle`"
+            " cannot pickle function objects and requires that all functions have"
+            " names. One possible solution is to name the function."
+        ) from err
+@validate_hf_hub_args
+def from_pretrained_fastai(
+    repo_id: str,
+    revision: Optional[str] = None,
+):
+    """
+    Load pretrained fastai model from the Hub or from a local directory.
+    Args:
+        repo_id (`str`):
+            The location where the pickled fastai.Learner is. It can be either of the two:
+                - Hosted on the Hugging Face Hub. E.g.: 'espejelomar/fastai-pet-breeds-classification' or 'distilgpt2'.
+                  Use `revision` to select a specific version of the repo.
+                - Hosted locally. `repo_id` would be a directory containing the pickle and a pyproject.toml
+                  indicating the fastai and fastcore versions used to build the `fastai.Learner`. E.g.: `./my_model_directory/`.
+        revision (`str`, *optional*):
+            Revision at which the repo's files are downloaded. See documentation of `snapshot_download`.
+            Only used when `repo_id` is not an existing local directory.
+    Returns:
+        The `fastai.Learner` model in the `repo_id` repo.
+    """
+    _check_fastai_fastcore_versions()
+    if os.path.isdir(repo_id):
+        # An existing local directory is used directly, nothing is downloaded.
+        storage_folder = repo_id
+    else:
+        # Otherwise `repo_id` is fetched from the Hub; `snapshot_download` returns the folder holding the files.
+        storage_folder = snapshot_download(
+            repo_id=repo_id,
+            revision=revision,
+            library_name="fastai",
+            library_version=get_fastai_version(),
+        )
+    _check_fastai_fastcore_pyproject_versions(storage_folder)
+    # Imported lazily, once the version checks above have passed.
+    from fastai.learner import load_learner  # type: ignore
+    # NOTE: `model.pkl` is a pickled learner, so only load models from sources you trust.
+    model_file = os.path.join(storage_folder, "model.pkl")
+    return load_learner(model_file)
+@validate_hf_hub_args
+def push_to_hub_fastai(
+    learner,
+    *,
+    repo_id: str,
+    commit_message: str = "Push FastAI model using huggingface_hub.",
+    private: Optional[bool] = None,
+    token: Optional[str] = None,
+    config: Optional[dict] = None,
+    branch: Optional[str] = None,
+    create_pr: Optional[bool] = None,
+    allow_patterns: Optional[Union[List[str], str]] = None,
+    ignore_patterns: Optional[Union[List[str], str]] = None,
+    delete_patterns: Optional[Union[List[str], str]] = None,
+    api_endpoint: Optional[str] = None,
+):
+    """
+    Export a fastai learner to a temporary folder and upload that folder to the Hub in a single commit.
+    The repo is created first if it does not exist yet. Use `allow_patterns` and `ignore_patterns` to precisely
+    filter which files should be pushed to the hub. Use `delete_patterns` to delete existing remote files in the
+    same commit. See [`upload_folder`] reference for more details.
+    Args:
+        learner (`Learner`):
+            The `fastai.Learner` you'd like to push to the Hub.
+        repo_id (`str`):
+            The repository id for your model in Hub in the format of "namespace/repo_name". The namespace can be
+            your individual account or an organization to which you have write access (for example,
+            'stanfordnlp/stanza-de').
+        commit_message (`str`, *optional*):
+            Message to commit while pushing. Defaults to `"Push FastAI model using huggingface_hub."`.
+        private (`bool`, *optional*):
+            Whether or not the repository created should be private.
+            If `None` (default), the repo will be public unless the organization's default is private.
+        token (`str`, *optional*):
+            The Hugging Face account token to use as HTTP bearer authorization for remote files.
+        config (`dict`, *optional*):
+            Configuration object to be saved alongside the model weights.
+        branch (`str`, *optional*):
+            The git branch on which to push the model. This defaults to
+            the default branch as specified in your repository, which
+            defaults to `"main"`.
+        create_pr (`boolean`, *optional*):
+            Whether or not to create a Pull Request from `branch` with that commit.
+            Defaults to `False`.
+        allow_patterns (`List[str]` or `str`, *optional*):
+            If provided, only files matching at least one pattern are pushed.
+        ignore_patterns (`List[str]` or `str`, *optional*):
+            If provided, files matching any of the patterns are not pushed.
+        delete_patterns (`List[str]` or `str`, *optional*):
+            If provided, remote files matching any of the patterns will be deleted from the repo.
+        api_endpoint (`str`, *optional*):
+            The API endpoint to use when pushing the model to the hub.
+    Returns:
+        The url of the commit of your model in the given repository.
+    <Tip>
+    Raises the following error:
+        - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+          if the user is not logged in to the Hugging Face Hub.
+    </Tip>
+    """
+    _check_fastai_fastcore_versions()
+    api = HfApi(endpoint=api_endpoint)
+    # `exist_ok=True`: pushing to an already existing repo is fine. Keep the repo id reported back by the Hub.
+    created_repo = api.create_repo(repo_id=repo_id, token=token, private=private, exist_ok=True)
+    repo_id = created_repo.repo_id
+    # Export locally, then push everything to the repo in a single commit.
+    with SoftTemporaryDirectory() as tmp_dir:
+        export_dir = Path(tmp_dir) / repo_id
+        _save_pretrained_fastai(learner, export_dir, config=config)
+        return api.upload_folder(
+            repo_id=repo_id,
+            token=token,
+            folder_path=export_dir,
+            commit_message=commit_message,
+            revision=branch,
+            create_pr=create_pr,
+            allow_patterns=allow_patterns,
+            ignore_patterns=ignore_patterns,
+            delete_patterns=delete_patterns,
+        )

.venv/lib/python3.11/site-packages/huggingface_hub/file_download.py ADDED Viewed

	@@ -0,0 +1,1621 @@

+import contextlib
+import copy
+import errno
+import inspect
+import os
+import re
+import shutil
+import stat
+import time
+import uuid
+import warnings
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, BinaryIO, Dict, Literal, NoReturn, Optional, Tuple, Union
+from urllib.parse import quote, urlparse
+import requests
+from . import (
+    __version__,  # noqa: F401 # for backward compatibility
+    constants,
+)
+from ._local_folder import get_local_download_paths, read_download_metadata, write_download_metadata
+from .constants import (
+    HUGGINGFACE_CO_URL_TEMPLATE,  # noqa: F401 # for backward compatibility
+    HUGGINGFACE_HUB_CACHE,  # noqa: F401 # for backward compatibility
+)
+from .errors import (
+    EntryNotFoundError,
+    FileMetadataError,
+    GatedRepoError,
+    LocalEntryNotFoundError,
+    RepositoryNotFoundError,
+    RevisionNotFoundError,
+)
+from .utils import (
+    OfflineModeIsEnabled,
+    SoftTemporaryDirectory,
+    WeakFileLock,
+    build_hf_headers,
+    get_fastai_version,  # noqa: F401 # for backward compatibility
+    get_fastcore_version,  # noqa: F401 # for backward compatibility
+    get_graphviz_version,  # noqa: F401 # for backward compatibility
+    get_jinja_version,  # noqa: F401 # for backward compatibility
+    get_pydot_version,  # noqa: F401 # for backward compatibility
+    get_session,
+    get_tf_version,  # noqa: F401 # for backward compatibility
+    get_torch_version,  # noqa: F401 # for backward compatibility
+    hf_raise_for_status,
+    is_fastai_available,  # noqa: F401 # for backward compatibility
+    is_fastcore_available,  # noqa: F401 # for backward compatibility
+    is_graphviz_available,  # noqa: F401 # for backward compatibility
+    is_jinja_available,  # noqa: F401 # for backward compatibility
+    is_pydot_available,  # noqa: F401 # for backward compatibility
+    is_tf_available,  # noqa: F401 # for backward compatibility
+    is_torch_available,  # noqa: F401 # for backward compatibility
+    logging,
+    reset_sessions,
+    tqdm,
+    validate_hf_hub_args,
+)
+from .utils._runtime import _PY_VERSION  # noqa: F401 # for backward compatibility
+from .utils._typing import HTTP_METHOD_T
+from .utils.sha import sha_fileobj
+from .utils.tqdm import is_tqdm_disabled
+logger = logging.get_logger(__name__)
+# Sentinel returned when trying to load a file from cache but the file is known not to exist in the distant repo.
+_CACHED_NO_EXIST = object()
+# Alias used to annotate the sentinel above (presumably because a bare `object()` has no dedicated type).
+_CACHED_NO_EXIST_T = Any
+# Regex to get filename from a "Content-Disposition" header for CDN-served files.
+# Only matches a double-quoted filename that is immediately followed by a `;`.
+HEADER_FILENAME_PATTERN = re.compile(r'filename="(?P<filename>.*?)";')
+# Regex to check if the revision IS directly a commit_hash (exactly 40 lowercase hex characters)
+REGEX_COMMIT_HASH = re.compile(r"^[0-9a-f]{40}$")
+# Regex to check if the file etag IS a valid sha256 (exactly 64 lowercase hex characters)
+REGEX_SHA256 = re.compile(r"^[0-9a-f]{64}$")
+# Cache of symlink-support probes, keyed by resolved directory path. Filled by `are_symlinks_supported`.
+_are_symlinks_supported_in_dir: Dict[str, bool] = {}
+def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
+    """Return whether symlinks can be created inside `cache_dir`.
+    Since symlinks support can change depending on the mounted disk, we need to check
+    on the precise cache folder. By default, the default HF cache directory is checked.
+    The check is done by creating a temporary directory inside `cache_dir` and trying to symlink a dummy file in
+    it. The result is memoized per resolved directory path in `_are_symlinks_supported_in_dir`, so the probe (and
+    the warning emitted on failure) happens at most once per directory.
+    Args:
+        cache_dir (`str`, `Path`, *optional*):
+            Path to the folder where cached files are stored. It is created if it does not exist.
+    Returns: [bool] Whether symlinks are supported in the directory.
+    """
+    # Defaults to HF cache
+    if cache_dir is None:
+        cache_dir = constants.HF_HUB_CACHE
+    cache_dir = str(Path(cache_dir).expanduser().resolve())  # make it unique
+    # Check symlink compatibility only once (per cache directory) at first time use
+    if cache_dir not in _are_symlinks_supported_in_dir:
+        # Optimistic default, flipped to False below if the probe fails.
+        # NOTE(review): the entry is written before probing, so if `os.makedirs` or the temporary directory creation
+        # raises, `True` stays cached for this directory although nothing was tested. Confirm this is intended.
+        _are_symlinks_supported_in_dir[cache_dir] = True
+        os.makedirs(cache_dir, exist_ok=True)
+        with SoftTemporaryDirectory(dir=cache_dir) as tmpdir:
+            src_path = Path(tmpdir) / "dummy_file_src"
+            src_path.touch()
+            dst_path = Path(tmpdir) / "dummy_file_dst"
+            # Relative source path as in `_create_symlink`
+            relative_src = os.path.relpath(src_path, start=os.path.dirname(dst_path))
+            try:
+                os.symlink(relative_src, dst_path)
+            except OSError:
+                # Likely running on Windows
+                _are_symlinks_supported_in_dir[cache_dir] = False
+                # Warn the user once, unless the warning has been explicitly disabled.
+                if not constants.HF_HUB_DISABLE_SYMLINKS_WARNING:
+                    message = (
+                        "`huggingface_hub` cache-system uses symlinks by default to"
+                        " efficiently store duplicated files but your machine does not"
+                        f" support them in {cache_dir}. Caching files will still work"
+                        " but in a degraded version that might require more space on"
+                        " your disk. This warning can be disabled by setting the"
+                        " `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For"
+                        " more details, see"
+                        " https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations."
+                    )
+                    # Windows-specific hint on how to enable symlinks.
+                    if os.name == "nt":
+                        message += (
+                            "\nTo support symlinks on Windows, you either need to"
+                            " activate Developer Mode or to run Python as an"
+                            " administrator. In order to activate developer mode,"
+                            " see this article:"
+                            " https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development"
+                        )
+                    warnings.warn(message)
+    return _are_symlinks_supported_in_dir[cache_dir]
+@dataclass(frozen=True)
+class HfFileMetadata:
+    """Immutable data structure containing information about a file versioned on the Hub.
+    Returned by [`get_hf_file_metadata`] based on a URL.
+    Args:
+        commit_hash (`str`, *optional*):
+            The commit_hash related to the file.
+        etag (`str`, *optional*):
+            Etag of the file on the server.
+        location (`str`):
+            Location where to download the file. Can be a Hub url or not (CDN).
+        size (`int`, *optional*):
+            Size of the file. In case of an LFS file, contains the size of the actual
+            LFS file, not the pointer.
+    """
+    commit_hash: Optional[str]
+    etag: Optional[str]
+    location: str
+    size: Optional[int]
+@validate_hf_hub_args
+def hf_hub_url(
+    repo_id: str,
+    filename: str,
+    *,
+    subfolder: Optional[str] = None,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    endpoint: Optional[str] = None,
+) -> str:
+    """Build the URL at which a file of a repo can be resolved.
+    The resolved address can either be a huggingface.co-hosted url, or a link to
+    Cloudfront (a Content Delivery Network, or CDN) for large files which are
+    more than a few MBs.
+    Args:
+        repo_id (`str`):
+            A namespace (user or an organization) name and a repo name separated
+            by a `/`.
+        filename (`str`):
+            The name of the file in the repo.
+        subfolder (`str`, *optional*):
+            An optional value corresponding to a folder inside the repo. An empty
+            string is treated like `None`.
+        repo_type (`str`, *optional*):
+            Set to `"dataset"` or `"space"` if downloading from a dataset or space,
+            `None` or `"model"` if downloading from a model. Default is `None`.
+        revision (`str`, *optional*):
+            An optional Git revision id which can be a branch name, a tag, or a
+            commit hash. Defaults to `constants.DEFAULT_REVISION`.
+        endpoint (`str`, *optional*):
+            If provided, replaces the `constants.ENDPOINT` prefix of the generated
+            URL (only when the URL actually starts with that prefix).
+    Returns:
+        `str`: The URL of the file.
+    Raises:
+        `ValueError`: If `repo_type` is not one of `constants.REPO_TYPES`.
+    Example:
+    ```python
+    >>> from huggingface_hub import hf_hub_url
+    >>> hf_hub_url(
+    ...     repo_id="julien-c/EsperBERTo-small", filename="pytorch_model.bin"
+    ... )
+    'https://huggingface.co/julien-c/EsperBERTo-small/resolve/main/pytorch_model.bin'
+    ```
+    <Tip>
+    Notes:
+        Cloudfront is replicated over the globe so downloads are way faster for
+        the end user (and it also lowers our bandwidth costs).
+        Cloudfront aggressively caches files by default (default TTL is 24
+        hours), however this is not an issue here because we implement a
+        git-based versioning system on huggingface.co, which means that we store
+        the files on S3/Cloudfront in a content-addressable way (i.e., the file
+        name is its hash). Using content-addressable filenames means cache can't
+        ever be stale.
+        In terms of client-side caching from this library, we base our caching
+        on the objects' entity tag (`ETag`), which is an identifier of a
+        specific version of a resource [1]_. An object's ETag is: its git-sha1
+        if stored in git, or its sha256 if stored in git-lfs.
+    </Tip>
+    References:
+    -  [1] https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag
+    """
+    # Path of the file inside the repo: prefixed by the subfolder, unless it is missing or empty.
+    if subfolder is not None and subfolder != "":
+        filename = f"{subfolder}/{filename}"
+    if repo_type not in constants.REPO_TYPES:
+        raise ValueError("Invalid repo type")
+    # Some repo types live under a URL prefix; the others use the bare repo id.
+    prefixed_repo_id = repo_id
+    if repo_type in constants.REPO_TYPES_URL_PREFIXES:
+        prefixed_repo_id = constants.REPO_TYPES_URL_PREFIXES[repo_type] + repo_id
+    # The revision is fully percent-encoded (including `/`), the filename keeps its `/` separators.
+    quoted_revision = quote(constants.DEFAULT_REVISION if revision is None else revision, safe="")
+    url = HUGGINGFACE_CO_URL_TEMPLATE.format(
+        repo_id=prefixed_repo_id, revision=quoted_revision, filename=quote(filename)
+    )
+    # Swap the default endpoint for the custom one, if any.
+    if endpoint is None or not url.startswith(constants.ENDPOINT):
+        return url
+    return endpoint + url[len(constants.ENDPOINT) :]
+def _request_wrapper(
+    method: HTTP_METHOD_T, url: str, *, follow_relative_redirects: bool = False, **params
+) -> requests.Response:
+    """Wrapper around requests methods to follow relative redirects if `follow_relative_redirects=True` even when
+    `allow_redirects=False`.
+    Args:
+        method (`str`):
+            HTTP method, such as 'GET' or 'HEAD'.
+        url (`str`):
+            The URL of the resource to fetch.
+        follow_relative_redirects (`bool`, *optional*, defaults to `False`)
+            If True, relative redirection (redirection to the same site) will be resolved even when `allow_redirects`
+            kwarg is set to False. Useful when we want to follow a redirection to a renamed repository without
+            following redirection to a CDN.
+        **params (`dict`, *optional*):
+            Params to pass to `requests.request`.
+    Returns:
+        `requests.Response`: The response of the last request performed. It can still be a 3xx response when the
+        redirection is absolute (different host) or carries no `Location` header.
+    Raises:
+        Whatever `hf_raise_for_status` raises for the response of any of the requests performed.
+    """
+    # Recursively follow relative redirects
+    if follow_relative_redirects:
+        response = _request_wrapper(
+            method=method,
+            url=url,
+            follow_relative_redirects=False,
+            **params,
+        )
+        # If redirection, we redirect only relative paths.
+        # This is useful in case of a renamed repository.
+        if 300 <= response.status_code <= 399:
+            # Not every 3xx response carries a `Location` header (e.g. 304 Not Modified). In that case there is
+            # nothing to follow: hand the response back to the caller instead of failing with a `KeyError`.
+            location = response.headers.get("Location")
+            if location is not None:
+                parsed_target = urlparse(location)
+                if parsed_target.netloc == "":
+                    # This means it is a relative 'location' headers, as allowed by RFC 7231.
+                    # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
+                    # We want to follow this relative redirect !
+                    #
+                    # Highly inspired by `resolve_redirects` from requests library.
+                    # See https://github.com/psf/requests/blob/main/requests/sessions.py#L159
+                    next_url = urlparse(url)._replace(path=parsed_target.path).geturl()
+                    return _request_wrapper(method=method, url=next_url, follow_relative_redirects=True, **params)
+        return response
+    # Base case: perform a single request and raise a Hub-specific error if the response is an HTTP error.
+    response = get_session().request(method=method, url=url, **params)
+    hf_raise_for_status(response)
+    return response
+def http_get(
+    url: str,
+    temp_file: BinaryIO,
+    *,
+    proxies: Optional[Dict] = None,
+    resume_size: float = 0,
+    headers: Optional[Dict[str, str]] = None,
+    expected_size: Optional[int] = None,
+    displayed_filename: Optional[str] = None,
+    _nb_retries: int = 5,
+    _tqdm_bar: Optional[tqdm] = None,
+) -> None:
+    """
+    Download a remote file. Do not gobble up errors, and will return errors tailored to the Hugging Face Hub.
+    If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely a
+    transient error (network outage?). We log a warning message and try to resume the download a few times before
+    giving up. The method gives up after 5 attempts if no new data has been received from the server.
+    Args:
+        url (`str`):
+            The URL of the file to download.
+        temp_file (`BinaryIO`):
+            The file-like object where to save the file.
+        proxies (`dict`, *optional*):
+            Dictionary mapping protocol to the URL of the proxy passed to `requests.request`.
+        resume_size (`float`, *optional*):
+            The number of bytes already downloaded. If set to 0 (default), the whole file is downloaded. If set to a
+            positive number, the download will resume at the given position.
+        headers (`dict`, *optional*):
+            Dictionary of HTTP Headers to send with the request. The dictionary is not modified.
+        expected_size (`int`, *optional*):
+            The expected size of the file to download. If set, the download will raise an error if the size of the
+            received content is different from the expected one.
+        displayed_filename (`str`, *optional*):
+            The filename of the file that is being downloaded. Value is used only to display a nice progress bar. If
+            not set, the filename is guessed from the URL or the `Content-Disposition` header.
+        _nb_retries (`int`, *optional*, defaults to 5):
+            Internal. Number of resume attempts left. Reset to 5 each time a chunk is received.
+        _tqdm_bar (`tqdm`, *optional*):
+            Internal. Existing progress bar to update instead of creating a new one. It is not closed by this
+            function.
+    Raises:
+        `ValueError`: If `HF_HUB_ENABLE_HF_TRANSFER` is set but the `hf_transfer` package cannot be imported.
+        `RuntimeError`: If the download through `hf_transfer` fails.
+        `EnvironmentError`: If `expected_size` is set and the downloaded size differs from it.
+        `requests.ConnectionError` or `requests.ReadTimeout`: If streaming keeps failing once retries are exhausted.
+    """
+    if expected_size is not None and resume_size == expected_size:
+        # If the file is already fully downloaded, we don't need to download it again.
+        return
+    # `hf_transfer` stays None unless it is enabled, applicable (no resume, no proxies) and importable.
+    hf_transfer = None
+    if constants.HF_HUB_ENABLE_HF_TRANSFER:
+        if resume_size != 0:
+            warnings.warn("'hf_transfer' does not support `resume_size`: falling back to regular download method")
+        elif proxies is not None:
+            warnings.warn("'hf_transfer' does not support `proxies`: falling back to regular download method")
+        else:
+            try:
+                import hf_transfer  # type: ignore[no-redef]
+            except ImportError:
+                raise ValueError(
+                    "Fast download using 'hf_transfer' is enabled"
+                    " (HF_HUB_ENABLE_HF_TRANSFER=1) but 'hf_transfer' package is not"
+                    " available in your environment. Try `pip install hf_transfer`."
+                )
+    # Keep the caller's headers untouched: a retry starts again from them and sets its own `Range` header.
+    initial_headers = headers
+    headers = copy.deepcopy(headers) or {}
+    if resume_size > 0:
+        headers["Range"] = "bytes=%d-" % (resume_size,)
+    r = _request_wrapper(
+        method="GET", url=url, stream=True, proxies=proxies, headers=headers, timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT
+    )
+    hf_raise_for_status(r)
+    content_length = r.headers.get("Content-Length")
+    # NOTE: 'total' is the total number of bytes to download, not the number of bytes in the file.
+    #       If the file is compressed, the number of bytes in the saved file will be higher than 'total'.
+    total = resume_size + int(content_length) if content_length is not None else None
+    if displayed_filename is None:
+        displayed_filename = url
+        content_disposition = r.headers.get("Content-Disposition")
+        if content_disposition is not None:
+            match = HEADER_FILENAME_PATTERN.search(content_disposition)
+            if match is not None:
+                # Means file is on CDN
+                displayed_filename = match.groupdict()["filename"]
+    # Truncate filename if too long to display
+    if len(displayed_filename) > 40:
+        displayed_filename = f"(…){displayed_filename[-40:]}"
+    # `{actual_size}` is deliberately left as a placeholder: it is filled with `.format(...)` when the error is raised.
+    consistency_error_message = (
+        f"Consistency check failed: file should be of size {expected_size} but has size"
+        f" {{actual_size}} ({displayed_filename}).\nThis is usually due to network issues while downloading the file."
+        " Please retry with `force_download=True`."
+    )
+    # Stream file to buffer
+    progress_cm: tqdm = (
+        tqdm(  # type: ignore[assignment]
+            unit="B",
+            unit_scale=True,
+            total=total,
+            initial=resume_size,
+            desc=displayed_filename,
+            disable=is_tqdm_disabled(logger.getEffectiveLevel()),
+            name="huggingface_hub.http_get",
+        )
+        if _tqdm_bar is None
+        else contextlib.nullcontext(_tqdm_bar)
+        # ^ `contextlib.nullcontext` mimics a context manager that does nothing
+        #   Makes it easier to use the same code path for both cases but in the latter
+        #   case, the progress bar is not closed when exiting the context manager.
+    )
+    with progress_cm as progress:
+        # Fast path: `hf_transfer` is only used when the size is known and larger than 5 download chunks.
+        if hf_transfer and total is not None and total > 5 * constants.DOWNLOAD_CHUNK_SIZE:
+            supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
+            if not supports_callback:
+                warnings.warn(
+                    "You are using an outdated version of `hf_transfer`. "
+                    "Consider upgrading to latest version to enable progress bars "
+                    "using `pip install -U hf_transfer`."
+                )
+            try:
+                hf_transfer.download(
+                    url=url,
+                    filename=temp_file.name,
+                    max_files=constants.HF_TRANSFER_CONCURRENCY,
+                    chunk_size=constants.DOWNLOAD_CHUNK_SIZE,
+                    headers=headers,
+                    parallel_failures=3,
+                    max_retries=5,
+                    **({"callback": progress.update} if supports_callback else {}),
+                )
+            except Exception as e:
+                raise RuntimeError(
+                    "An error occurred while downloading using `hf_transfer`. Consider"
+                    " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
+                ) from e
+            # Without callback support the bar could not be updated progressively: fill it in one go.
+            if not supports_callback:
+                progress.update(total)
+            # `hf_transfer` writes to the path `temp_file.name`, hence the size is read from disk here.
+            if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
+                raise EnvironmentError(
+                    consistency_error_message.format(
+                        actual_size=os.path.getsize(temp_file.name),
+                    )
+                )
+            return
+        # Regular path: stream the response chunk by chunk, tracking how many bytes have been written so far.
+        new_resume_size = resume_size
+        try:
+            for chunk in r.iter_content(chunk_size=constants.DOWNLOAD_CHUNK_SIZE):
+                if chunk:  # filter out keep-alive new chunks
+                    progress.update(len(chunk))
+                    temp_file.write(chunk)
+                    new_resume_size += len(chunk)
+                    # Some data has been downloaded from the server so we reset the number of retries.
+                    _nb_retries = 5
+        except (requests.ConnectionError, requests.ReadTimeout) as e:
+            # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
+            # a transient error (network outage?). We log a warning message and try to resume the download a few times
+            # before giving up. The retry mechanism is basic but should be enough in most cases.
+            if _nb_retries <= 0:
+                logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
+                raise
+            logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
+            time.sleep(1)
+            reset_sessions()  # In case of SSLError it's best to reset the shared requests.Session objects
+            # NOTE(review): `displayed_filename` is not forwarded, so the retried call derives it again from the URL
+            # or the `Content-Disposition` header. Confirm this is intended.
+            return http_get(
+                url=url,
+                temp_file=temp_file,
+                proxies=proxies,
+                resume_size=new_resume_size,
+                headers=initial_headers,
+                expected_size=expected_size,
+                _nb_retries=_nb_retries - 1,
+                _tqdm_bar=_tqdm_bar,
+            )
+    # Final consistency check, based on the current position in `temp_file`.
+    if expected_size is not None and expected_size != temp_file.tell():
+        raise EnvironmentError(
+            consistency_error_message.format(
+                actual_size=temp_file.tell(),
+            )
+        )
+def _normalize_etag(etag: Optional[str]) -> Optional[str]:
+    """Normalize ETag HTTP header, so it can be used to create nice filepaths.
+    The HTTP spec allows two forms of ETag:
+      ETag: W/"<etag_value>"
+      ETag: "<etag_value>"
+    For now, we only expect the second form from the server, but we want to be future-proof so we support both. For
+    more context, see `TestNormalizeEtag` tests and https://github.com/huggingface/huggingface_hub/pull/1428.
+    Args:
+        etag (`str`, *optional*): HTTP header
+    Returns:
+        `str` or `None`: the ETag value without the weak-validator prefix (`W/`) and without surrounding double
+        quotes, so that it can be used as a nice directory name. Returns `None` if input is None.
+    """
+    if etag is None:
+        return None
+    # Remove the weak-validator marker as a *prefix*. `str.lstrip("W/")` would instead strip any leading run of the
+    # characters "W" and "/", which corrupts an unquoted value that happens to start with them (e.g. `Wabc`).
+    if etag.startswith("W/"):
+        etag = etag[len("W/") :]
+    return etag.strip('"')
+def _create_relative_symlink(src: str, dst: str, new_blob: bool = False) -> None:
+    """Alias of `_create_symlink`, kept because it is used in the `transformers` conversion script."""
+    # Pure delegation: all arguments are forwarded unchanged.
+    return _create_symlink(src, dst, new_blob=new_blob)
+def _create_symlink(src: str, dst: str, new_blob: bool = False) -> None:
+    """Create a symbolic link named dst pointing to src.
+    Any existing file or link at `dst` is removed first.
+    By default, it will try to create a symlink using a relative path. Relative paths have 2 advantages:
+    - If the cache_folder is moved (example: back-up on a shared drive), relative paths within the cache folder will
+      not break.
+    - Relative paths seems to be better handled on Windows. Issue was reported 3 times in less than a week when
+      changing from relative to absolute paths. See https://github.com/huggingface/huggingface_hub/issues/1398,
+      https://github.com/huggingface/diffusers/issues/2729 and https://github.com/huggingface/transformers/pull/22228.
+      NOTE: The issue with absolute paths doesn't happen on admin mode.
+    When creating a symlink from the cache to a local folder, it is possible that a relative path cannot be created.
+    This happens when paths are not on the same volume. In that case, we use absolute paths.
+    The result layout looks something like
+        └── [ 128]  snapshots
+            ├── [ 128]  2439f60ef33a0d46d85da5001d52aeda5b00ce9f
+            │   ├── [  52]  README.md -> ../../../blobs/d7edf6bd2a681fb0175f7735299831ee1b22b812
+            │   └── [  76]  pytorch_model.bin -> ../../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd
+    If symlinks cannot be created on this platform (most likely to be Windows), the workaround is to avoid symlinks by
+    having the actual file in `dst`. If it is a new file (`new_blob=True`), we move it to `dst`. If it is not a new file
+    (`new_blob=False`), we don't know if the blob file is already referenced elsewhere. To avoid breaking existing
+    cache, the file is duplicated on the disk.
+    In case symlinks are not supported, a warning message is displayed to the user the first time a given directory is
+    checked (see `are_symlinks_supported`). The warning message can be disabled with the
+    `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable.
+    Args:
+        src (`str`):
+            Path of the existing file the link must point to.
+        dst (`str`):
+            Path of the link (or of the moved/copied file) to create.
+        new_blob (`bool`, *optional*, defaults to `False`):
+            Whether `src` is a newly created file. Only used when no symlink is created: the file is then moved
+            (`True`) or copied (`False`) to `dst`.
+    """
+    # Start from a clean slate: drop whatever is currently at `dst`. Failures (e.g. nothing to remove) are ignored.
+    try:
+        os.remove(dst)
+    except OSError:
+        pass
+    abs_src = os.path.abspath(os.path.expanduser(src))
+    abs_dst = os.path.abspath(os.path.expanduser(dst))
+    abs_dst_folder = os.path.dirname(abs_dst)
+    # Prefer a source path relative to the folder containing `dst`
+    try:
+        relative_src = os.path.relpath(abs_src, abs_dst_folder)
+    except ValueError:
+        # Raised on Windows if src and dst are not on the same volume. This is the case when creating a symlink to a
+        # local_dir instead of within the cache directory.
+        # See https://docs.python.org/3/library/os.path.html#os.path.relpath
+        relative_src = None
+    # Find out whether symlinks are usable, probing the deepest folder shared by src and dst when there is one.
+    try:
+        commonpath = os.path.commonpath([abs_src, abs_dst])
+        _support_symlinks = are_symlinks_supported(commonpath)
+    except ValueError:
+        # Raised if src and dst are not on the same volume. Symlinks will still work on Linux/Macos.
+        # See https://docs.python.org/3/library/os.path.html#os.path.commonpath
+        _support_symlinks = os.name != "nt"
+    except PermissionError:
+        # Permission error means src and dst are not in the same volume (e.g. destination path has been provided
+        # by the user via `local_dir`. Let's test symlink support there)
+        _support_symlinks = are_symlinks_supported(abs_dst_folder)
+    except OSError as e:
+        # OS error (errno=30) means that the commonpath is readonly on Linux/MacOS.
+        if e.errno == errno.EROFS:
+            _support_symlinks = are_symlinks_supported(abs_dst_folder)
+        else:
+            raise
+    # Symlinks are supported => let's create a symlink.
+    if _support_symlinks:
+        # Fall back to the absolute source path when no relative path could be computed.
+        src_rel_or_abs = relative_src or abs_src
+        logger.debug(f"Creating pointer from {src_rel_or_abs} to {abs_dst}")
+        try:
+            os.symlink(src_rel_or_abs, abs_dst)
+            return
+        except FileExistsError:
+            if os.path.islink(abs_dst) and os.path.realpath(abs_dst) == os.path.realpath(abs_src):
+                # `abs_dst` already exists and is a symlink to the `abs_src` blob. It is most likely that the file has
+                # been cached twice concurrently (exactly between `os.remove` and `os.symlink`). Do nothing.
+                return
+            else:
+                # Very unlikely to happen. Means a file `dst` has been created exactly between `os.remove` and
+                # `os.symlink` and is not a symlink to the `abs_src` blob file. Raise exception.
+                raise
+        except PermissionError:
+            # Permission error means src and dst are not in the same volume (e.g. download to local dir) and symlink
+            # is supported on both volumes but not between them. Let's just make a hard copy in that case.
+            pass
+    # Symlinks are not supported (or creating one failed with a permission error) => let's move or copy the file.
+    if new_blob:
+        logger.info(f"Symlink not supported. Moving file from {abs_src} to {abs_dst}")
+        shutil.move(abs_src, abs_dst, copy_function=_copy_no_matter_what)
+    else:
+        logger.info(f"Symlink not supported. Copying file from {abs_src} to {abs_dst}")
+        shutil.copyfile(abs_src, abs_dst)
+def _cache_commit_hash_for_specific_revision(storage_folder: str, revision: str, commit_hash: str) -> None:
+    """Store under `<storage_folder>/refs/<revision>` the commit hash a revision (tag, branch or truncated commit
+    hash) resolves to.
+    Nothing is written when `revision` is already the `commit_hash` itself, or when the ref file already contains
+    `commit_hash`.
+    """
+    if revision == commit_hash:
+        # The revision is a full commit hash: no reference to keep.
+        return
+    ref_file = Path(storage_folder) / "refs" / revision
+    ref_file.parent.mkdir(parents=True, exist_ok=True)
+    if ref_file.exists() and ref_file.read_text() == commit_hash:
+        # Already up to date. Skipping the write avoids a useless error when the repo is already cached and the
+        # user doesn't have write access to the cache folder.
+        # See https://github.com/huggingface/huggingface_hub/issues/1216.
+        return
+    ref_file.write_text(commit_hash)
+@validate_hf_hub_args
+def repo_folder_name(*, repo_id: str, repo_type: str) -> str:
+    """Return a serialized version of a hf.co repo name and type, safe for disk storage
+    as a single non-nested folder.
+    Example: models--julien-c--EsperBERTo-small
+    """
+    # remove all `/` occurrences to correctly convert repo to directory name
+    parts = [f"{repo_type}s", *repo_id.split("/")]
+    return constants.REPO_ID_SEPARATOR.join(parts)
+def _check_disk_space(expected_size: int, target_dir: Union[str, Path]) -> None:
+    """Check disk usage and log a warning if there is not enough disk space to download the file.
+    Args:
+        expected_size (`int`):
+            The expected size of the file in bytes.
+        target_dir (`str`):
+            The directory where the file will be stored after downloading.
+    """
+    target_dir = Path(target_dir)  # format as `Path`
+    for path in [target_dir] + list(target_dir.parents):  # first check target_dir, then each parents one by one
+        try:
+            target_dir_free = shutil.disk_usage(path).free
+            if target_dir_free < expected_size:
+                warnings.warn(
+                    "Not enough free disk space to download the file. "
+                    f"The expected file size is: {expected_size / 1e6:.2f} MB. "
+                    f"The target location {target_dir} only has {target_dir_free / 1e6:.2f} MB free disk space."
+                )
+            return
+        except OSError:  # raise on anything: file does not exist or space disk cannot be checked
+            pass
+@validate_hf_hub_args
+def hf_hub_download(
+    repo_id: str,
+    filename: str,
+    *,
+    subfolder: Optional[str] = None,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    user_agent: Union[Dict, str, None] = None,
+    force_download: bool = False,
+    proxies: Optional[Dict] = None,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    token: Union[bool, str, None] = None,
+    local_files_only: bool = False,
+    headers: Optional[Dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    resume_download: Optional[bool] = None,
+    force_filename: Optional[str] = None,
+    local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
+) -> str:
+    """Download a given file if it's not already present in the local cache.
+    The new cache file layout looks like this:
+    - The cache directory contains one subfolder per repo_id (namespaced by repo type)
+    - inside each repo folder:
+        - refs is a list of the latest known revision => commit_hash pairs
+        - blobs contains the actual file blobs (identified by their git-sha or sha256, depending on
+          whether they're LFS files or not)
+        - snapshots contains one subfolder per commit, each "commit" contains the subset of the files
+          that have been resolved at that particular commit. Each filename is a symlink to the blob
+          at that particular commit.
+    ```
+    [  96]  .
+    └── [ 160]  models--julien-c--EsperBERTo-small
+        ├── [ 160]  blobs
+        │   ├── [321M]  403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd
+        │   ├── [ 398]  7cb18dc9bafbfcf74629a4b760af1b160957a83e
+        │   └── [1.4K]  d7edf6bd2a681fb0175f7735299831ee1b22b812
+        ├── [  96]  refs
+        │   └── [  40]  main
+        └── [ 128]  snapshots
+            ├── [ 128]  2439f60ef33a0d46d85da5001d52aeda5b00ce9f
+            │   ├── [  52]  README.md -> ../../blobs/d7edf6bd2a681fb0175f7735299831ee1b22b812
+            │   └── [  76]  pytorch_model.bin -> ../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd
+            └── [ 128]  bbc77c8132af1cc5cf678da3f1ddf2de43606d48
+                ├── [  52]  README.md -> ../../blobs/7cb18dc9bafbfcf74629a4b760af1b160957a83e
+                └── [  76]  pytorch_model.bin -> ../../blobs/403450e234d65943a7dcf7e05a771ce3c92faa84dd07db4ac20f592037a1e4bd
+    ```
+    If `local_dir` is provided, the file structure from the repo will be replicated in this location. When using this
+    option, the `cache_dir` will not be used and a `.cache/huggingface/` folder will be created at the root of `local_dir`
+    to store some metadata related to the downloaded files. While this mechanism is not as robust as the main
+    cache-system, it's optimized for regularly pulling the latest version of a repository.
+    Args:
+        repo_id (`str`):
+            A user or an organization name and a repo name separated by a `/`.
+        filename (`str`):
+            The name of the file in the repo.
+        subfolder (`str`, *optional*):
+            An optional value corresponding to a folder inside the model repo.
+        repo_type (`str`, *optional*):
+            Set to `"dataset"` or `"space"` if downloading from a dataset or space,
+            `None` or `"model"` if downloading from a model. Default is `None`.
+        revision (`str`, *optional*):
+            An optional Git revision id which can be a branch name, a tag, or a
+            commit hash.
+        library_name (`str`, *optional*):
+            The name of the library to which the object corresponds.
+        library_version (`str`, *optional*):
+            The version of the library.
+        cache_dir (`str`, `Path`, *optional*):
+            Path to the folder where cached files are stored.
+        local_dir (`str` or `Path`, *optional*):
+            If provided, the downloaded file will be placed under this directory.
+        user_agent (`dict`, `str`, *optional*):
+            The user-agent info in the form of a dictionary or a string.
+        force_download (`bool`, *optional*, defaults to `False`):
+            Whether the file should be downloaded even if it already exists in
+            the local cache.
+        proxies (`dict`, *optional*):
+            Dictionary mapping protocol to the URL of the proxy passed to
+            `requests.request`.
+        etag_timeout (`float`, *optional*, defaults to `10`):
+            When fetching ETag, how many seconds to wait for the server to send
+            data before giving up which is passed to `requests.request`.
+        token (`str`, `bool`, *optional*):
+            A token to be used for the download.
+                - If `True`, the token is read from the HuggingFace config
+                  folder.
+                - If a string, it's used as the authentication token.
+        local_files_only (`bool`, *optional*, defaults to `False`):
+            If `True`, avoid downloading the file and return the path to the
+            local cached file if it exists.
+        headers (`dict`, *optional*):
+            Additional headers to be sent with the request.
+    Returns:
+        `str`: Local path of file or if networking is off, last version of file cached on disk.
+    Raises:
+        [`~utils.RepositoryNotFoundError`]
+            If the repository to download from cannot be found. This may be because it doesn't exist,
+            or because it is set to `private` and you do not have access.
+        [`~utils.RevisionNotFoundError`]
+            If the revision to download from cannot be found.
+        [`~utils.EntryNotFoundError`]
+            If the file to download cannot be found.
+        [`~utils.LocalEntryNotFoundError`]
+            If network is disabled or unavailable and file is not found in cache.
+        [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
+            If `token=True` but the token cannot be found.
+        [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError)
+            If ETag cannot be determined.
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+            If some parameter value is invalid.
+    """
+    if constants.HF_HUB_ETAG_TIMEOUT != constants.DEFAULT_ETAG_TIMEOUT:
+        # Respect environment variable above user value
+        etag_timeout = constants.HF_HUB_ETAG_TIMEOUT
+    if force_filename is not None:
+        warnings.warn(
+            "The `force_filename` parameter is deprecated as a new caching system, "
+            "which keeps the filenames as they are on the Hub, is now in place.",
+            FutureWarning,
+        )
+    if resume_download is not None:
+        warnings.warn(
+            "`resume_download` is deprecated and will be removed in version 1.0.0. "
+            "Downloads always resume when possible. "
+            "If you want to force a new download, use `force_download=True`.",
+            FutureWarning,
+        )
+    if cache_dir is None:
+        cache_dir = constants.HF_HUB_CACHE
+    if revision is None:
+        revision = constants.DEFAULT_REVISION
+    if isinstance(cache_dir, Path):
+        cache_dir = str(cache_dir)
+    if isinstance(local_dir, Path):
+        local_dir = str(local_dir)
+    if subfolder == "":
+        subfolder = None
+    if subfolder is not None:
+        # This is used to create a URL, and not a local path, hence the forward slash.
+        filename = f"{subfolder}/{filename}"
+    if repo_type is None:
+        repo_type = "model"
+    if repo_type not in constants.REPO_TYPES:
+        raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}")
+    hf_headers = build_hf_headers(
+        token=token,
+        library_name=library_name,
+        library_version=library_version,
+        user_agent=user_agent,
+        headers=headers,
+    )
+    if local_dir is not None:
+        if local_dir_use_symlinks != "auto":
+            warnings.warn(
+                "`local_dir_use_symlinks` parameter is deprecated and will be ignored. "
+                "The process to download files to a local folder has been updated and do "
+                "not rely on symlinks anymore. You only need to pass a destination folder "
+                "as`local_dir`.\n"
+                "For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder."
+            )
+        return _hf_hub_download_to_local_dir(
+            # Destination
+            local_dir=local_dir,
+            # File info
+            repo_id=repo_id,
+            repo_type=repo_type,
+            filename=filename,
+            revision=revision,
+            # HTTP info
+            endpoint=endpoint,
+            etag_timeout=etag_timeout,
+            headers=hf_headers,
+            proxies=proxies,
+            token=token,
+            # Additional options
+            cache_dir=cache_dir,
+            force_download=force_download,
+            local_files_only=local_files_only,
+        )
+    else:
+        return _hf_hub_download_to_cache_dir(
+            # Destination
+            cache_dir=cache_dir,
+            # File info
+            repo_id=repo_id,
+            filename=filename,
+            repo_type=repo_type,
+            revision=revision,
+            # HTTP info
+            endpoint=endpoint,
+            etag_timeout=etag_timeout,
+            headers=hf_headers,
+            proxies=proxies,
+            token=token,
+            # Additional options
+            local_files_only=local_files_only,
+            force_download=force_download,
+        )
+def _hf_hub_download_to_cache_dir(
+    *,
+    # Destination
+    cache_dir: str,
+    # File info
+    repo_id: str,
+    filename: str,
+    repo_type: str,
+    revision: str,
+    # HTTP info
+    endpoint: Optional[str],
+    etag_timeout: float,
+    headers: Dict[str, str],
+    proxies: Optional[Dict],
+    token: Optional[Union[bool, str]],
+    # Additional options
+    local_files_only: bool,
+    force_download: bool,
+) -> str:
+    """Download a given file to a cache folder, if not already present.
+    Method should not be called directly. Please use `hf_hub_download` instead.
+    Args:
+        cache_dir (`str`):
+            Root of the cache. The repo storage folder and a `.locks` folder are located inside it.
+        repo_id (`str`):
+            Id of the repo, used to compute the storage folder name and to query the server.
+        filename (`str`):
+            `/`-separated path of the file inside the repo.
+        repo_type (`str`):
+            Type of the repo, used to compute the storage folder name and to query the server.
+        revision (`str`):
+            Revision to download. If it matches `REGEX_COMMIT_HASH` it is treated as a commit hash,
+            otherwise it is resolved through the `refs` folder when the server cannot be reached.
+        endpoint (`str`, *optional*), etag_timeout (`float`), token (`str`, `bool`, *optional*):
+            Forwarded to `_get_metadata_or_catch_error`.
+        headers (`dict`), proxies (`dict`, *optional*):
+            Forwarded to `_get_metadata_or_catch_error` and to `_download_to_tmp_and_move`.
+        local_files_only (`bool`):
+            Forwarded to `_get_metadata_or_catch_error` and `_raise_on_head_call_error`.
+        force_download (`bool`):
+            If `True`, none of the "file already on disk" shortcuts are taken.
+    Returns:
+        `str`: Path of the pointer file for `filename` at the resolved commit.
+    Raises:
+        `ValueError`: On Windows, if the local path derived from `filename` contains a `..` component.
+        Other errors come from `_raise_on_head_call_error` when metadata could not be fetched and no
+        local file can be returned (presumably it always raises; the assertions that follow rely on it).
+    """
+    locks_dir = os.path.join(cache_dir, ".locks")
+    storage_folder = os.path.join(cache_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type))
+    # cross platform transcription of filename, to be used as a local file path.
+    relative_filename = os.path.join(*filename.split("/"))
+    if os.name == "nt":
+        # Reject parent-directory components in the Windows-style relative path.
+        if relative_filename.startswith("..\\") or "\\..\\" in relative_filename:
+            raise ValueError(
+                f"Invalid filename: cannot handle filename '{relative_filename}' on Windows. Please ask the repository"
+                " owner to rename this file."
+            )
+    # if user provides a commit_hash and they already have the file on disk, shortcut everything.
+    if REGEX_COMMIT_HASH.match(revision):
+        pointer_path = _get_pointer_path(storage_folder, revision, relative_filename)
+        if os.path.exists(pointer_path) and not force_download:
+            return pointer_path
+    # Try to get metadata (etag, commit_hash, url, size) from the server.
+    # If we can't, a HEAD request error is returned.
+    (url_to_download, etag, commit_hash, expected_size, head_call_error) = _get_metadata_or_catch_error(
+        repo_id=repo_id,
+        filename=filename,
+        repo_type=repo_type,
+        revision=revision,
+        endpoint=endpoint,
+        proxies=proxies,
+        etag_timeout=etag_timeout,
+        headers=headers,
+        token=token,
+        local_files_only=local_files_only,
+        storage_folder=storage_folder,
+        relative_filename=relative_filename,
+    )
+    # etag can be None for several reasons:
+    # 1. we passed local_files_only.
+    # 2. we don't have a connection
+    # 3. Hub is down (HTTP 500, 503, 504)
+    # 4. repo is not found -for example private or gated- and invalid/missing token sent
+    # 5. Hub is blocked by a firewall or proxy is not set correctly.
+    # => Try to get the last downloaded one from the specified revision.
+    #
+    # If the specified revision is a commit hash, look inside "snapshots".
+    # If the specified revision is a branch or tag, look inside "refs".
+    if head_call_error is not None:
+        # Couldn't make a HEAD call => let's try to find a local file
+        if not force_download:
+            commit_hash = None
+            if REGEX_COMMIT_HASH.match(revision):
+                commit_hash = revision
+            else:
+                # Branch or tag: the ref file content is used as the commit hash, as-is.
+                ref_path = os.path.join(storage_folder, "refs", revision)
+                if os.path.isfile(ref_path):
+                    with open(ref_path) as f:
+                        commit_hash = f.read()
+            # Return pointer file if exists
+            if commit_hash is not None:
+                pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename)
+                if os.path.exists(pointer_path) and not force_download:
+                    return pointer_path
+        # Otherwise, raise appropriate error
+        _raise_on_head_call_error(head_call_error, force_download, local_files_only)
+    # From now on, etag, commit_hash, url and size are not None.
+    assert etag is not None, "etag must have been retrieved from server"
+    assert commit_hash is not None, "commit_hash must have been retrieved from server"
+    assert url_to_download is not None, "file location must have been retrieved from server"
+    assert expected_size is not None, "expected_size must have been retrieved from server"
+    # The blob is stored under its etag; the pointer lives under the resolved commit hash.
+    blob_path = os.path.join(storage_folder, "blobs", etag)
+    pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename)
+    os.makedirs(os.path.dirname(blob_path), exist_ok=True)
+    os.makedirs(os.path.dirname(pointer_path), exist_ok=True)
+    # if passed revision is not identical to commit_hash
+    # then revision has to be a branch name or tag name.
+    # In that case store a ref.
+    _cache_commit_hash_for_specific_revision(storage_folder, revision, commit_hash)
+    # If file already exists, return it (except if force_download=True)
+    if not force_download:
+        if os.path.exists(pointer_path):
+            return pointer_path
+        if os.path.exists(blob_path):
+            # we have the blob already, but not the pointer
+            _create_symlink(blob_path, pointer_path, new_blob=False)
+            return pointer_path
+    # Prevent parallel downloads of the same file with a lock.
+    # The lock file is namespaced by repo folder because the same etag could be duplicated across repos.
+    lock_path = os.path.join(locks_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type), f"{etag}.lock")
+    # Some Windows versions do not allow for paths longer than 255 characters.
+    # In this case, we must specify it as an extended path by using the "\\?\" prefix.
+    if os.name == "nt" and len(os.path.abspath(lock_path)) > 255:
+        lock_path = "\\\\?\\" + os.path.abspath(lock_path)
+    if os.name == "nt" and len(os.path.abspath(blob_path)) > 255:
+        blob_path = "\\\\?\\" + os.path.abspath(blob_path)
+    Path(lock_path).parent.mkdir(parents=True, exist_ok=True)
+    with WeakFileLock(lock_path):
+        # Download into `<blob_path>.incomplete`, with `blob_path` as the final destination.
+        _download_to_tmp_and_move(
+            incomplete_path=Path(blob_path + ".incomplete"),
+            destination_path=Path(blob_path),
+            url_to_download=url_to_download,
+            proxies=proxies,
+            headers=headers,
+            expected_size=expected_size,
+            filename=filename,
+            force_download=force_download,
+        )
+        # Create the pointer while still holding the lock, unless it already exists.
+        if not os.path.exists(pointer_path):
+            _create_symlink(blob_path, pointer_path, new_blob=True)
+    return pointer_path
+def _hf_hub_download_to_local_dir(
+    *,
+    # Destination
+    local_dir: Union[str, Path],
+    # File info
+    repo_id: str,
+    repo_type: str,
+    filename: str,
+    revision: str,
+    # HTTP info
+    endpoint: Optional[str],
+    etag_timeout: float,
+    headers: Dict[str, str],
+    proxies: Optional[Dict],
+    token: Union[bool, str, None],
+    # Additional options
+    cache_dir: str,
+    force_download: bool,
+    local_files_only: bool,
+) -> str:
+    """Download a given file to a local folder, if not already present.
+    Method should not be called directly. Please use `hf_hub_download` instead.
+    The existing local file is reused whenever it can be shown to be up to date (matching commit hash,
+    matching etag, or matching sha256 content hash). Otherwise the file is copied from the cache if it
+    is found there, and downloaded as a last resort.
+    Args:
+        local_dir (`str` or `Path`):
+            Destination folder.
+        repo_id (`str`), repo_type (`str`), filename (`str`), revision (`str`):
+            Identify the file to download.
+        endpoint (`str`, *optional*), etag_timeout (`float`), token (`str`, `bool`, *optional*):
+            Forwarded to `_get_metadata_or_catch_error`.
+        headers (`dict`), proxies (`dict`, *optional*):
+            Forwarded to `_get_metadata_or_catch_error` and to `_download_to_tmp_and_move`.
+        cache_dir (`str`):
+            Cache folder searched (via `try_to_load_from_cache`) for an already downloaded copy.
+        force_download (`bool`):
+            If `True`, neither the existing local file nor the cache is reused.
+        local_files_only (`bool`):
+            Forwarded to `_get_metadata_or_catch_error` and `_raise_on_head_call_error`.
+    Returns:
+        `str`: Path of the file inside `local_dir`.
+    Raises:
+        Errors come from `_raise_on_head_call_error` when metadata could not be fetched and no local
+        file can be returned (presumably it always raises; the assertions that follow rely on it).
+    """
+    # Some Windows versions do not allow for paths longer than 255 characters.
+    # In this case, we must specify it as an extended path by using the "\\?\" prefix.
+    if os.name == "nt" and len(os.path.abspath(local_dir)) > 255:
+        local_dir = "\\\\?\\" + os.path.abspath(local_dir)
+    local_dir = Path(local_dir)
+    paths = get_local_download_paths(local_dir=local_dir, filename=filename)
+    local_metadata = read_download_metadata(local_dir=local_dir, filename=filename)
+    # Local file exists + metadata exists + commit_hash matches => return file
+    if (
+        not force_download
+        and REGEX_COMMIT_HASH.match(revision)
+        and paths.file_path.is_file()
+        and local_metadata is not None
+        and local_metadata.commit_hash == revision
+    ):
+        return str(paths.file_path)
+    # Local file doesn't exist or commit_hash doesn't match => we need the etag
+    (url_to_download, etag, commit_hash, expected_size, head_call_error) = _get_metadata_or_catch_error(
+        repo_id=repo_id,
+        filename=filename,
+        repo_type=repo_type,
+        revision=revision,
+        endpoint=endpoint,
+        proxies=proxies,
+        etag_timeout=etag_timeout,
+        headers=headers,
+        token=token,
+        local_files_only=local_files_only,
+    )
+    if head_call_error is not None:
+        # No HEAD call but local file exists => default to local file
+        if not force_download and paths.file_path.is_file():
+            logger.warning(
+                f"Couldn't access the Hub to check for update but local file already exists. Defaulting to existing file. (error: {head_call_error})"
+            )
+            return str(paths.file_path)
+        # Otherwise => raise
+        _raise_on_head_call_error(head_call_error, force_download, local_files_only)
+    # From now on, etag, commit_hash, url and size are not None.
+    assert etag is not None, "etag must have been retrieved from server"
+    assert commit_hash is not None, "commit_hash must have been retrieved from server"
+    assert url_to_download is not None, "file location must have been retrieved from server"
+    assert expected_size is not None, "expected_size must have been retrieved from server"
+    # Local file exists => check if it's up-to-date
+    if not force_download and paths.file_path.is_file():
+        # etag matches => update metadata and return file
+        if local_metadata is not None and local_metadata.etag == etag:
+            write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
+            return str(paths.file_path)
+        # no local metadata available (`local_metadata is None`) + etag is a sha256
+        # => means it's an LFS file (large)
+        # => let's compute local hash and compare
+        # => if match, update metadata and return file
+        if local_metadata is None and REGEX_SHA256.match(etag) is not None:
+            with open(paths.file_path, "rb") as f:
+                file_hash = sha_fileobj(f).hex()
+            if file_hash == etag:
+                write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
+                return str(paths.file_path)
+    # Local file doesn't exist or etag isn't a match => retrieve file from remote (or cache)
+    # If we are lucky enough, the file is already in the cache => copy it
+    if not force_download:
+        cached_path = try_to_load_from_cache(
+            repo_id=repo_id,
+            filename=filename,
+            cache_dir=cache_dir,
+            revision=commit_hash,
+            repo_type=repo_type,
+        )
+        # Only a `str` result is an actual cached file (the other possible results are
+        # `None` and the `_CACHED_NO_EXIST` sentinel).
+        if isinstance(cached_path, str):
+            with WeakFileLock(paths.lock_path):
+                paths.file_path.parent.mkdir(parents=True, exist_ok=True)
+                shutil.copyfile(cached_path, paths.file_path)
+            write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
+            return str(paths.file_path)
+    # Otherwise, let's download the file!
+    with WeakFileLock(paths.lock_path):
+        paths.file_path.unlink(missing_ok=True)  # delete outdated file first
+        _download_to_tmp_and_move(
+            incomplete_path=paths.incomplete_path(etag),
+            destination_path=paths.file_path,
+            url_to_download=url_to_download,
+            proxies=proxies,
+            headers=headers,
+            expected_size=expected_size,
+            filename=filename,
+            force_download=force_download,
+        )
+    # Record the commit hash and etag of the file that was just downloaded.
+    write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
+    return str(paths.file_path)
+@validate_hf_hub_args
+def try_to_load_from_cache(
+    repo_id: str,
+    filename: str,
+    cache_dir: Union[str, Path, None] = None,
+    revision: Optional[str] = None,
+    repo_type: Optional[str] = None,
+) -> Union[str, _CACHED_NO_EXIST_T, None]:
+    """
+    Explores the cache to return the latest cached file for a given revision if found.
+    This function will not raise any exception if the file in not cached.
+    Args:
+        cache_dir (`str` or `os.PathLike`):
+            The folder where the cached files lie.
+        repo_id (`str`):
+            The ID of the repo on huggingface.co.
+        filename (`str`):
+            The filename to look for inside `repo_id`.
+        revision (`str`, *optional*):
+            The specific model version to use. Will default to `"main"` if it's not provided and no `commit_hash` is
+            provided either.
+        repo_type (`str`, *optional*):
+            The type of the repository. Will default to `"model"`.
+    Returns:
+        `Optional[str]` or `_CACHED_NO_EXIST`:
+            Will return `None` if the file was not cached. Otherwise:
+            - The exact path to the cached file if it's found in the cache
+            - A special value `_CACHED_NO_EXIST` if the file does not exist at the given commit hash and this fact was
+              cached.
+    Example:
+    ```python
+    from huggingface_hub import try_to_load_from_cache, _CACHED_NO_EXIST
+    filepath = try_to_load_from_cache()
+    if isinstance(filepath, str):
+        # file exists and is cached
+        ...
+    elif filepath is _CACHED_NO_EXIST:
+        # non-existence of file is cached
+        ...
+    else:
+        # file is not cached
+        ...
+    ```
+    """
+    if revision is None:
+        revision = "main"
+    if repo_type is None:
+        repo_type = "model"
+    if repo_type not in constants.REPO_TYPES:
+        raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(constants.REPO_TYPES)}")
+    if cache_dir is None:
+        cache_dir = constants.HF_HUB_CACHE
+    object_id = repo_id.replace("/", "--")
+    repo_cache = os.path.join(cache_dir, f"{repo_type}s--{object_id}")
+    if not os.path.isdir(repo_cache):
+        # No cache for this model
+        return None
+    refs_dir = os.path.join(repo_cache, "refs")
+    snapshots_dir = os.path.join(repo_cache, "snapshots")
+    no_exist_dir = os.path.join(repo_cache, ".no_exist")
+    # Resolve refs (for instance to convert main to the associated commit sha)
+    if os.path.isdir(refs_dir):
+        revision_file = os.path.join(refs_dir, revision)
+        if os.path.isfile(revision_file):
+            with open(revision_file) as f:
+                revision = f.read()
+    # Check if file is cached as "no_exist"
+    if os.path.isfile(os.path.join(no_exist_dir, revision, filename)):
+        return _CACHED_NO_EXIST
+    # Check if revision folder exists
+    if not os.path.exists(snapshots_dir):
+        return None
+    cached_shas = os.listdir(snapshots_dir)
+    if revision not in cached_shas:
+        # No cache for this revision and we won't try to return a random revision
+        return None
+    # Check if file exists in cache
+    cached_file = os.path.join(snapshots_dir, revision, filename)
+    return cached_file if os.path.isfile(cached_file) else None
+@validate_hf_hub_args
+def get_hf_file_metadata(
+    url: str,
+    token: Union[bool, str, None] = None,
+    proxies: Optional[Dict] = None,
+    timeout: Optional[float] = constants.DEFAULT_REQUEST_TIMEOUT,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    user_agent: Union[Dict, str, None] = None,
+    headers: Optional[Dict[str, str]] = None,
+) -> HfFileMetadata:
+    """Fetch metadata of a file versioned on the Hub for a given url.
+    Args:
+        url (`str`):
+            File url, for example returned by [`hf_hub_url`].
+        token (`str` or `bool`, *optional*):
+            A token to be used for the download.
+                - If `True`, the token is read from the HuggingFace config
+                  folder.
+                - If `False` or `None`, no token is provided.
+                - If a string, it's used as the authentication token.
+        proxies (`dict`, *optional*):
+            Dictionary mapping protocol to the URL of the proxy passed to
+            `requests.request`.
+        timeout (`float`, *optional*, defaults to 10):
+            How many seconds to wait for the server to send metadata before giving up.
+        library_name (`str`, *optional*):
+            The name of the library to which the object corresponds.
+        library_version (`str`, *optional*):
+            The version of the library.
+        user_agent (`dict`, `str`, *optional*):
+            The user-agent info in the form of a dictionary or a string.
+        headers (`dict`, *optional*):
+            Additional headers to be sent with the request.
+    Returns:
+        A [`HfFileMetadata`] object containing metadata such as location, etag, size and
+        commit_hash.
+    """
+    hf_headers = build_hf_headers(
+        token=token,
+        library_name=library_name,
+        library_version=library_version,
+        user_agent=user_agent,
+        headers=headers,
+    )
+    hf_headers["Accept-Encoding"] = "identity"  # prevent any compression => we want to know the real size of the file
+    # Retrieve metadata
+    r = _request_wrapper(
+        method="HEAD",
+        url=url,
+        headers=hf_headers,
+        allow_redirects=False,
+        follow_relative_redirects=True,
+        proxies=proxies,
+        timeout=timeout,
+    )
+    hf_raise_for_status(r)
+    # Return
+    return HfFileMetadata(
+        commit_hash=r.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT),
+        # We favor a custom header indicating the etag of the linked resource, and
+        # we fallback to the regular etag header.
+        etag=_normalize_etag(r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")),
+        # Either from response headers (if redirected) or defaults to request url
+        # Do not use directly `url`, as `_request_wrapper` might have followed relative
+        # redirects.
+        location=r.headers.get("Location") or r.request.url,  # type: ignore
+        size=_int_or_none(
+            r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_SIZE) or r.headers.get("Content-Length")
+        ),
+    )
+def _get_metadata_or_catch_error(
+    *,
+    repo_id: str,
+    filename: str,
+    repo_type: str,
+    revision: str,
+    endpoint: Optional[str],
+    proxies: Optional[Dict],
+    etag_timeout: Optional[float],
+    headers: Dict[str, str],  # mutated inplace!
+    token: Union[bool, str, None],
+    local_files_only: bool,
+    relative_filename: Optional[str] = None,  # only used to store `.no_exist` in cache
+    storage_folder: Optional[str] = None,  # only used to store `.no_exist` in cache
+) -> Union[
+    # Either an exception is caught and returned
+    Tuple[None, None, None, None, Exception],
+    # Or the metadata is returned as
+    # `(url_to_download, etag, commit_hash, expected_size, None)`
+    Tuple[str, str, str, int, None],
+]:
+    """Get metadata for a file on the Hub, safely handling network issues.
+    Returns a 5-tuple `(url_to_download, etag, commit_hash, expected_size, error)`:
+    - On success, `error` is `None` and the four other values are set.
+    - If `local_files_only=True`, no request is made and the tuple is
+      `(None, None, None, None, OfflineModeIsEnabled(...))`.
+    - If the metadata request fails with a connection error, a timeout, an
+      `OfflineModeIsEnabled`, another `requests.HTTPError` or a `FileMetadataError`,
+      the exception is returned as the last element instead of being raised. The other
+      elements then hold whatever was assigned before the failure (`url_to_download`
+      starts as the URL built by `hf_hub_url`, the others start as `None`).
+    Side effect: when the file is reported missing (`EntryNotFoundError`) and both
+    `storage_folder` and `relative_filename` are given, an empty marker file is created under
+    `<storage_folder>/.no_exist/<commit_hash>/<relative_filename>` and the commit hash is
+    recorded for `revision`, before the error is re-raised.
+    NOTE: This function mutates `headers` inplace! It removes the `authorization` header
+          if the file is a LFS blob and the domain of the url is different from the
+          domain of the location (typically an S3 bucket).
+    Raises:
+        `requests.exceptions.SSLError`, `requests.exceptions.ProxyError`:
+            Re-raised as is (not treated as a plain connection failure).
+        `RevisionNotFoundError`, `EntryNotFoundError`:
+            Re-raised as is: the repo was reached but the revision or entry does not exist.
+        `RuntimeError`:
+            If no etag was obtained and no error was recorded either.
+    """
+    if local_files_only:
+        # Offline by request: report it as an error value, the caller decides what to do.
+        return (
+            None,
+            None,
+            None,
+            None,
+            OfflineModeIsEnabled(
+                f"Cannot access file since 'local_files_only=True' as been set. (repo_id: {repo_id}, repo_type: {repo_type}, revision: {revision}, filename: {filename})"
+            ),
+        )
+    url = hf_hub_url(repo_id, filename, repo_type=repo_type, revision=revision, endpoint=endpoint)
+    # Defaults returned when the metadata request fails part-way through.
+    url_to_download: str = url
+    etag: Optional[str] = None
+    commit_hash: Optional[str] = None
+    expected_size: Optional[int] = None
+    head_error_call: Optional[Exception] = None
+    # Try to get metadata from the server.
+    # Do not raise yet if the file is not found or not accessible.
+    # NOTE(review): this guard is always true here, since `local_files_only=True` returned early above.
+    if not local_files_only:
+        try:
+            try:
+                metadata = get_hf_file_metadata(
+                    url=url, proxies=proxies, timeout=etag_timeout, headers=headers, token=token
+                )
+            except EntryNotFoundError as http_error:
+                if storage_folder is not None and relative_filename is not None:
+                    # Cache the non-existence of the file
+                    commit_hash = http_error.response.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT)
+                    if commit_hash is not None:
+                        no_exist_file_path = Path(storage_folder) / ".no_exist" / commit_hash / relative_filename
+                        try:
+                            no_exist_file_path.parent.mkdir(parents=True, exist_ok=True)
+                            no_exist_file_path.touch()
+                        except OSError as e:
+                            # Best effort only: failing to write the marker must not hide the original error.
+                            logger.error(
+                                f"Could not cache non-existence of file. Will ignore error and continue. Error: {e}"
+                            )
+                        _cache_commit_hash_for_specific_revision(storage_folder, revision, commit_hash)
+                # Always propagate the "entry not found" error (handled by the outer `except` below).
+                raise
+            # Commit hash must exist
+            commit_hash = metadata.commit_hash
+            if commit_hash is None:
+                raise FileMetadataError(
+                    "Distant resource does not seem to be on huggingface.co. It is possible that a configuration issue"
+                    " prevents you from downloading resources from https://huggingface.co. Please check your firewall"
+                    " and proxy settings and make sure your SSL certificates are updated."
+                )
+            # Etag must exist
+            # If we don't have any of those, raise an error.
+            etag = metadata.etag
+            if etag is None:
+                raise FileMetadataError(
+                    "Distant resource does not have an ETag, we won't be able to reliably ensure reproducibility."
+                )
+            # Size must exist
+            expected_size = metadata.size
+            if expected_size is None:
+                raise FileMetadataError("Distant resource does not have a Content-Length.")
+            # In case of a redirect, save an extra redirect on the request.get call,
+            # and ensure we download the exact atomic version even if it changed
+            # between the HEAD and the GET (unlikely, but hey).
+            #
+            # If url domain is different => we are downloading from a CDN => url is signed => don't send auth
+            # If url domain is the same => redirect due to repo rename AND downloading a regular file => keep auth
+            if url != metadata.location:
+                url_to_download = metadata.location
+                if urlparse(url).netloc != urlparse(metadata.location).netloc:
+                    # Remove authorization header when downloading a LFS blob
+                    headers.pop("authorization", None)
+        except (requests.exceptions.SSLError, requests.exceptions.ProxyError):
+            # Actually raise for those subclasses of ConnectionError
+            # (this clause must stay before the generic `ConnectionError` one below)
+            raise
+        except (
+            requests.exceptions.ConnectionError,
+            requests.exceptions.Timeout,
+            OfflineModeIsEnabled,
+        ) as error:
+            # Otherwise, our Internet connection is down.
+            # etag is None
+            head_error_call = error
+        except (RevisionNotFoundError, EntryNotFoundError):
+            # The repo was found but the revision or entry doesn't exist on the Hub (never existed or got deleted)
+            raise
+        except requests.HTTPError as error:
+            # Multiple reasons for an http error:
+            # - Repository is private and invalid/missing token sent
+            # - Repository is gated and invalid/missing token sent
+            # - Hub is down (error 500 or 504)
+            # => let's switch to 'local_files_only=True' to check if the files are already cached.
+            #    (if it's not the case, the error will be re-raised)
+            head_error_call = error
+        except FileMetadataError as error:
+            # Multiple reasons for a FileMetadataError:
+            # - Wrong network configuration (proxy, firewall, SSL certificates)
+            # - Inconsistency on the Hub
+            # => let's switch to 'local_files_only=True' to check if the files are already cached.
+            #    (if it's not the case, the error will be re-raised)
+            head_error_call = error
+    # Sanity check: at this point we must have either an etag or a recorded error.
+    if not (local_files_only or etag is not None or head_error_call is not None):
+        raise RuntimeError("etag is empty due to uncovered problems")
+    return (url_to_download, etag, commit_hash, expected_size, head_error_call)  # type: ignore [return-value]
+def _raise_on_head_call_error(head_call_error: Exception, force_download: bool, local_files_only: bool) -> NoReturn:
+    """Raise an appropriate error when the HEAD call failed and we cannot locate a local file."""
+    # No head call => we cannot force download.
+    if force_download:
+        if local_files_only:
+            raise ValueError("Cannot pass 'force_download=True' and 'local_files_only=True' at the same time.")
+        elif isinstance(head_call_error, OfflineModeIsEnabled):
+            raise ValueError("Cannot pass 'force_download=True' when offline mode is enabled.") from head_call_error
+        else:
+            raise ValueError("Force download failed due to the above error.") from head_call_error
+    # No head call + couldn't find an appropriate file on disk => raise an error.
+    if local_files_only:
+        raise LocalEntryNotFoundError(
+            "Cannot find the requested files in the disk cache and outgoing traffic has been disabled. To enable"
+            " hf.co look-ups and downloads online, set 'local_files_only' to False."
+        )
+    elif isinstance(head_call_error, RepositoryNotFoundError) or isinstance(head_call_error, GatedRepoError):
+        # Repo not found or gated => let's raise the actual error
+        raise head_call_error
+    else:
+        # Otherwise: most likely a connection issue or Hub downtime => let's warn the user
+        raise LocalEntryNotFoundError(
+            "An error happened while trying to locate the file on the Hub and we cannot find the requested files"
+            " in the local cache. Please check your connection and try again or make sure your Internet connection"
+            " is on."
+        ) from head_call_error
+def _download_to_tmp_and_move(
+    incomplete_path: Path,
+    destination_path: Path,
+    url_to_download: str,
+    proxies: Optional[Dict],
+    headers: Dict[str, str],
+    expected_size: Optional[int],
+    filename: str,
+    force_download: bool,
+) -> None:
+    """Download content from a URL to a destination path.
+    Internal logic:
+    - return early if file is already downloaded
+    - resume download if possible (from incomplete file)
+    - do not resume download if `force_download=True` or `HF_HUB_ENABLE_HF_TRANSFER=True`
+    - check disk space before downloading
+    - download content to a temporary file
+    - set correct permissions on temporary file
+    - move the temporary file to the destination path
+    Both `incomplete_path` and `destination_path` must be on the same volume to avoid a local copy.
+    """
+    if destination_path.exists() and not force_download:
+        # Do nothing if already exists (except if force_download=True)
+        return
+    if incomplete_path.exists() and (force_download or (constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies)):
+        # By default, we will try to resume the download if possible.
+        # However, if the user has set `force_download=True` or if `hf_transfer` is enabled, then we should
+        # not resume the download => delete the incomplete file.
+        message = f"Removing incomplete file '{incomplete_path}'"
+        if force_download:
+            message += " (force_download=True)"
+        elif constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies:
+            message += " (hf_transfer=True)"
+        logger.info(message)
+        incomplete_path.unlink(missing_ok=True)
+    with incomplete_path.open("ab") as f:
+        resume_size = f.tell()
+        message = f"Downloading '{filename}' to '{incomplete_path}'"
+        if resume_size > 0 and expected_size is not None:
+            message += f" (resume from {resume_size}/{expected_size})"
+        logger.info(message)
+        if expected_size is not None:  # might be None if HTTP header not set correctly
+            # Check disk space in both tmp and destination path
+            _check_disk_space(expected_size, incomplete_path.parent)
+            _check_disk_space(expected_size, destination_path.parent)
+        http_get(
+            url_to_download,
+            f,
+            proxies=proxies,
+            resume_size=resume_size,
+            headers=headers,
+            expected_size=expected_size,
+        )
+    logger.info(f"Download complete. Moving file to {destination_path}")
+    _chmod_and_move(incomplete_path, destination_path)
+def _int_or_none(value: Optional[str]) -> Optional[int]:
+    try:
+        return int(value)  # type: ignore
+    except (TypeError, ValueError):
+        return None
+def _chmod_and_move(src: Path, dst: Path) -> None:
+    """Set correct permission before moving a blob from tmp directory to cache dir.
+    Do not take into account the `umask` from the process as there is no convenient way
+    to get it that is thread-safe.
+    See:
+    - About umask: https://docs.python.org/3/library/os.html#os.umask
+    - Thread-safety: https://stackoverflow.com/a/70343066
+    - About solution: https://github.com/huggingface/huggingface_hub/pull/1220#issuecomment-1326211591
+    - Fix issue: https://github.com/huggingface/huggingface_hub/issues/1141
+    - Fix issue: https://github.com/huggingface/huggingface_hub/issues/1215
+    """
+    # Get umask by creating a temporary file in the cached repo folder.
+    tmp_file = dst.parent.parent / f"tmp_{uuid.uuid4()}"
+    try:
+        tmp_file.touch()
+        cache_dir_mode = Path(tmp_file).stat().st_mode
+        os.chmod(str(src), stat.S_IMODE(cache_dir_mode))
+    except OSError as e:
+        logger.warning(
+            f"Could not set the permissions on the file '{src}'. Error: {e}.\nContinuing without setting permissions."
+        )
+    finally:
+        try:
+            tmp_file.unlink()
+        except OSError:
+            # fails if `tmp_file.touch()` failed => do nothing
+            # See https://github.com/huggingface/huggingface_hub/issues/2359
+            pass
+    shutil.move(str(src), str(dst), copy_function=_copy_no_matter_what)
+def _copy_no_matter_what(src: str, dst: str) -> None:
+    """Copy file from src to dst.
+    If `shutil.copy2` fails, fallback to `shutil.copyfile`.
+    """
+    try:
+        # Copy file with metadata and permission
+        # Can fail e.g. if dst is an S3 mount
+        shutil.copy2(src, dst)
+    except OSError:
+        # Copy only file content
+        shutil.copyfile(src, dst)
+def _get_pointer_path(storage_folder: str, revision: str, relative_filename: str) -> str:
+    # Using `os.path.abspath` instead of `Path.resolve()` to avoid resolving symlinks
+    snapshot_path = os.path.join(storage_folder, "snapshots")
+    pointer_path = os.path.join(snapshot_path, revision, relative_filename)
+    if Path(os.path.abspath(snapshot_path)) not in Path(os.path.abspath(pointer_path)).parents:
+        raise ValueError(
+            "Invalid pointer path: cannot create pointer path in snapshot folder if"
+            f" `storage_folder='{storage_folder}'`, `revision='{revision}'` and"
+            f" `relative_filename='{relative_filename}'`."
+        )
+    return pointer_path

.venv/lib/python3.11/site-packages/huggingface_hub/hf_api.py ADDED Viewed

The diff for this file is too large to render. See raw diff

.venv/lib/python3.11/site-packages/huggingface_hub/hf_file_system.py ADDED Viewed

	@@ -0,0 +1,1140 @@

+import os
+import re
+import tempfile
+from collections import deque
+from dataclasses import dataclass, field
+from datetime import datetime
+from itertools import chain
+from pathlib import Path
+from typing import Any, Dict, Iterator, List, NoReturn, Optional, Tuple, Union
+from urllib.parse import quote, unquote
+import fsspec
+from fsspec.callbacks import _DEFAULT_CALLBACK, NoOpCallback, TqdmCallback
+from fsspec.utils import isfilelike
+from requests import Response
+from . import constants
+from ._commit_api import CommitOperationCopy, CommitOperationDelete
+from .errors import EntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError
+from .file_download import hf_hub_url, http_get
+from .hf_api import HfApi, LastCommitInfo, RepoFile
+from .utils import HFValidationError, hf_raise_for_status, http_backoff
+# Regex used to match special revisions with "/" in them (see #1710).
+# Both alternatives are anchored at the start of the string. `HfFileSystem.resolve_path`
+# applies it to the part of a path placed after "@" to tell such a revision apart from
+# the path inside the repo.
+SPECIAL_REFS_REVISION_REGEX = re.compile(
+    r"""
+    (^refs\/convert\/\w+)     # `refs/convert/parquet` revisions
+    |
+    (^refs\/pr\/\d+)          # PR revisions
+    """,
+    re.VERBOSE,
+)
+@dataclass
+class HfFileSystemResolvedPath:
+    """Data structure containing information about a resolved Hugging Face file system path."""
+    repo_type: str
+    repo_id: str
+    revision: str
+    path_in_repo: str
+    # The part placed after '@' in the initial path. It can be a quoted or unquoted refs revision.
+    # Used to reconstruct the unresolved path to return to the user.
+    _raw_revision: Optional[str] = field(default=None, repr=False)
+    def unresolve(self) -> str:
+        repo_path = constants.REPO_TYPES_URL_PREFIXES.get(self.repo_type, "") + self.repo_id
+        if self._raw_revision:
+            return f"{repo_path}@{self._raw_revision}/{self.path_in_repo}".rstrip("/")
+        elif self.revision != constants.DEFAULT_REVISION:
+            return f"{repo_path}@{safe_revision(self.revision)}/{self.path_in_repo}".rstrip("/")
+        else:
+            return f"{repo_path}/{self.path_in_repo}".rstrip("/")
+class HfFileSystem(fsspec.AbstractFileSystem):
+    """
+    Access a remote Hugging Face Hub repository as if it were a local file system.
+    <Tip warning={true}>
+        [`HfFileSystem`] provides fsspec compatibility, which is useful for libraries that require it (e.g., reading
+        Hugging Face datasets directly with `pandas`). However, it introduces additional overhead due to this compatibility
+        layer. For better performance and reliability, it's recommended to use `HfApi` methods when possible.
+    </Tip>
+    Args:
+        token (`str` or `bool`, *optional*):
+            A valid user access token (string). Defaults to the locally saved
+            token, which is the recommended method for authentication (see
+            https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
+            To disable authentication, pass `False`.
+        endpoint (`str`, *optional*):
+            Endpoint of the Hub. Defaults to <https://huggingface.co>.
+    Usage:
+    ```python
+    >>> from huggingface_hub import HfFileSystem
+    >>> fs = HfFileSystem()
+    >>> # List files
+    >>> fs.glob("my-username/my-model/*.bin")
+    ['my-username/my-model/pytorch_model.bin']
+    >>> fs.ls("datasets/my-username/my-dataset", detail=False)
+    ['datasets/my-username/my-dataset/.gitattributes', 'datasets/my-username/my-dataset/README.md', 'datasets/my-username/my-dataset/data.json']
+    >>> # Read/write files
+    >>> with fs.open("my-username/my-model/pytorch_model.bin") as f:
+    ...     data = f.read()
+    >>> with fs.open("my-username/my-model/pytorch_model.bin", "wb") as f:
+    ...     f.write(data)
+    ```
+    """
+    root_marker = ""
+    protocol = "hf"
+    def __init__(
+        self,
+        *args,
+        endpoint: Optional[str] = None,
+        token: Union[bool, str, None] = None,
+        **storage_options,
+    ):
+        super().__init__(*args, **storage_options)
+        self.endpoint = endpoint or constants.ENDPOINT
+        self.token = token
+        self._api = HfApi(endpoint=endpoint, token=token)
+        # Maps (repo_type, repo_id, revision) to a 2-tuple with:
+        #  * the 1st element indicating whether the repositoy and the revision exist
+        #  * the 2nd element being the exception raised if the repository or revision doesn't exist
+        self._repo_and_revision_exists_cache: Dict[
+            Tuple[str, str, Optional[str]], Tuple[bool, Optional[Exception]]
+        ] = {}
+    def _repo_and_revision_exist(
+        self, repo_type: str, repo_id: str, revision: Optional[str]
+    ) -> Tuple[bool, Optional[Exception]]:
+        if (repo_type, repo_id, revision) not in self._repo_and_revision_exists_cache:
+            try:
+                self._api.repo_info(
+                    repo_id, revision=revision, repo_type=repo_type, timeout=constants.HF_HUB_ETAG_TIMEOUT
+                )
+            except (RepositoryNotFoundError, HFValidationError) as e:
+                self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)] = False, e
+                self._repo_and_revision_exists_cache[(repo_type, repo_id, None)] = False, e
+            except RevisionNotFoundError as e:
+                self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)] = False, e
+                self._repo_and_revision_exists_cache[(repo_type, repo_id, None)] = True, None
+            else:
+                self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)] = True, None
+                self._repo_and_revision_exists_cache[(repo_type, repo_id, None)] = True, None
+        return self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)]
+    def resolve_path(self, path: str, revision: Optional[str] = None) -> HfFileSystemResolvedPath:
+        """
+        Resolve a Hugging Face file system path into its components.
+        The path has the general form `[<repo_type_prefix>/]<repo_id>[@<revision>][/<path_in_repo>]`.
+        When no "@" is present, `<repo_id>` is first tried as `namespace/name` (first two segments)
+        and, if that repository is not found, as a single-segment repo id.
+        Args:
+            path (`str`):
+                Path to resolve.
+            revision (`str`, *optional*):
+                The revision of the repo to resolve. Defaults to the revision specified in the path.
+        Returns:
+            [`HfFileSystemResolvedPath`]: Resolved path information containing `repo_type`, `repo_id`, `revision` and `path_in_repo`.
+            `revision` falls back to `constants.DEFAULT_REVISION` when none is given.
+        Raises:
+            `ValueError`:
+                If path contains conflicting revision information.
+            `NotImplementedError`:
+                If trying to list repositories.
+        NOTE(review): missing repositories/revisions are reported through `_raise_file_not_found`
+        (defined elsewhere in this module), which presumably raises `FileNotFoundError`. To be confirmed.
+        """
+        def _align_revision_in_path_with_revision(
+            revision_in_path: Optional[str], revision: Optional[str]
+        ) -> Optional[str]:
+            # Merge the revision found in the path with the `revision` argument:
+            # they must agree when both are given, otherwise the one that is set is used.
+            if revision is not None:
+                if revision_in_path is not None and revision_in_path != revision:
+                    raise ValueError(
+                        f'Revision specified in path ("{revision_in_path}") and in `revision` argument ("{revision}")'
+                        " are not the same."
+                    )
+            else:
+                revision = revision_in_path
+            return revision
+        path = self._strip_protocol(path)
+        # Step 1: determine the repo type from the optional leading prefix.
+        if not path:
+            # can't list repositories at root
+            raise NotImplementedError("Access to repositories lists is not implemented.")
+        elif path.split("/")[0] + "/" in constants.REPO_TYPES_URL_PREFIXES.values():
+            if "/" not in path:
+                # can't list repositories at the repository type level
+                raise NotImplementedError("Access to repositories lists is not implemented.")
+            repo_type, path = path.split("/", 1)
+            repo_type = constants.REPO_TYPES_MAPPING[repo_type]
+        else:
+            # No recognized prefix => model repository.
+            repo_type = constants.REPO_TYPE_MODEL
+        # Step 2: split the remaining path into repo id, revision and path in repo.
+        if path.count("/") > 0:
+            if "@" in path:
+                # Explicit revision: everything before the first "@" is the repo id.
+                repo_id, revision_in_path = path.split("@", 1)
+                if "/" in revision_in_path:
+                    match = SPECIAL_REFS_REVISION_REGEX.search(revision_in_path)
+                    if match is not None and revision in (None, match.group()):
+                        # Handle `refs/convert/parquet` and PR revisions separately
+                        path_in_repo = SPECIAL_REFS_REVISION_REGEX.sub("", revision_in_path).lstrip("/")
+                        revision_in_path = match.group()
+                    else:
+                        # Regular revision: it ends at the first "/".
+                        revision_in_path, path_in_repo = revision_in_path.split("/", 1)
+                else:
+                    path_in_repo = ""
+                # The revision may be URL-quoted in the path: compare/use its unquoted form.
+                revision = _align_revision_in_path_with_revision(unquote(revision_in_path), revision)
+                repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
+                if not repo_and_revision_exist:
+                    _raise_file_not_found(path, err)
+            else:
+                revision_in_path = None
+                # Ambiguity: `a/b/c` can be repo `a/b` + file `c`, or repo `a` + file `b/c`.
+                repo_id_with_namespace = "/".join(path.split("/")[:2])
+                path_in_repo_with_namespace = "/".join(path.split("/")[2:])
+                repo_id_without_namespace = path.split("/")[0]
+                path_in_repo_without_namespace = "/".join(path.split("/")[1:])
+                # Try the namespaced interpretation first.
+                repo_id = repo_id_with_namespace
+                path_in_repo = path_in_repo_with_namespace
+                repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision)
+                if not repo_and_revision_exist:
+                    if isinstance(err, (RepositoryNotFoundError, HFValidationError)):
+                        # Repo not found (or invalid id): fall back to the non-namespaced interpretation.
+                        repo_id = repo_id_without_namespace
+                        path_in_repo = path_in_repo_without_namespace
+                        repo_and_revision_exist, _ = self._repo_and_revision_exist(repo_type, repo_id, revision)
+                        if not repo_and_revision_exist:
+                            # Report the error from the first (namespaced) attempt.
+                            _raise_file_not_found(path, err)
+                    else:
+                        # The repo exists but the revision does not.
+                        _raise_file_not_found(path, err)
+        else:
+            # Single segment: the whole path is a repo id, optionally followed by "@<revision>".
+            repo_id = path
+            path_in_repo = ""
+            if "@" in path:
+                repo_id, revision_in_path = path.split("@", 1)
+                revision = _align_revision_in_path_with_revision(unquote(revision_in_path), revision)
+            else:
+                revision_in_path = None
+            repo_and_revision_exist, _ = self._repo_and_revision_exist(repo_type, repo_id, revision)
+            if not repo_and_revision_exist:
+                raise NotImplementedError("Access to repositories lists is not implemented.")
+        revision = revision if revision is not None else constants.DEFAULT_REVISION
+        # `_raw_revision` keeps the revision as it was written in the path (possibly quoted).
+        return HfFileSystemResolvedPath(repo_type, repo_id, revision, path_in_repo, _raw_revision=revision_in_path)
+    def invalidate_cache(self, path: Optional[str] = None) -> None:
+        """
+        Clear the cache for a given path.
+        For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.invalidate_cache).
+        Args:
+            path (`str`, *optional*):
+                Path to clear from cache. If not provided, clear the entire cache.
+        """
+        if not path:
+            self.dircache.clear()
+            self._repo_and_revision_exists_cache.clear()
+        else:
+            resolved_path = self.resolve_path(path)
+            path = resolved_path.unresolve()
+            while path:
+                self.dircache.pop(path, None)
+                path = self._parent(path)
+            # Only clear repo cache if path is to repo root
+            if not resolved_path.path_in_repo:
+                self._repo_and_revision_exists_cache.pop((resolved_path.repo_type, resolved_path.repo_id, None), None)
+                self._repo_and_revision_exists_cache.pop(
+                    (resolved_path.repo_type, resolved_path.repo_id, resolved_path.revision), None
+                )
+    def _open(
+        self,
+        path: str,
+        mode: str = "rb",
+        revision: Optional[str] = None,
+        block_size: Optional[int] = None,
+        **kwargs,
+    ) -> "HfFileSystemFile":
+        if "a" in mode:
+            raise NotImplementedError("Appending to remote files is not yet supported.")
+        if block_size == 0:
+            return HfFileSystemStreamFile(self, path, mode=mode, revision=revision, block_size=block_size, **kwargs)
+        else:
+            return HfFileSystemFile(self, path, mode=mode, revision=revision, block_size=block_size, **kwargs)
+    def _rm(self, path: str, revision: Optional[str] = None, **kwargs) -> None:
+        resolved_path = self.resolve_path(path, revision=revision)
+        self._api.delete_file(
+            path_in_repo=resolved_path.path_in_repo,
+            repo_id=resolved_path.repo_id,
+            token=self.token,
+            repo_type=resolved_path.repo_type,
+            revision=resolved_path.revision,
+            commit_message=kwargs.get("commit_message"),
+            commit_description=kwargs.get("commit_description"),
+        )
+        self.invalidate_cache(path=resolved_path.unresolve())
+    def rm(
+        self,
+        path: str,
+        recursive: bool = False,
+        maxdepth: Optional[int] = None,
+        revision: Optional[str] = None,
+        **kwargs,
+    ) -> None:
+        """
+        Delete files from a repository.
+        For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.rm).
+        <Tip warning={true}>
+            Note: When possible, use `HfApi.delete_file()` for better performance.
+        </Tip>
+        Args:
+            path (`str`):
+                Path to delete.
+            recursive (`bool`, *optional*):
+                If True, delete directory and all its contents. Defaults to False.
+            maxdepth (`int`, *optional*):
+                Maximum number of subdirectories to visit when deleting recursively.
+            revision (`str`, *optional*):
+                The git revision to delete from.
+        """
+        resolved_path = self.resolve_path(path, revision=revision)
+        paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth, revision=revision)
+        paths_in_repo = [self.resolve_path(path).path_in_repo for path in paths if not self.isdir(path)]
+        operations = [CommitOperationDelete(path_in_repo=path_in_repo) for path_in_repo in paths_in_repo]
+        commit_message = f"Delete {path} "
+        commit_message += "recursively " if recursive else ""
+        commit_message += f"up to depth {maxdepth} " if maxdepth is not None else ""
+        # TODO: use `commit_description` to list all the deleted paths?
+        self._api.create_commit(
+            repo_id=resolved_path.repo_id,
+            repo_type=resolved_path.repo_type,
+            token=self.token,
+            operations=operations,
+            revision=resolved_path.revision,
+            commit_message=kwargs.get("commit_message", commit_message),
+            commit_description=kwargs.get("commit_description"),
+        )
+        self.invalidate_cache(path=resolved_path.unresolve())
+    def ls(
+        self, path: str, detail: bool = True, refresh: bool = False, revision: Optional[str] = None, **kwargs
+    ) -> List[Union[str, Dict[str, Any]]]:
+        """
+        List the contents of a directory.
+        For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.ls).
+        <Tip warning={true}>
+            Note: When possible, use `HfApi.list_repo_tree()` for better performance.
+        </Tip>
+        Args:
+            path (`str`):
+                Path to the directory.
+            detail (`bool`, *optional*):
+                If True, returns a list of dictionaries containing file information. If False,
+                returns a list of file paths. Defaults to True.
+            refresh (`bool`, *optional*):
+                If True, bypass the cache and fetch the latest data. Defaults to False.
+            revision (`str`, *optional*):
+                The git revision to list from.
+        Returns:
+            `List[Union[str, Dict[str, Any]]]`: List of file paths (if detail=False) or list of file information
+            dictionaries (if detail=True).
+        """
+        resolved_path = self.resolve_path(path, revision=revision)
+        path = resolved_path.unresolve()
+        kwargs = {"expand_info": detail, **kwargs}
+        try:
+            out = self._ls_tree(path, refresh=refresh, revision=revision, **kwargs)
+        except EntryNotFoundError:
+            # Path could be a file
+            if not resolved_path.path_in_repo:
+                _raise_file_not_found(path, None)
+            out = self._ls_tree(self._parent(path), refresh=refresh, revision=revision, **kwargs)
+            out = [o for o in out if o["name"] == path]
+            if len(out) == 0:
+                _raise_file_not_found(path, None)
+        return out if detail else [o["name"] for o in out]
+    def _ls_tree(
+        self,
+        path: str,
+        recursive: bool = False,
+        refresh: bool = False,
+        revision: Optional[str] = None,
+        expand_info: bool = True,
+    ):
+        resolved_path = self.resolve_path(path, revision=revision)
+        path = resolved_path.unresolve()
+        root_path = HfFileSystemResolvedPath(
+            resolved_path.repo_type,
+            resolved_path.repo_id,
+            resolved_path.revision,
+            path_in_repo="",
+            _raw_revision=resolved_path._raw_revision,
+        ).unresolve()
+        out = []
+        if path in self.dircache and not refresh:
+            cached_path_infos = self.dircache[path]
+            out.extend(cached_path_infos)
+            dirs_not_in_dircache = []
+            if recursive:
+                # Use BFS to traverse the cache and build the "recursive "output
+                # (The Hub uses a so-called "tree first" strategy for the tree endpoint but we sort the output to follow the spec so the result is (eventually) the same)
+                dirs_to_visit = deque(
+                    [path_info for path_info in cached_path_infos if path_info["type"] == "directory"]
+                )
+                while dirs_to_visit:
+                    dir_info = dirs_to_visit.popleft()
+                    if dir_info["name"] not in self.dircache:
+                        dirs_not_in_dircache.append(dir_info["name"])
+                    else:
+                        cached_path_infos = self.dircache[dir_info["name"]]
+                        out.extend(cached_path_infos)
+                        dirs_to_visit.extend(
+                            [path_info for path_info in cached_path_infos if path_info["type"] == "directory"]
+                        )
+            dirs_not_expanded = []
+            if expand_info:
+                # Check if there are directories with non-expanded entries
+                dirs_not_expanded = [self._parent(o["name"]) for o in out if o["last_commit"] is None]
+            if (recursive and dirs_not_in_dircache) or (expand_info and dirs_not_expanded):
+                # If the dircache is incomplete, find the common path of the missing and non-expanded entries
+                # and extend the output with the result of `_ls_tree(common_path, recursive=True)`
+                common_prefix = os.path.commonprefix(dirs_not_in_dircache + dirs_not_expanded)
+                # Get the parent directory if the common prefix itself is not a directory
+                common_path = (
+                    common_prefix.rstrip("/")
+                    if common_prefix.endswith("/")
+                    or common_prefix == root_path
+                    or common_prefix in chain(dirs_not_in_dircache, dirs_not_expanded)
+                    else self._parent(common_prefix)
+                )
+                out = [o for o in out if not o["name"].startswith(common_path + "/")]
+                for cached_path in self.dircache:
+                    if cached_path.startswith(common_path + "/"):
+                        self.dircache.pop(cached_path, None)
+                self.dircache.pop(common_path, None)
+                out.extend(
+                    self._ls_tree(
+                        common_path,
+                        recursive=recursive,
+                        refresh=True,
+                        revision=revision,
+                        expand_info=expand_info,
+                    )
+                )
+        else:
+            tree = self._api.list_repo_tree(
+                resolved_path.repo_id,
+                resolved_path.path_in_repo,
+                recursive=recursive,
+                expand=expand_info,
+                revision=resolved_path.revision,
+                repo_type=resolved_path.repo_type,
+            )
+            for path_info in tree:
+                if isinstance(path_info, RepoFile):
+                    cache_path_info = {
+                        "name": root_path + "/" + path_info.path,
+                        "size": path_info.size,
+                        "type": "file",
+                        "blob_id": path_info.blob_id,
+                        "lfs": path_info.lfs,
+                        "last_commit": path_info.last_commit,
+                        "security": path_info.security,
+                    }
+                else:
+                    cache_path_info = {
+                        "name": root_path + "/" + path_info.path,
+                        "size": 0,
+                        "type": "directory",
+                        "tree_id": path_info.tree_id,
+                        "last_commit": path_info.last_commit,
+                    }
+                parent_path = self._parent(cache_path_info["name"])
+                self.dircache.setdefault(parent_path, []).append(cache_path_info)
+                out.append(cache_path_info)
+        return out
+    def walk(self, path: str, *args, **kwargs) -> Iterator[Tuple[str, List[str], List[str]]]:
+        """
+        Return all files below the given path.
+        For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.walk).
+        Args:
+            path (`str`):
+                Root path to list files from.
+        Returns:
+            `Iterator[Tuple[str, List[str], List[str]]]`: An iterator of (path, list of directory names, list of file names) tuples.
+        """
+        # Set expand_info=False by default to get a x10 speed boost
+        kwargs = {"expand_info": kwargs.get("detail", False), **kwargs}
+        path = self.resolve_path(path, revision=kwargs.get("revision")).unresolve()
+        yield from super().walk(path, *args, **kwargs)
+    def glob(self, path: str, **kwargs) -> List[str]:
+        """
+        Find files by glob-matching.
+        For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.glob).
+        Args:
+            path (`str`):
+                Path pattern to match.
+        Returns:
+            `List[str]`: List of paths matching the pattern.
+        """
+        # Set expand_info=False by default to get a x10 speed boost
+        kwargs = {"expand_info": kwargs.get("detail", False), **kwargs}
+        path = self.resolve_path(path, revision=kwargs.get("revision")).unresolve()
+        return super().glob(path, **kwargs)
+    def find(
+        self,
+        path: str,
+        maxdepth: Optional[int] = None,
+        withdirs: bool = False,
+        detail: bool = False,
+        refresh: bool = False,
+        revision: Optional[str] = None,
+        **kwargs,
+    ) -> Union[List[str], Dict[str, Dict[str, Any]]]:
+        """
+        List all files below path.
+        For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.find).
+        Args:
+            path (`str`):
+                Root path to list files from.
+            maxdepth (`int`, *optional*):
+                Maximum depth to descend into subdirectories.
+            withdirs (`bool`, *optional*):
+                Include directory paths in the output. Defaults to False.
+            detail (`bool`, *optional*):
+                If True, returns a dict mapping paths to file information. Defaults to False.
+            refresh (`bool`, *optional*):
+                If True, bypass the cache and fetch the latest data. Defaults to False.
+            revision (`str`, *optional*):
+                The git revision to list from.
+        Returns:
+            `Union[List[str], Dict[str, Dict[str, Any]]]`: List of paths or dict of file information.
+        """
+        if maxdepth:
+            return super().find(
+                path, maxdepth=maxdepth, withdirs=withdirs, detail=detail, refresh=refresh, revision=revision, **kwargs
+            )
+        resolved_path = self.resolve_path(path, revision=revision)
+        path = resolved_path.unresolve()
+        kwargs = {"expand_info": detail, **kwargs}
+        try:
+            out = self._ls_tree(path, recursive=True, refresh=refresh, revision=resolved_path.revision, **kwargs)
+        except EntryNotFoundError:
+            # Path could be a file
+            if self.info(path, revision=revision, **kwargs)["type"] == "file":
+                out = {path: {}}
+            else:
+                out = {}
+        else:
+            if not withdirs:
+                out = [o for o in out if o["type"] != "directory"]
+            else:
+                # If `withdirs=True`, include the directory itself to be consistent with the spec
+                path_info = self.info(path, revision=resolved_path.revision, **kwargs)
+                out = [path_info] + out if path_info["type"] == "directory" else out
+            out = {o["name"]: o for o in out}
+        names = sorted(out)
+        if not detail:
+            return names
+        else:
+            return {name: out[name] for name in names}
+    def cp_file(self, path1: str, path2: str, revision: Optional[str] = None, **kwargs) -> None:
+        """
+        Copy a file within or between repositories.
+        <Tip warning={true}>
+            Note: When possible, use `HfApi.upload_file()` for better performance.
+        </Tip>
+        Args:
+            path1 (`str`):
+                Source path to copy from.
+            path2 (`str`):
+                Destination path to copy to.
+            revision (`str`, *optional*):
+                The git revision to copy from.
+        """
+        resolved_path1 = self.resolve_path(path1, revision=revision)
+        resolved_path2 = self.resolve_path(path2, revision=revision)
+        same_repo = (
+            resolved_path1.repo_type == resolved_path2.repo_type and resolved_path1.repo_id == resolved_path2.repo_id
+        )
+        if same_repo:
+            commit_message = f"Copy {path1} to {path2}"
+            self._api.create_commit(
+                repo_id=resolved_path1.repo_id,
+                repo_type=resolved_path1.repo_type,
+                revision=resolved_path2.revision,
+                commit_message=kwargs.get("commit_message", commit_message),
+                commit_description=kwargs.get("commit_description", ""),
+                operations=[
+                    CommitOperationCopy(
+                        src_path_in_repo=resolved_path1.path_in_repo,
+                        path_in_repo=resolved_path2.path_in_repo,
+                        src_revision=resolved_path1.revision,
+                    )
+                ],
+            )
+        else:
+            with self.open(path1, "rb", revision=resolved_path1.revision) as f:
+                content = f.read()
+            commit_message = f"Copy {path1} to {path2}"
+            self._api.upload_file(
+                path_or_fileobj=content,
+                path_in_repo=resolved_path2.path_in_repo,
+                repo_id=resolved_path2.repo_id,
+                token=self.token,
+                repo_type=resolved_path2.repo_type,
+                revision=resolved_path2.revision,
+                commit_message=kwargs.get("commit_message", commit_message),
+                commit_description=kwargs.get("commit_description"),
+            )
+        self.invalidate_cache(path=resolved_path1.unresolve())
+        self.invalidate_cache(path=resolved_path2.unresolve())
+    def modified(self, path: str, **kwargs) -> datetime:
+        """
+        Get the last modified time of a file.
+        For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.modified).
+        Args:
+            path (`str`):
+                Path to the file.
+        Returns:
+            `datetime`: Last commit date of the file.
+        """
+        info = self.info(path, **kwargs)
+        return info["last_commit"]["date"]
+    def info(self, path: str, refresh: bool = False, revision: Optional[str] = None, **kwargs) -> Dict[str, Any]:
+        """
+        Get information about a file or directory.
+        For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.info).
+        <Tip warning={true}>
+            Note: When possible, use `HfApi.get_paths_info()` or `HfApi.repo_info()` for better performance.
+        </Tip>
+        Args:
+            path (`str`):
+                Path to get info for.
+            refresh (`bool`, *optional*):
+                If True, bypass the cache and fetch the latest data. Defaults to False.
+            revision (`str`, *optional*):
+                The git revision to get info from.
+            **kwargs:
+                Only `expand_info` (`bool`, defaults to True) is read here. When True, the returned entry must carry
+                a non-null `last_commit`, which may require an extra API call.
+        Returns:
+            `Dict[str, Any]`: Dictionary containing file information (type, size, commit info, etc.).
+        Raises:
+            `FileNotFoundError`: If the parent directory is cached without an entry for `path`, or if the Hub
+            returns no info for `path`.
+        """
+        resolved_path = self.resolve_path(path, revision=revision)
+        path = resolved_path.unresolve()
+        expand_info = kwargs.get(
+            "expand_info", True
+        )  # don't expose it as a parameter in the public API to follow the spec
+        if not resolved_path.path_in_repo:
+            # Path is the root directory
+            out = {
+                "name": path,
+                "size": 0,
+                "type": "directory",
+            }
+            if expand_info:
+                # NOTE(review): this takes the LAST element of the returned commit list. If `list_repo_commits`
+                # returns the most recent commit first, `[-1]` is the oldest commit, not the latest - confirm.
+                last_commit = self._api.list_repo_commits(
+                    resolved_path.repo_id, repo_type=resolved_path.repo_type, revision=resolved_path.revision
+                )[-1]
+                out = {
+                    **out,
+                    "tree_id": None,  # TODO: tree_id of the root directory?
+                    "last_commit": LastCommitInfo(
+                        oid=last_commit.commit_id, title=last_commit.title, date=last_commit.created_at
+                    ),
+                }
+        else:
+            out = None
+            parent_path = self._parent(path)
+            if not expand_info and parent_path not in self.dircache:
+                # Fill the cache with cheap call
+                self.ls(parent_path, expand_info=False)
+            if parent_path in self.dircache:
+                # Check if the path is in the cache
+                out1 = [o for o in self.dircache[parent_path] if o["name"] == path]
+                if not out1:
+                    # The parent listing is cached and does not contain `path`: treat it as missing
+                    _raise_file_not_found(path, None)
+                out = out1[0]
+            # Query the Hub when explicitly asked to, when nothing was cached, or when the cached entry
+            # lacks the commit info required by `expand_info=True`
+            if refresh or out is None or (expand_info and out and out["last_commit"] is None):
+                paths_info = self._api.get_paths_info(
+                    resolved_path.repo_id,
+                    resolved_path.path_in_repo,
+                    expand=expand_info,
+                    revision=resolved_path.revision,
+                    repo_type=resolved_path.repo_type,
+                )
+                if not paths_info:
+                    _raise_file_not_found(path, None)
+                path_info = paths_info[0]
+                # Prefix shared by every entry name of this repo/revision (the unresolved path of the repo root)
+                root_path = HfFileSystemResolvedPath(
+                    resolved_path.repo_type,
+                    resolved_path.repo_id,
+                    resolved_path.revision,
+                    path_in_repo="",
+                    _raw_revision=resolved_path._raw_revision,
+                ).unresolve()
+                if isinstance(path_info, RepoFile):
+                    out = {
+                        "name": root_path + "/" + path_info.path,
+                        "size": path_info.size,
+                        "type": "file",
+                        "blob_id": path_info.blob_id,
+                        "lfs": path_info.lfs,
+                        "last_commit": path_info.last_commit,
+                        "security": path_info.security,
+                    }
+                else:
+                    out = {
+                        "name": root_path + "/" + path_info.path,
+                        "size": 0,
+                        "type": "directory",
+                        "tree_id": path_info.tree_id,
+                        "last_commit": path_info.last_commit,
+                    }
+                if not expand_info:
+                    # Freshly fetched, non-expanded entries only expose the basic keys
+                    # (entries served from the cache above are returned as stored)
+                    out = {k: out[k] for k in ["name", "size", "type"]}
+        assert out is not None
+        return out
+    def exists(self, path, **kwargs):
+        """
+        Check if a file exists.
+        For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.exists).
+        <Tip warning={true}>
+            Note: When possible, use `HfApi.file_exists()` for better performance.
+        </Tip>
+        Args:
+            path (`str`):
+                Path to check.
+        Returns:
+            `bool`: True if file exists, False otherwise.
+        """
+        try:
+            if kwargs.get("refresh", False):
+                self.invalidate_cache(path)
+            self.info(path, **{**kwargs, "expand_info": False})
+            return True
+        except:  # noqa: E722
+            return False
+    def isdir(self, path):
+        """
+        Check if a path is a directory.
+        For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.isdir).
+        Args:
+            path (`str`):
+                Path to check.
+        Returns:
+            `bool`: True if path is a directory, False otherwise.
+        """
+        try:
+            return self.info(path, expand_info=False)["type"] == "directory"
+        except OSError:
+            return False
+    def isfile(self, path):
+        """
+        Check if a path is a file.
+        For more details, refer to [fsspec documentation](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.isfile).
+        Args:
+            path (`str`):
+                Path to check.
+        Returns:
+            `bool`: True if path is a file, False otherwise.
+        """
+        try:
+            return self.info(path, expand_info=False)["type"] == "file"
+        except:  # noqa: E722
+            return False
+    def url(self, path: str) -> str:
+        """
+        Get the HTTP URL of the given path.
+        Args:
+            path (`str`):
+                Path to get URL for.
+        Returns:
+            `str`: HTTP URL to access the file or directory on the Hub.
+        """
+        resolved_path = self.resolve_path(path)
+        url = hf_hub_url(
+            resolved_path.repo_id,
+            resolved_path.path_in_repo,
+            repo_type=resolved_path.repo_type,
+            revision=resolved_path.revision,
+            endpoint=self.endpoint,
+        )
+        if self.isdir(path):
+            url = url.replace("/resolve/", "/tree/", 1)
+        return url
+    def get_file(self, rpath, lpath, callback=_DEFAULT_CALLBACK, outfile=None, **kwargs) -> None:
+        """
+        Copy single remote file to local.
+        <Tip warning={true}>
+            Note: When possible, use `HfApi.hf_hub_download()` for better performance.
+        </Tip>
+        Args:
+            rpath (`str`):
+                Remote path to download from.
+            lpath (`str`):
+                Local path to download to.
+            callback (`Callback`, *optional*):
+                Optional callback to track download progress. Defaults to no callback.
+            outfile (`IO`, *optional*):
+                Optional file-like object to write to. If provided, `lpath` is ignored.
+        """
+        revision = kwargs.get("revision")
+        unhandled_kwargs = set(kwargs.keys()) - {"revision"}
+        if not isinstance(callback, (NoOpCallback, TqdmCallback)) or len(unhandled_kwargs) > 0:
+            # for now, let's not handle custom callbacks
+            # and let's not handle custom kwargs
+            return super().get_file(rpath, lpath, callback=callback, outfile=outfile, **kwargs)
+        # Taken from https://github.com/fsspec/filesystem_spec/blob/47b445ae4c284a82dd15e0287b1ffc410e8fc470/fsspec/spec.py#L883
+        if isfilelike(lpath):
+            outfile = lpath
+        elif self.isdir(rpath):
+            os.makedirs(lpath, exist_ok=True)
+            return None
+        if isinstance(lpath, (str, Path)):  # otherwise, let's assume it's a file-like object
+            os.makedirs(os.path.dirname(lpath), exist_ok=True)
+        # Open file if not already open
+        close_file = False
+        if outfile is None:
+            outfile = open(lpath, "wb")
+            close_file = True
+        initial_pos = outfile.tell()
+        # Custom implementation of `get_file` to use `http_get`.
+        resolve_remote_path = self.resolve_path(rpath, revision=revision)
+        expected_size = self.info(rpath, revision=revision)["size"]
+        callback.set_size(expected_size)
+        try:
+            http_get(
+                url=hf_hub_url(
+                    repo_id=resolve_remote_path.repo_id,
+                    revision=resolve_remote_path.revision,
+                    filename=resolve_remote_path.path_in_repo,
+                    repo_type=resolve_remote_path.repo_type,
+                    endpoint=self.endpoint,
+                ),
+                temp_file=outfile,
+                displayed_filename=rpath,
+                expected_size=expected_size,
+                resume_size=0,
+                headers=self._api._build_hf_headers(),
+                _tqdm_bar=callback.tqdm if isinstance(callback, TqdmCallback) else None,
+            )
+            outfile.seek(initial_pos)
+        finally:
+            # Close file only if we opened it ourselves
+            if close_file:
+                outfile.close()
+    @property
+    def transaction(self):
+        """A context within which files are committed together upon exit
+        Requires the file class to implement `.commit()` and `.discard()`
+        for the normal and exception cases.
+        """
+        # Taken from https://github.com/fsspec/filesystem_spec/blob/3fbb6fee33b46cccb015607630843dea049d3243/fsspec/spec.py#L231
+        # See https://github.com/huggingface/huggingface_hub/issues/1733
+        raise NotImplementedError("Transactional commits are not supported.")
+    def start_transaction(self):
+        """Begin write transaction for deferring files, non-context version"""
+        # Taken from https://github.com/fsspec/filesystem_spec/blob/3fbb6fee33b46cccb015607630843dea049d3243/fsspec/spec.py#L241
+        # See https://github.com/huggingface/huggingface_hub/issues/1733
+        raise NotImplementedError("Transactional commits are not supported.")
+class HfFileSystemFile(fsspec.spec.AbstractBufferedFile):
+    def __init__(self, fs: HfFileSystem, path: str, revision: Optional[str] = None, **kwargs):
+        try:
+            self.resolved_path = fs.resolve_path(path, revision=revision)
+        except FileNotFoundError as e:
+            if "w" in kwargs.get("mode", ""):
+                raise FileNotFoundError(
+                    f"{e}.\nMake sure the repository and revision exist before writing data."
+                ) from e
+            raise
+        # avoid an unnecessary .info() call with expensive expand_info=True to instantiate .details
+        if kwargs.get("mode", "rb") == "rb":
+            self.details = fs.info(self.resolved_path.unresolve(), expand_info=False)
+        super().__init__(fs, self.resolved_path.unresolve(), **kwargs)
+        self.fs: HfFileSystem
+    def __del__(self):
+        if not hasattr(self, "resolved_path"):
+            # Means that the constructor failed. Nothing to do.
+            return
+        return super().__del__()
+    def _fetch_range(self, start: int, end: int) -> bytes:
+        headers = {
+            "range": f"bytes={start}-{end - 1}",
+            **self.fs._api._build_hf_headers(),
+        }
+        url = hf_hub_url(
+            repo_id=self.resolved_path.repo_id,
+            revision=self.resolved_path.revision,
+            filename=self.resolved_path.path_in_repo,
+            repo_type=self.resolved_path.repo_type,
+            endpoint=self.fs.endpoint,
+        )
+        r = http_backoff(
+            "GET",
+            url,
+            headers=headers,
+            retry_on_status_codes=(500, 502, 503, 504),
+            timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
+        )
+        hf_raise_for_status(r)
+        return r.content
+    def _initiate_upload(self) -> None:
+        self.temp_file = tempfile.NamedTemporaryFile(prefix="hffs-", delete=False)
+    def _upload_chunk(self, final: bool = False) -> None:
+        self.buffer.seek(0)
+        block = self.buffer.read()
+        self.temp_file.write(block)
+        if final:
+            self.temp_file.close()
+            self.fs._api.upload_file(
+                path_or_fileobj=self.temp_file.name,
+                path_in_repo=self.resolved_path.path_in_repo,
+                repo_id=self.resolved_path.repo_id,
+                token=self.fs.token,
+                repo_type=self.resolved_path.repo_type,
+                revision=self.resolved_path.revision,
+                commit_message=self.kwargs.get("commit_message"),
+                commit_description=self.kwargs.get("commit_description"),
+            )
+            os.remove(self.temp_file.name)
+            self.fs.invalidate_cache(
+                path=self.resolved_path.unresolve(),
+            )
+    def read(self, length=-1):
+        """Read remote file.
+        If `length` is not provided or is -1, the entire file is downloaded and read. On POSIX systems and if
+        `hf_transfer` is not enabled, the file is loaded in memory directly. Otherwise, the file is downloaded to a
+        temporary file and read from there.
+        """
+        if self.mode == "rb" and (length is None or length == -1) and self.loc == 0:
+            with self.fs.open(self.path, "rb", block_size=0) as f:  # block_size=0 enables fast streaming
+                return f.read()
+        return super().read(length)
+    def url(self) -> str:
+        return self.fs.url(self.path)
+class HfFileSystemStreamFile(fsspec.spec.AbstractBufferedFile):
+    def __init__(
+        self,
+        fs: HfFileSystem,
+        path: str,
+        mode: str = "rb",
+        revision: Optional[str] = None,
+        block_size: int = 0,
+        cache_type: str = "none",
+        **kwargs,
+    ):
+        if block_size != 0:
+            raise ValueError(f"HfFileSystemStreamFile only supports block_size=0 but got {block_size}")
+        if cache_type != "none":
+            raise ValueError(f"HfFileSystemStreamFile only supports cache_type='none' but got {cache_type}")
+        if "w" in mode:
+            raise ValueError(f"HfFileSystemStreamFile only supports reading but got mode='{mode}'")
+        try:
+            self.resolved_path = fs.resolve_path(path, revision=revision)
+        except FileNotFoundError as e:
+            if "w" in kwargs.get("mode", ""):
+                raise FileNotFoundError(
+                    f"{e}.\nMake sure the repository and revision exist before writing data."
+                ) from e
+        # avoid an unnecessary .info() call to instantiate .details
+        self.details = {"name": self.resolved_path.unresolve(), "size": None}
+        super().__init__(
+            fs, self.resolved_path.unresolve(), mode=mode, block_size=block_size, cache_type=cache_type, **kwargs
+        )
+        self.response: Optional[Response] = None
+        self.fs: HfFileSystem
+    def seek(self, loc: int, whence: int = 0):
+        if loc == 0 and whence == 1:
+            return
+        if loc == self.loc and whence == 0:
+            return
+        raise ValueError("Cannot seek streaming HF file")
+    def read(self, length: int = -1):
+        read_args = (length,) if length >= 0 else ()
+        if self.response is None or self.response.raw.isclosed():
+            url = hf_hub_url(
+                repo_id=self.resolved_path.repo_id,
+                revision=self.resolved_path.revision,
+                filename=self.resolved_path.path_in_repo,
+                repo_type=self.resolved_path.repo_type,
+                endpoint=self.fs.endpoint,
+            )
+            self.response = http_backoff(
+                "GET",
+                url,
+                headers=self.fs._api._build_hf_headers(),
+                retry_on_status_codes=(500, 502, 503, 504),
+                stream=True,
+                timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
+            )
+            hf_raise_for_status(self.response)
+        try:
+            out = self.response.raw.read(*read_args)
+        except Exception:
+            self.response.close()
+            # Retry by recreating the connection
+            url = hf_hub_url(
+                repo_id=self.resolved_path.repo_id,
+                revision=self.resolved_path.revision,
+                filename=self.resolved_path.path_in_repo,
+                repo_type=self.resolved_path.repo_type,
+                endpoint=self.fs.endpoint,
+            )
+            self.response = http_backoff(
+                "GET",
+                url,
+                headers={"Range": "bytes=%d-" % self.loc, **self.fs._api._build_hf_headers()},
+                retry_on_status_codes=(500, 502, 503, 504),
+                stream=True,
+                timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
+            )
+            hf_raise_for_status(self.response)
+            try:
+                out = self.response.raw.read(*read_args)
+            except Exception:
+                self.response.close()
+                raise
+        self.loc += len(out)
+        return out
+    def url(self) -> str:
+        return self.fs.url(self.path)
+    def __del__(self):
+        if not hasattr(self, "resolved_path"):
+            # Means that the constructor failed. Nothing to do.
+            return
+        return super().__del__()
+    def __reduce__(self):
+        return reopen, (self.fs, self.path, self.mode, self.blocksize, self.cache.name)
+def safe_revision(revision: str) -> str:
+    return revision if SPECIAL_REFS_REVISION_REGEX.match(revision) else safe_quote(revision)
+def safe_quote(s: str) -> str:
+    return quote(s, safe="")
+def _raise_file_not_found(path: str, err: Optional[Exception]) -> NoReturn:
+    msg = path
+    if isinstance(err, RepositoryNotFoundError):
+        msg = f"{path} (repository not found)"
+    elif isinstance(err, RevisionNotFoundError):
+        msg = f"{path} (revision not found)"
+    elif isinstance(err, HFValidationError):
+        msg = f"{path} (invalid repository id)"
+    raise FileNotFoundError(msg) from err
+def reopen(fs: HfFileSystem, path: str, mode: str, block_size: int, cache_type: str):
+    return fs.open(path, mode=mode, block_size=block_size, cache_type=cache_type)

.venv/lib/python3.11/site-packages/huggingface_hub/hub_mixin.py ADDED Viewed

	@@ -0,0 +1,836 @@

+import inspect
+import json
+import os
+from dataclasses import Field, asdict, dataclass, is_dataclass
+from pathlib import Path
+from typing import Any, Callable, ClassVar, Dict, List, Optional, Protocol, Tuple, Type, TypeVar, Union
+import packaging.version
+from . import constants
+from .errors import EntryNotFoundError, HfHubHTTPError
+from .file_download import hf_hub_download
+from .hf_api import HfApi
+from .repocard import ModelCard, ModelCardData
+from .utils import (
+    SoftTemporaryDirectory,
+    is_jsonable,
+    is_safetensors_available,
+    is_simple_optional_type,
+    is_torch_available,
+    logging,
+    unwrap_simple_optional_type,
+    validate_hf_hub_args,
+)
+if is_torch_available():
+    import torch  # type: ignore
+if is_safetensors_available():
+    import safetensors
+    from safetensors.torch import load_model as load_model_as_safetensor
+    from safetensors.torch import save_model as save_model_as_safetensor
+logger = logging.get_logger(__name__)
+# Type alias for dataclass instances, copied from https://github.com/python/typeshed/blob/9f28171658b9ca6c32a7cb93fbb99fc92b17858b/stdlib/_typeshed/__init__.pyi#L349
+class DataclassInstance(Protocol):
+    # Structural type: any object declaring a `__dataclass_fields__` class attribute matches this protocol.
+    __dataclass_fields__: ClassVar[Dict[str, Field]]
+# Generic variable that is either ModelHubMixin or a subclass thereof
+T = TypeVar("T", bound="ModelHubMixin")
+# Generic variable to represent an args type
+ARGS_T = TypeVar("ARGS_T")
+# Encoder: callable taking a value of the args type and returning anything
+ENCODER_T = Callable[[ARGS_T], Any]
+# Decoder: callable taking anything and returning a value of the args type
+DECODER_T = Callable[[Any], ARGS_T]
+# An (encoder, decoder) pair
+CODER_T = Tuple[ENCODER_T, DECODER_T]
+# Default model card template. It contains `{{ card_data }}`, `{{ repo_url }}` and `{{ docs_url }}`
+# placeholders; the last two fall back to "[More Information Needed]".
+# NOTE(review): the `{{ ... | default(...) }}` syntax looks like Jinja - the rendering code is not shown here, confirm.
+DEFAULT_MODEL_CARD = """
+---
+# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
+# Doc / guide: https://huggingface.co/docs/hub/model-cards
+{{ card_data }}
+---
+This model has been pushed to the Hub using the [PytorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
+- Library: {{ repo_url | default("[More Information Needed]", true) }}
+- Docs: {{ docs_url | default("[More Information Needed]", true) }}
+"""
+# Container for the model card related settings: a template, the card metadata and two optional URLs.
+@dataclass
+class MixinInfo:
+    # Template text of the model card
+    model_card_template: str
+    # Metadata of the model card
+    model_card_data: ModelCardData
+    # URL of the library repository (optional)
+    repo_url: Optional[str] = None
+    # URL of the library documentation (optional)
+    docs_url: Optional[str] = None
+class ModelHubMixin:
+    """
+    A generic mixin to integrate ANY machine learning framework with the Hub.
+    To integrate your framework, your model class must inherit from this class. Custom logic for saving/loading models
+    have to be overwritten in  [`_from_pretrained`] and [`_save_pretrained`]. [`PyTorchModelHubMixin`] is a good example
+    of mixin integration with the Hub. Check out our [integration guide](../guides/integrations) for more instructions.
+    When inheriting from [`ModelHubMixin`], you can define class-level attributes. These attributes are not passed to
+    `__init__` but to the class definition itself. This is useful to define metadata about the library integrating
+    [`ModelHubMixin`].
+    For more details on how to integrate the mixin with your library, checkout the [integration guide](../guides/integrations).
+    Args:
+        repo_url (`str`, *optional*):
+            URL of the library repository. Used to generate model card.
+        docs_url (`str`, *optional*):
+            URL of the library documentation. Used to generate model card.
+        model_card_template (`str`, *optional*):
+            Template of the model card. Used to generate model card. Defaults to a generic template.
+        language (`str` or `List[str]`, *optional*):
+            Language supported by the library. Used to generate model card.
+        library_name (`str`, *optional*):
+            Name of the library integrating ModelHubMixin. Used to generate model card.
+        license (`str`, *optional*):
+            License of the library integrating ModelHubMixin. Used to generate model card.
+            E.g: "apache-2.0"
+        license_name (`str`, *optional*):
+            Name of the license of the library integrating ModelHubMixin. Used to generate model card.
+            Only used if `license` is set to `other`.
+            E.g: "coqui-public-model-license".
+        license_link (`str`, *optional*):
+            URL to the license of the library integrating ModelHubMixin. Used to generate model card.
+            Only used if `license` is set to `other` and `license_name` is set.
+            E.g: "https://coqui.ai/cpml".
+        pipeline_tag (`str`, *optional*):
+            Tag of the pipeline. Used to generate model card. E.g. "text-classification".
+        tags (`List[str]`, *optional*):
+            Tags to be added to the model card. Used to generate model card. E.g. ["x-custom-tag", "arxiv:2304.12244"]
+        coders (`Dict[Type, Tuple[Callable, Callable]]`, *optional*):
+            Dictionary of custom types and their encoders/decoders. Used to encode/decode arguments that are not
+            jsonable by default. E.g dataclasses, argparse.Namespace, OmegaConf, etc.
+    Example:
+    ```python
+    >>> from huggingface_hub import ModelHubMixin
+    # Inherit from ModelHubMixin
+    >>> class MyCustomModel(
+    ...         ModelHubMixin,
+    ...         library_name="my-library",
+    ...         tags=["x-custom-tag", "arxiv:2304.12244"],
+    ...         repo_url="https://github.com/huggingface/my-cool-library",
+    ...         docs_url="https://huggingface.co/docs/my-cool-library",
+    ...         # ^ optional metadata to generate model card
+    ...     ):
+    ...     def __init__(self, size: int = 512, device: str = "cpu"):
+    ...         # define how to initialize your model
+    ...         super().__init__()
+    ...         ...
+    ...
+    ...     def _save_pretrained(self, save_directory: Path) -> None:
+    ...         # define how to serialize your model
+    ...         ...
+    ...
+    ...     @classmethod
+    ...     def from_pretrained(
+    ...         cls: Type[T],
+    ...         pretrained_model_name_or_path: Union[str, Path],
+    ...         *,
+    ...         force_download: bool = False,
+    ...         resume_download: Optional[bool] = None,
+    ...         proxies: Optional[Dict] = None,
+    ...         token: Optional[Union[str, bool]] = None,
+    ...         cache_dir: Optional[Union[str, Path]] = None,
+    ...         local_files_only: bool = False,
+    ...         revision: Optional[str] = None,
+    ...         **model_kwargs,
+    ...     ) -> T:
+    ...         # define how to deserialize your model
+    ...         ...
+    >>> model = MyCustomModel(size=256, device="gpu")
+    # Save model weights to local directory
+    >>> model.save_pretrained("my-awesome-model")
+    # Push model weights to the Hub
+    >>> model.push_to_hub("my-awesome-model")
+    # Download and initialize weights from the Hub
+    >>> reloaded_model = MyCustomModel.from_pretrained("username/my-awesome-model")
+    >>> reloaded_model.size
+    256
+    # Model card has been correctly populated
+    >>> from huggingface_hub import ModelCard
+    >>> card = ModelCard.load("username/my-awesome-model")
+    >>> card.data.tags
+    ["arxiv:2304.12244", "model_hub_mixin", "x-custom-tag"]
+    >>> card.data.library_name
+    "my-library"
+    ```
+    """
+    _hub_mixin_config: Optional[Union[dict, DataclassInstance]] = None
+    # ^ optional config attribute automatically set in `from_pretrained`
+    _hub_mixin_info: MixinInfo
+    # ^ information about the library integrating ModelHubMixin (used to generate model card)
+    _hub_mixin_inject_config: bool  # whether `_from_pretrained` expects `config` or not
+    _hub_mixin_init_parameters: Dict[str, inspect.Parameter]  # __init__ parameters
+    _hub_mixin_jsonable_default_values: Dict[str, Any]  # default values for __init__ parameters
+    _hub_mixin_jsonable_custom_types: Tuple[Type, ...]  # custom types that can be encoded/decoded
+    _hub_mixin_coders: Dict[Type, CODER_T]  # encoders/decoders for custom types
+    # ^ internal values to handle config
+    def __init_subclass__(
+        cls,
+        *,
+        # Generic info for model card
+        repo_url: Optional[str] = None,
+        docs_url: Optional[str] = None,
+        # Model card template
+        model_card_template: str = DEFAULT_MODEL_CARD,
+        # Model card metadata
+        language: Optional[List[str]] = None,
+        library_name: Optional[str] = None,
+        license: Optional[str] = None,
+        license_name: Optional[str] = None,
+        license_link: Optional[str] = None,
+        pipeline_tag: Optional[str] = None,
+        tags: Optional[List[str]] = None,
+        # How to encode/decode arguments with custom type into a JSON config?
+        coders: Optional[
+            Dict[Type, CODER_T]
+            # Key is a type.
+            # Value is a tuple (encoder, decoder).
+            # Example: {MyCustomType: (lambda x: x.value, lambda data: MyCustomType(data))}
+        ] = None,
+    ) -> None:
+        """Inspect __init__ signature only once when subclassing + handle modelcard."""
+        super().__init_subclass__()
+        # Will be reused when creating modelcard
+        tags = tags or []
+        tags.append("model_hub_mixin")
+        # Initialize MixinInfo if not existent
+        info = MixinInfo(model_card_template=model_card_template, model_card_data=ModelCardData())
+        # If parent class has a MixinInfo, inherit from it as a copy
+        if hasattr(cls, "_hub_mixin_info"):
+            # Inherit model card template from parent class if not explicitly set
+            if model_card_template == DEFAULT_MODEL_CARD:
+                info.model_card_template = cls._hub_mixin_info.model_card_template
+            # Inherit from parent model card data
+            info.model_card_data = ModelCardData(**cls._hub_mixin_info.model_card_data.to_dict())
+            # Inherit other info
+            info.docs_url = cls._hub_mixin_info.docs_url
+            info.repo_url = cls._hub_mixin_info.repo_url
+        cls._hub_mixin_info = info
+        # Update MixinInfo with metadata
+        if model_card_template is not None and model_card_template != DEFAULT_MODEL_CARD:
+            info.model_card_template = model_card_template
+        if repo_url is not None:
+            info.repo_url = repo_url
+        if docs_url is not None:
+            info.docs_url = docs_url
+        if language is not None:
+            info.model_card_data.language = language
+        if library_name is not None:
+            info.model_card_data.library_name = library_name
+        if license is not None:
+            info.model_card_data.license = license
+        if license_name is not None:
+            info.model_card_data.license_name = license_name
+        if license_link is not None:
+            info.model_card_data.license_link = license_link
+        if pipeline_tag is not None:
+            info.model_card_data.pipeline_tag = pipeline_tag
+        if tags is not None:
+            if info.model_card_data.tags is not None:
+                info.model_card_data.tags.extend(tags)
+            else:
+                info.model_card_data.tags = tags
+        info.model_card_data.tags = sorted(set(info.model_card_data.tags))
+        # Handle encoders/decoders for args
+        cls._hub_mixin_coders = coders or {}
+        cls._hub_mixin_jsonable_custom_types = tuple(cls._hub_mixin_coders.keys())
+        # Inspect __init__ signature to handle config
+        cls._hub_mixin_init_parameters = dict(inspect.signature(cls.__init__).parameters)
+        cls._hub_mixin_jsonable_default_values = {
+            param.name: cls._encode_arg(param.default)
+            for param in cls._hub_mixin_init_parameters.values()
+            if param.default is not inspect.Parameter.empty and cls._is_jsonable(param.default)
+        }
+        cls._hub_mixin_inject_config = "config" in inspect.signature(cls._from_pretrained).parameters
+    def __new__(cls: Type[T], *args, **kwargs) -> T:
+        """Create a new instance of the class and handle config.
+        3 cases:
+        - If `self._hub_mixin_config` is already set, do nothing.
+        - If `config` is passed as a dataclass, set it as `self._hub_mixin_config`.
+        - Otherwise, build `self._hub_mixin_config` from default values and passed values.
+        """
+        instance = super().__new__(cls)
+        # If `config` is already set, return early
+        if instance._hub_mixin_config is not None:
+            return instance
+        # Infer passed values
+        passed_values = {
+            **{
+                key: value
+                for key, value in zip(
+                    # [1:] to skip `self` parameter
+                    list(cls._hub_mixin_init_parameters)[1:],
+                    args,
+                )
+            },
+            **kwargs,
+        }
+        # If config passed as dataclass => set it and return early
+        if is_dataclass(passed_values.get("config")):
+            instance._hub_mixin_config = passed_values["config"]
+            return instance
+        # Otherwise, build config from default + passed values
+        init_config = {
+            # default values
+            **cls._hub_mixin_jsonable_default_values,
+            # passed values
+            **{
+                key: cls._encode_arg(value)  # Encode custom types as jsonable value
+                for key, value in passed_values.items()
+                if instance._is_jsonable(value)  # Only if jsonable or we have a custom encoder
+            },
+        }
+        passed_config = init_config.pop("config", {})
+        # Populate `init_config` with provided config
+        if isinstance(passed_config, dict):
+            init_config.update(passed_config)
+        # Set `config` attribute and return
+        if init_config != {}:
+            instance._hub_mixin_config = init_config
+        return instance
+    @classmethod
+    def _is_jsonable(cls, value: Any) -> bool:
+        """Check if a value is JSON serializable."""
+        if isinstance(value, cls._hub_mixin_jsonable_custom_types):
+            return True
+        return is_jsonable(value)
+    @classmethod
+    def _encode_arg(cls, arg: Any) -> Any:
+        """Encode an argument into a JSON serializable format."""
+        for type_, (encoder, _) in cls._hub_mixin_coders.items():
+            if isinstance(arg, type_):
+                if arg is None:
+                    return None
+                return encoder(arg)
+        return arg
+    @classmethod
+    def _decode_arg(cls, expected_type: Type[ARGS_T], value: Any) -> Optional[ARGS_T]:
+        """Decode a JSON serializable value into an argument."""
+        if is_simple_optional_type(expected_type):
+            if value is None:
+                return None
+            expected_type = unwrap_simple_optional_type(expected_type)
+        # Dataclass => handle it
+        if is_dataclass(expected_type):
+            return _load_dataclass(expected_type, value)  # type: ignore[return-value]
+        # Otherwise => check custom decoders
+        for type_, (_, decoder) in cls._hub_mixin_coders.items():
+            if inspect.isclass(expected_type) and issubclass(expected_type, type_):
+                return decoder(value)
+        # Otherwise => don't decode
+        return value
+    def save_pretrained(
+        self,
+        save_directory: Union[str, Path],
+        *,
+        config: Optional[Union[dict, DataclassInstance]] = None,
+        repo_id: Optional[str] = None,
+        push_to_hub: bool = False,
+        model_card_kwargs: Optional[Dict[str, Any]] = None,
+        **push_to_hub_kwargs,
+    ) -> Optional[str]:
+        """
+        Save weights in local directory.
+        Args:
+            save_directory (`str` or `Path`):
+                Path to directory in which the model weights and configuration will be saved.
+            config (`dict` or `DataclassInstance`, *optional*):
+                Model configuration specified as a key/value dictionary or a dataclass instance.
+            push_to_hub (`bool`, *optional*, defaults to `False`):
+                Whether or not to push your model to the Huggingface Hub after saving it.
+            repo_id (`str`, *optional*):
+                ID of your repository on the Hub. Used only if `push_to_hub=True`. Will default to the folder name if
+                not provided.
+            model_card_kwargs (`Dict[str, Any]`, *optional*):
+                Additional arguments passed to the model card template to customize the model card.
+            push_to_hub_kwargs:
+                Additional key word arguments passed along to the [`~ModelHubMixin.push_to_hub`] method.
+        Returns:
+            `str` or `None`: url of the commit on the Hub if `push_to_hub=True`, `None` otherwise.
+        """
+        save_directory = Path(save_directory)
+        save_directory.mkdir(parents=True, exist_ok=True)
+        # Remove config.json if already exists. After `_save_pretrained` we don't want to overwrite config.json
+        # as it might have been saved by the custom `_save_pretrained` already. However we do want to overwrite
+        # an existing config.json if it was not saved by `_save_pretrained`.
+        config_path = save_directory / constants.CONFIG_NAME
+        config_path.unlink(missing_ok=True)
+        # save model weights/files (framework-specific)
+        self._save_pretrained(save_directory)
+        # save config (if provided and if not serialized yet in `_save_pretrained`)
+        if config is None:
+            config = self._hub_mixin_config
+        if config is not None:
+            if is_dataclass(config):
+                config = asdict(config)  # type: ignore[arg-type]
+            if not config_path.exists():
+                config_str = json.dumps(config, sort_keys=True, indent=2)
+                config_path.write_text(config_str)
+        # save model card
+        model_card_path = save_directory / "README.md"
+        model_card_kwargs = model_card_kwargs if model_card_kwargs is not None else {}
+        if not model_card_path.exists():  # do not overwrite if already exists
+            self.generate_model_card(**model_card_kwargs).save(save_directory / "README.md")
+        # push to the Hub if required
+        if push_to_hub:
+            kwargs = push_to_hub_kwargs.copy()  # soft-copy to avoid mutating input
+            if config is not None:  # kwarg for `push_to_hub`
+                kwargs["config"] = config
+            if repo_id is None:
+                repo_id = save_directory.name  # Defaults to `save_directory` name
+            return self.push_to_hub(repo_id=repo_id, model_card_kwargs=model_card_kwargs, **kwargs)
+        return None
+    def _save_pretrained(self, save_directory: Path) -> None:
+        """
+        Overwrite this method in subclass to define how to save your model.
+        Check out our [integration guide](../guides/integrations) for instructions.
+        Args:
+            save_directory (`Path`):
+                Path to directory in which the model weights and configuration will be saved.
+        Raises:
+            `NotImplementedError`: always, unless overwritten in a subclass.
+        """
+        raise NotImplementedError
+    @classmethod
+    @validate_hf_hub_args
+    def from_pretrained(
+        cls: Type[T],
+        pretrained_model_name_or_path: Union[str, Path],
+        *,
+        force_download: bool = False,
+        resume_download: Optional[bool] = None,
+        proxies: Optional[Dict] = None,
+        token: Optional[Union[str, bool]] = None,
+        cache_dir: Optional[Union[str, Path]] = None,
+        local_files_only: bool = False,
+        revision: Optional[str] = None,
+        **model_kwargs,
+    ) -> T:
+        """
+        Download a model from the Huggingface Hub and instantiate it.
+        Args:
+            pretrained_model_name_or_path (`str`, `Path`):
+                - Either the `model_id` (string) of a model hosted on the Hub, e.g. `bigscience/bloom`.
+                - Or a path to a `directory` containing model weights saved using
+                    [`~transformers.PreTrainedModel.save_pretrained`], e.g., `../path/to/my_model_directory/`.
+            revision (`str`, *optional*):
+                Revision of the model on the Hub. Can be a branch name, a git tag or any commit id.
+                Defaults to the latest commit on `main` branch.
+            force_download (`bool`, *optional*, defaults to `False`):
+                Whether to force (re-)downloading the model weights and configuration files from the Hub, overriding
+                the existing cache.
+            proxies (`Dict[str, str]`, *optional*):
+                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
+                'http://hostname': 'foo.bar:4012'}`. The proxies are used on every request.
+            token (`str` or `bool`, *optional*):
+                The token to use as HTTP bearer authorization for remote files. By default, it will use the token
+                cached when running `huggingface-cli login`.
+            cache_dir (`str`, `Path`, *optional*):
+                Path to the folder where cached files are stored.
+            local_files_only (`bool`, *optional*, defaults to `False`):
+                If `True`, avoid downloading the file and return the path to the local cached file if it exists.
+            model_kwargs (`Dict`, *optional*):
+                Additional kwargs to pass to the model during initialization.
+        """
+        model_id = str(pretrained_model_name_or_path)
+        config_file: Optional[str] = None
+        if os.path.isdir(model_id):
+            if constants.CONFIG_NAME in os.listdir(model_id):
+                config_file = os.path.join(model_id, constants.CONFIG_NAME)
+            else:
+                logger.warning(f"{constants.CONFIG_NAME} not found in {Path(model_id).resolve()}")
+        else:
+            try:
+                config_file = hf_hub_download(
+                    repo_id=model_id,
+                    filename=constants.CONFIG_NAME,
+                    revision=revision,
+                    cache_dir=cache_dir,
+                    force_download=force_download,
+                    proxies=proxies,
+                    resume_download=resume_download,
+                    token=token,
+                    local_files_only=local_files_only,
+                )
+            except HfHubHTTPError as e:
+                logger.info(f"{constants.CONFIG_NAME} not found on the HuggingFace Hub: {str(e)}")
+        # Read config
+        config = None
+        if config_file is not None:
+            with open(config_file, "r", encoding="utf-8") as f:
+                config = json.load(f)
+            # Decode custom types in config
+            for key, value in config.items():
+                if key in cls._hub_mixin_init_parameters:
+                    expected_type = cls._hub_mixin_init_parameters[key].annotation
+                    if expected_type is not inspect.Parameter.empty:
+                        config[key] = cls._decode_arg(expected_type, value)
+            # Populate model_kwargs from config
+            for param in cls._hub_mixin_init_parameters.values():
+                if param.name not in model_kwargs and param.name in config:
+                    model_kwargs[param.name] = config[param.name]
+            # Check if `config` argument was passed at init
+            if "config" in cls._hub_mixin_init_parameters and "config" not in model_kwargs:
+                # Decode `config` argument if it was passed
+                config_annotation = cls._hub_mixin_init_parameters["config"].annotation
+                config = cls._decode_arg(config_annotation, config)
+                # Forward config to model initialization
+                model_kwargs["config"] = config
+            # Inject config if `**kwargs` are expected
+            if is_dataclass(cls):
+                for key in cls.__dataclass_fields__:
+                    if key not in model_kwargs and key in config:
+                        model_kwargs[key] = config[key]
+            elif any(param.kind == inspect.Parameter.VAR_KEYWORD for param in cls._hub_mixin_init_parameters.values()):
+                for key, value in config.items():
+                    if key not in model_kwargs:
+                        model_kwargs[key] = value
+            # Finally, also inject if `_from_pretrained` expects it
+            if cls._hub_mixin_inject_config and "config" not in model_kwargs:
+                model_kwargs["config"] = config
+        instance = cls._from_pretrained(
+            model_id=str(model_id),
+            revision=revision,
+            cache_dir=cache_dir,
+            force_download=force_download,
+            proxies=proxies,
+            resume_download=resume_download,
+            local_files_only=local_files_only,
+            token=token,
+            **model_kwargs,
+        )
+        # Implicitly set the config as instance attribute if not already set by the class
+        # This way `config` will be available when calling `save_pretrained` or `push_to_hub`.
+        if config is not None and (getattr(instance, "_hub_mixin_config", None) in (None, {})):
+            instance._hub_mixin_config = config
+        return instance
+    @classmethod
+    def _from_pretrained(
+        cls: Type[T],
+        *,
+        model_id: str,
+        revision: Optional[str],
+        cache_dir: Optional[Union[str, Path]],
+        force_download: bool,
+        proxies: Optional[Dict],
+        resume_download: Optional[bool],
+        local_files_only: bool,
+        token: Optional[Union[str, bool]],
+        **model_kwargs,
+    ) -> T:
+        """Overwrite this method in subclass to define how to load your model from pretrained.
+        Use [`hf_hub_download`] or [`snapshot_download`] to download files from the Hub before loading them. Most
+        args taken as input can be directly passed to those 2 methods. If needed, you can add more arguments to this
+        method using "model_kwargs". For example [`PyTorchModelHubMixin._from_pretrained`] takes as input a `map_location`
+        parameter to set on which device the model should be loaded.
+        Check out our [integration guide](../guides/integrations) for more instructions.
+        Args:
+            model_id (`str`):
+                ID of the model to load from the Huggingface Hub (e.g. `bigscience/bloom`).
+            revision (`str`, *optional*):
+                Revision of the model on the Hub. Can be a branch name, a git tag or any commit id. Defaults to the
+                latest commit on `main` branch.
+            force_download (`bool`, *optional*, defaults to `False`):
+                Whether to force (re-)downloading the model weights and configuration files from the Hub, overriding
+                the existing cache.
+            proxies (`Dict[str, str]`, *optional*):
+                A dictionary of proxy servers to use by protocol or endpoint (e.g., `{'http': 'foo.bar:3128',
+                'http://hostname': 'foo.bar:4012'}`).
+            resume_download (`bool`, *optional*):
+                Value of `resume_download` received by [`~ModelHubMixin.from_pretrained`], forwarded as-is.
+            token (`str` or `bool`, *optional*):
+                The token to use as HTTP bearer authorization for remote files. By default, it will use the token
+                cached when running `huggingface-cli login`.
+            cache_dir (`str`, `Path`, *optional*):
+                Path to the folder where cached files are stored.
+            local_files_only (`bool`, *optional*, defaults to `False`):
+                If `True`, avoid downloading the file and return the path to the local cached file if it exists.
+            model_kwargs:
+                Additional keyword arguments passed along to the [`~ModelHubMixin._from_pretrained`] method.
+        Raises:
+            `NotImplementedError`: always, unless overwritten in a subclass.
+        """
+        raise NotImplementedError
+    @validate_hf_hub_args
+    def push_to_hub(
+        self,
+        repo_id: str,
+        *,
+        config: Optional[Union[dict, DataclassInstance]] = None,
+        commit_message: str = "Push model using huggingface_hub.",
+        private: Optional[bool] = None,
+        token: Optional[str] = None,
+        branch: Optional[str] = None,
+        create_pr: Optional[bool] = None,
+        allow_patterns: Optional[Union[List[str], str]] = None,
+        ignore_patterns: Optional[Union[List[str], str]] = None,
+        delete_patterns: Optional[Union[List[str], str]] = None,
+        model_card_kwargs: Optional[Dict[str, Any]] = None,
+    ) -> str:
+        """
+        Upload model checkpoint to the Hub.
+        Use `allow_patterns` and `ignore_patterns` to precisely filter which files should be pushed to the hub. Use
+        `delete_patterns` to delete existing remote files in the same commit. See [`upload_folder`] reference for more
+        details.
+        Args:
+            repo_id (`str`):
+                ID of the repository to push to (example: `"username/my-model"`).
+            config (`dict` or `DataclassInstance`, *optional*):
+                Model configuration specified as a key/value dictionary or a dataclass instance.
+            commit_message (`str`, *optional*):
+                Message to commit while pushing.
+            private (`bool`, *optional*):
+                Whether the repository created should be private.
+                If `None` (default), the repo will be public unless the organization's default is private.
+            token (`str`, *optional*):
+                The token to use as HTTP bearer authorization for remote files. By default, it will use the token
+                cached when running `huggingface-cli login`.
+            branch (`str`, *optional*):
+                The git branch on which to push the model. This defaults to `"main"`.
+            create_pr (`boolean`, *optional*):
+                Whether or not to create a Pull Request from `branch` with that commit. Defaults to `False`.
+            allow_patterns (`List[str]` or `str`, *optional*):
+                If provided, only files matching at least one pattern are pushed.
+            ignore_patterns (`List[str]` or `str`, *optional*):
+                If provided, files matching any of the patterns are not pushed.
+            delete_patterns (`List[str]` or `str`, *optional*):
+                If provided, remote files matching any of the patterns will be deleted from the repo.
+            model_card_kwargs (`Dict[str, Any]`, *optional*):
+                Additional arguments passed to the model card template to customize the model card.
+        Returns:
+            The url of the commit of your model in the given repository.
+        """
+        api = HfApi(token=token)
+        repo_id = api.create_repo(repo_id=repo_id, private=private, exist_ok=True).repo_id
+        # Push the files to the repo in a single commit
+        with SoftTemporaryDirectory() as tmp:
+            saved_path = Path(tmp) / repo_id
+            self.save_pretrained(saved_path, config=config, model_card_kwargs=model_card_kwargs)
+            return api.upload_folder(
+                repo_id=repo_id,
+                repo_type="model",
+                folder_path=saved_path,
+                commit_message=commit_message,
+                revision=branch,
+                create_pr=create_pr,
+                allow_patterns=allow_patterns,
+                ignore_patterns=ignore_patterns,
+                delete_patterns=delete_patterns,
+            )
+    def generate_model_card(self, *args, **kwargs) -> ModelCard:
+        card = ModelCard.from_template(
+            card_data=self._hub_mixin_info.model_card_data,
+            template_str=self._hub_mixin_info.model_card_template,
+            repo_url=self._hub_mixin_info.repo_url,
+            docs_url=self._hub_mixin_info.docs_url,
+            **kwargs,
+        )
+        return card
+class PyTorchModelHubMixin(ModelHubMixin):
+    """
+    Implementation of [`ModelHubMixin`] to provide model Hub upload/download capabilities to PyTorch models. The model
+    is set in evaluation mode by default using `model.eval()` (dropout modules are deactivated). To train the model,
+    you should first set it back in training mode with `model.train()`.
+    See [`ModelHubMixin`] for more details on how to use the mixin.
+    Example:
+    ```python
+    >>> import torch
+    >>> import torch.nn as nn
+    >>> from huggingface_hub import PyTorchModelHubMixin
+    >>> class MyModel(
+    ...         nn.Module,
+    ...         PyTorchModelHubMixin,
+    ...         library_name="keras-nlp",
+    ...         repo_url="https://github.com/keras-team/keras-nlp",
+    ...         docs_url="https://keras.io/keras_nlp/",
+    ...         # ^ optional metadata to generate model card
+    ...     ):
+    ...     def __init__(self, hidden_size: int = 512, vocab_size: int = 30000, output_size: int = 4):
+    ...         super().__init__()
+    ...         self.param = nn.Parameter(torch.rand(hidden_size, vocab_size))
+    ...         self.linear = nn.Linear(output_size, vocab_size)
+    ...     def forward(self, x):
+    ...         return self.linear(x + self.param)
+    >>> model = MyModel(hidden_size=256)
+    # Save model weights to local directory
+    >>> model.save_pretrained("my-awesome-model")
+    # Push model weights to the Hub
+    >>> model.push_to_hub("my-awesome-model")
+    # Download and initialize weights from the Hub
+    >>> model = MyModel.from_pretrained("username/my-awesome-model")
+    >>> model.hidden_size
+    256
+    ```
+    """
+    def __init_subclass__(cls, *args, tags: Optional[List[str]] = None, **kwargs) -> None:
+        tags = tags or []
+        tags.append("pytorch_model_hub_mixin")
+        kwargs["tags"] = tags
+        return super().__init_subclass__(*args, **kwargs)
+    def _save_pretrained(self, save_directory: Path) -> None:
+        """Save weights from a Pytorch model to a local directory."""
+        model_to_save = self.module if hasattr(self, "module") else self  # type: ignore
+        save_model_as_safetensor(model_to_save, str(save_directory / constants.SAFETENSORS_SINGLE_FILE))
+    @classmethod
+    def _from_pretrained(
+        cls,
+        *,
+        model_id: str,
+        revision: Optional[str],
+        cache_dir: Optional[Union[str, Path]],
+        force_download: bool,
+        proxies: Optional[Dict],
+        resume_download: Optional[bool],
+        local_files_only: bool,
+        token: Union[str, bool, None],
+        map_location: str = "cpu",
+        strict: bool = False,
+        **model_kwargs,
+    ):
+        """Load Pytorch pretrained weights and return the loaded model."""
+        model = cls(**model_kwargs)
+        if os.path.isdir(model_id):
+            print("Loading weights from local directory")
+            model_file = os.path.join(model_id, constants.SAFETENSORS_SINGLE_FILE)
+            return cls._load_as_safetensor(model, model_file, map_location, strict)
+        else:
+            try:
+                model_file = hf_hub_download(
+                    repo_id=model_id,
+                    filename=constants.SAFETENSORS_SINGLE_FILE,
+                    revision=revision,
+                    cache_dir=cache_dir,
+                    force_download=force_download,
+                    proxies=proxies,
+                    resume_download=resume_download,
+                    token=token,
+                    local_files_only=local_files_only,
+                )
+                return cls._load_as_safetensor(model, model_file, map_location, strict)
+            except EntryNotFoundError:
+                model_file = hf_hub_download(
+                    repo_id=model_id,
+                    filename=constants.PYTORCH_WEIGHTS_NAME,
+                    revision=revision,
+                    cache_dir=cache_dir,
+                    force_download=force_download,
+                    proxies=proxies,
+                    resume_download=resume_download,
+                    token=token,
+                    local_files_only=local_files_only,
+                )
+                return cls._load_as_pickle(model, model_file, map_location, strict)
+    @classmethod
+    def _load_as_pickle(cls, model: T, model_file: str, map_location: str, strict: bool) -> T:
+        state_dict = torch.load(model_file, map_location=torch.device(map_location), weights_only=True)
+        model.load_state_dict(state_dict, strict=strict)  # type: ignore
+        model.eval()  # type: ignore
+        return model
+    @classmethod
+    def _load_as_safetensor(cls, model: T, model_file: str, map_location: str, strict: bool) -> T:
+        if packaging.version.parse(safetensors.__version__) < packaging.version.parse("0.4.3"):  # type: ignore [attr-defined]
+            load_model_as_safetensor(model, model_file, strict=strict)  # type: ignore [arg-type]
+            if map_location != "cpu":
+                logger.warning(
+                    "Loading model weights on other devices than 'cpu' is not supported natively in your version of safetensors."
+                    " This means that the model is loaded on 'cpu' first and then copied to the device."
+                    " This leads to a slower loading time."
+                    " Please update safetensors to version 0.4.3 or above for improved performance."
+                )
+                model.to(map_location)  # type: ignore [attr-defined]
+        else:
+            safetensors.torch.load_model(model, model_file, strict=strict, device=map_location)  # type: ignore [arg-type]
+        return model
+def _load_dataclass(datacls: Type[DataclassInstance], data: dict) -> DataclassInstance:
+    """Load a dataclass instance from a dictionary.
+    Fields not expected by the dataclass are ignored.
+    """
+    return datacls(**{k: v for k, v in data.items() if k in datacls.__dataclass_fields__})

.venv/lib/python3.11/site-packages/huggingface_hub/inference/__init__.py ADDED Viewed

File without changes

.venv/lib/python3.11/site-packages/huggingface_hub/inference/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (198 Bytes). View file

.venv/lib/python3.11/site-packages/huggingface_hub/inference/__pycache__/_common.cpython-311.pyc ADDED Viewed

Binary file (19.2 kB). View file

.venv/lib/python3.11/site-packages/huggingface_hub/inference/_client.py ADDED Viewed

The diff for this file is too large to render. See raw diff

.venv/lib/python3.11/site-packages/huggingface_hub/inference/_common.py ADDED Viewed

	@@ -0,0 +1,446 @@

+# coding=utf-8
+# Copyright 2023-present, the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains utilities used by both the sync and async inference clients."""
+import base64
+import io
+import json
+import logging
+from abc import ABC, abstractmethod
+from contextlib import contextmanager
+from dataclasses import dataclass
+from pathlib import Path
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    AsyncIterable,
+    BinaryIO,
+    ContextManager,
+    Dict,
+    Generator,
+    Iterable,
+    List,
+    Literal,
+    NoReturn,
+    Optional,
+    Union,
+    overload,
+)
+from requests import HTTPError
+from huggingface_hub.errors import (
+    GenerationError,
+    IncompleteGenerationError,
+    OverloadedError,
+    TextGenerationError,
+    UnknownError,
+    ValidationError,
+)
+from ..utils import (
+    get_session,
+    is_aiohttp_available,
+    is_numpy_available,
+    is_pillow_available,
+)
+from ._generated.types import ChatCompletionStreamOutput, TextGenerationStreamOutput
+if TYPE_CHECKING:
+    from aiohttp import ClientResponse, ClientSession
+    from PIL.Image import Image
+# TYPES
+# Aliases for the kinds of content accepted by the helpers below.
+UrlT = str
+PathT = Union[str, Path]
+BinaryT = Union[bytes, BinaryIO]
+# Any content: raw bytes or a binary file object, a local path, or a URL string.
+ContentT = Union[BinaryT, PathT, UrlT]
+# Used to set an `Accept: image/png` header
+TASKS_EXPECTING_IMAGES = {"text-to-image", "image-to-image"}
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+@dataclass
+class RequestParameters:
+    """Values describing one request, as returned by `TaskProviderHelper.prepare_request`."""
+    url: str
+    task: str
+    model: Optional[str]
+    # NOTE(review): presumably the JSON body of the request — confirm against callers.
+    json: Optional[Union[str, Dict, List]]
+    # `ContentT`: raw bytes, a binary file object, a path, or a URL string.
+    data: Optional[ContentT]
+    headers: Dict[str, Any]
+class TaskProviderHelper(ABC):
+    """Abstract base class defining the interface for task-specific provider helpers.
+
+    Subclasses must implement both `prepare_request` and `get_response`.
+    """
+    # Returns the `RequestParameters` for a call. All arguments are keyword-only.
+    @abstractmethod
+    def prepare_request(
+        self,
+        *,
+        inputs: Any,
+        parameters: Dict[str, Any],
+        headers: Dict,
+        model: Optional[str],
+        api_key: Optional[str],
+        extra_payload: Optional[Dict[str, Any]] = None,
+    ) -> RequestParameters: ...
+    # Receives a response as bytes or as a dict; the return type is left to each implementation.
+    @abstractmethod
+    def get_response(self, response: Union[bytes, Dict]) -> Any: ...
+# Dataclass describing the status of a model.
+# NOTE(review): stated to be used by `get_model_status`, which is not defined in this module.
+@dataclass
+class ModelStatus:
+    """
+    Represents the status of a model in the Hugging Face Inference API.
+    Args:
+        loaded (`bool`):
+            If the model is currently loaded into Hugging Face's InferenceAPI. Models
+            are loaded on-demand, leading to the user's first request taking longer.
+            If a model is loaded, you can be assured that it is in a healthy state.
+        state (`str`):
+            The current state of the model. This can be 'Loaded', 'Loadable', 'TooBig'.
+            If a model's state is 'Loadable', it's not too big and has a supported
+            backend. Loadable models are automatically loaded when the user first
+            requests inference on the endpoint. This means it is transparent for the
+            user to load a model, except that the first call takes longer to complete.
+        compute_type (`Dict`):
+            Information about the compute resource the model is using or will use, such as 'gpu' type and number of
+            replicas.
+        framework (`str`):
+            The name of the framework that the model was built with, such as 'transformers'
+            or 'text-generation-inference'.
+    """
+    loaded: bool
+    state: str
+    compute_type: Dict
+    framework: str
+## IMPORT UTILS
+def _import_aiohttp():
+    # Make sure `aiohttp` is installed on the machine.
+    if not is_aiohttp_available():
+        raise ImportError("Please install aiohttp to use `AsyncInferenceClient` (`pip install aiohttp`).")
+    import aiohttp
+    return aiohttp
+def _import_numpy():
+    """Make sure `numpy` is installed on the machine."""
+    if not is_numpy_available():
+        raise ImportError("Please install numpy to use deal with embeddings (`pip install numpy`).")
+    import numpy
+    return numpy
+def _import_pil_image():
+    """Make sure `PIL` is installed on the machine."""
+    if not is_pillow_available():
+        raise ImportError(
+            "Please install Pillow to use deal with images (`pip install Pillow`). If you don't want the image to be"
+            " post-processed, use `client.post(...)` and get the raw response from the server."
+        )
+    from PIL import Image
+    return Image
+## ENCODING / DECODING UTILS
+@overload
+def _open_as_binary(
+    content: ContentT,
+) -> ContextManager[BinaryT]: ...  # means "if input is not None, output is not None"
+@overload
+def _open_as_binary(
+    content: Literal[None],
+) -> ContextManager[Literal[None]]: ...  # means "if input is None, output is None"
+@contextmanager  # type: ignore
+def _open_as_binary(content: Optional[ContentT]) -> Generator[Optional[BinaryT], None, None]:
+    """Open `content` as a binary file, either from a URL, a local path, or raw bytes.
+    Do nothing if `content` is None,
+    TODO: handle a PIL.Image as input
+    TODO: handle base64 as input
+    """
+    # If content is a string => must be either a URL or a path
+    if isinstance(content, str):
+        if content.startswith("https://") or content.startswith("http://"):
+            logger.debug(f"Downloading content from {content}")
+            yield get_session().get(content).content  # TODO: retrieve as stream and pipe to post request ?
+            return
+        content = Path(content)
+        if not content.exists():
+            raise FileNotFoundError(
+                f"File not found at {content}. If `data` is a string, it must either be a URL or a path to a local"
+                " file. To pass raw content, please encode it as bytes first."
+            )
+    # If content is a Path => open it
+    if isinstance(content, Path):
+        logger.debug(f"Opening content from {content}")
+        with content.open("rb") as f:
+            yield f
+    else:
+        # Otherwise: already a file-like object or None
+        yield content
+def _b64_encode(content: ContentT) -> str:
+    """Encode a raw file (image, audio) into base64. Can be bytes, an opened file, a path or a URL."""
+    with _open_as_binary(content) as data:
+        data_as_bytes = data if isinstance(data, bytes) else data.read()
+        return base64.b64encode(data_as_bytes).decode()
+def _b64_to_image(encoded_image: str) -> "Image":
+    """Parse a base64-encoded string into a PIL Image."""
+    Image = _import_pil_image()
+    return Image.open(io.BytesIO(base64.b64decode(encoded_image)))
+def _bytes_to_list(content: bytes) -> List:
+    """Parse bytes from a Response object into a Python list.
+    Expects the response body to be JSON-encoded data.
+    NOTE: This is exactly the same implementation as `_bytes_to_dict` and will not complain if the returned data is a
+    dictionary. The only advantage of having both is to help the user (and mypy) understand what kind of data to expect.
+    """
+    return json.loads(content.decode())
+def _bytes_to_dict(content: bytes) -> Dict:
+    """Parse bytes from a Response object into a Python dictionary.
+    Expects the response body to be JSON-encoded data.
+    NOTE: This is exactly the same implementation as `_bytes_to_list` and will not complain if the returned data is a
+    list. The only advantage of having both is to help the user (and mypy) understand what kind of data to expect.
+    """
+    return json.loads(content.decode())
+def _bytes_to_image(content: bytes) -> "Image":
+    """Parse bytes from a Response object into a PIL Image.
+    Expects the response body to be raw bytes. To deal with b64 encoded images, use `_b64_to_image` instead.
+    """
+    Image = _import_pil_image()
+    return Image.open(io.BytesIO(content))
+def _as_dict(response: Union[bytes, Dict]) -> Dict:
+    return json.loads(response) if isinstance(response, bytes) else response
+## PAYLOAD UTILS
+## STREAMING UTILS
+def _stream_text_generation_response(
+    bytes_output_as_lines: Iterable[bytes], details: bool
+) -> Union[Iterable[str], Iterable[TextGenerationStreamOutput]]:
+    """Used in `InferenceClient.text_generation`."""
+    # Parse ServerSentEvents
+    for byte_payload in bytes_output_as_lines:
+        try:
+            output = _format_text_generation_stream_output(byte_payload, details)
+        except StopIteration:
+            break
+        if output is not None:
+            yield output
+async def _async_stream_text_generation_response(
+    bytes_output_as_lines: AsyncIterable[bytes], details: bool
+) -> Union[AsyncIterable[str], AsyncIterable[TextGenerationStreamOutput]]:
+    """Used in `AsyncInferenceClient.text_generation`."""
+    # Parse ServerSentEvents
+    async for byte_payload in bytes_output_as_lines:
+        try:
+            output = _format_text_generation_stream_output(byte_payload, details)
+        except StopIteration:
+            break
+        if output is not None:
+            yield output
+def _format_text_generation_stream_output(
+    byte_payload: bytes, details: bool
+) -> Optional[Union[str, TextGenerationStreamOutput]]:
+    if not byte_payload.startswith(b"data:"):
+        return None  # empty line
+    if byte_payload.strip() == b"data: [DONE]":
+        raise StopIteration("[DONE] signal received.")
+    # Decode payload
+    payload = byte_payload.decode("utf-8")
+    json_payload = json.loads(payload.lstrip("data:").rstrip("/n"))
+    # Either an error as being returned
+    if json_payload.get("error") is not None:
+        raise _parse_text_generation_error(json_payload["error"], json_payload.get("error_type"))
+    # Or parse token payload
+    output = TextGenerationStreamOutput.parse_obj_as_instance(json_payload)
+    return output.token.text if not details else output
+def _stream_chat_completion_response(
+    bytes_lines: Iterable[bytes],
+) -> Iterable[ChatCompletionStreamOutput]:
+    """Used in `InferenceClient.chat_completion` if model is served with TGI."""
+    for item in bytes_lines:
+        try:
+            output = _format_chat_completion_stream_output(item)
+        except StopIteration:
+            break
+        if output is not None:
+            yield output
+async def _async_stream_chat_completion_response(
+    bytes_lines: AsyncIterable[bytes],
+) -> AsyncIterable[ChatCompletionStreamOutput]:
+    """Used in `AsyncInferenceClient.chat_completion`."""
+    async for item in bytes_lines:
+        try:
+            output = _format_chat_completion_stream_output(item)
+        except StopIteration:
+            break
+        if output is not None:
+            yield output
+def _format_chat_completion_stream_output(
+    byte_payload: bytes,
+) -> Optional[ChatCompletionStreamOutput]:
+    if not byte_payload.startswith(b"data:"):
+        return None  # empty line
+    if byte_payload.strip() == b"data: [DONE]":
+        raise StopIteration("[DONE] signal received.")
+    # Decode payload
+    payload = byte_payload.decode("utf-8")
+    json_payload = json.loads(payload.lstrip("data:").rstrip("/n"))
+    # Either an error as being returned
+    if json_payload.get("error") is not None:
+        raise _parse_text_generation_error(json_payload["error"], json_payload.get("error_type"))
+    # Or parse token payload
+    return ChatCompletionStreamOutput.parse_obj_as_instance(json_payload)
+async def _async_yield_from(client: "ClientSession", response: "ClientResponse") -> AsyncIterable[bytes]:
+    async for byte_payload in response.content:
+        yield byte_payload.strip()
+    await client.close()
+# "TGI servers" are servers running with the `text-generation-inference` backend.
+# This backend is the go-to solution to run large language models at scale. However,
+# for some smaller models (e.g. "gpt2") the default `transformers` + `api-inference`
+# solution is still in use.
+#
+# Both approaches have very similar APIs, but not exactly the same. What we do first in
+# the `text_generation` method is to assume the model is served via TGI. If we realize
+# it's not the case (i.e. we receive an HTTP 400 Bad Request), we fallback to the
+# default API with a warning message. When that's the case, we remember the unsupported
+# attributes for this model in the `_UNSUPPORTED_TEXT_GENERATION_KWARGS` global variable.
+#
+# In addition, TGI servers have a built-in API route for chat-completion, which is not
+# available on the default API. We use this route to provide a more consistent behavior
+# when available.
+#
+# For more details, see https://github.com/huggingface/text-generation-inference and
+# https://huggingface.co/docs/api-inference/detailed_parameters#text-generation-task.
+_UNSUPPORTED_TEXT_GENERATION_KWARGS: Dict[Optional[str], List[str]] = {}
+def _set_unsupported_text_generation_kwargs(model: Optional[str], unsupported_kwargs: List[str]) -> None:
+    _UNSUPPORTED_TEXT_GENERATION_KWARGS.setdefault(model, []).extend(unsupported_kwargs)
+def _get_unsupported_text_generation_kwargs(model: Optional[str]) -> List[str]:
+    return _UNSUPPORTED_TEXT_GENERATION_KWARGS.get(model, [])
+# TEXT GENERATION ERRORS
+# ----------------------
+# Text-generation errors are parsed separately to handle as much as possible the errors returned by the text generation
+# inference project (https://github.com/huggingface/text-generation-inference).
+# ----------------------
+def raise_text_generation_error(http_error: HTTPError) -> NoReturn:
+    """
+    Try to parse text-generation-inference error message and raise HTTPError in any case.
+    Args:
+        error (`HTTPError`):
+            The HTTPError that have been raised.
+    """
+    # Try to parse a Text Generation Inference error
+    try:
+        # Hacky way to retrieve payload in case of aiohttp error
+        payload = getattr(http_error, "response_error_payload", None) or http_error.response.json()
+        error = payload.get("error")
+        error_type = payload.get("error_type")
+    except Exception:  # no payload
+        raise http_error
+    # If error_type => more information than `hf_raise_for_status`
+    if error_type is not None:
+        exception = _parse_text_generation_error(error, error_type)
+        raise exception from http_error
+    # Otherwise, fallback to default error
+    raise http_error
+def _parse_text_generation_error(error: Optional[str], error_type: Optional[str]) -> TextGenerationError:
+    """Return (without raising) the exception instance matching `error_type`, built from `error`.
+    Any `error_type` other than the four known ones yields an `UnknownError`.
+    """
+    known_types = (
+        ("generation", GenerationError),
+        ("incomplete_generation", IncompleteGenerationError),
+        ("overloaded", OverloadedError),
+        ("validation", ValidationError),
+    )
+    for type_name, exception_cls in known_types:
+        if error_type == type_name:
+            return exception_cls(error)  # type: ignore
+    return UnknownError(error)  # type: ignore

.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/__init__.py ADDED Viewed

File without changes

.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/_async_client.py ADDED Viewed

The diff for this file is too large to render. See raw diff

.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/types/audio_to_audio.py ADDED Viewed

	@@ -0,0 +1,31 @@

+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+#   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+#   - specs:  https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from dataclasses import dataclass
+from typing import Any
+from .base import BaseInferenceType
+@dataclass
+class AudioToAudioInput(BaseInferenceType):
+    """Inputs for Audio to Audio inference"""
+    # Single required field; it is typed `Any`, so no format is enforced by the annotation.
+    inputs: Any
+    """The input audio data"""
+@dataclass
+class AudioToAudioOutputElement(BaseInferenceType):
+    """Outputs of inference for the Audio To Audio task
+    A generated audio file with its label.
+    """
+    # All three fields are required (no defaults).
+    blob: Any
+    """The generated audio file."""
+    content_type: str
+    """The content type of audio file."""
+    label: str
+    """The label of the audio file."""

.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py ADDED Viewed

	@@ -0,0 +1,115 @@

+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+#   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+#   - specs:  https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from dataclasses import dataclass
+from typing import List, Literal, Optional, Union
+from .base import BaseInferenceType
+# The only string value accepted by `early_stopping` (which also accepts a bool).
+AutomaticSpeechRecognitionEarlyStoppingEnum = Literal["never"]
+@dataclass
+class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):
+    """Parametrization of the text generation process"""
+    # Every field below is optional and defaults to None.
+    do_sample: Optional[bool] = None
+    """Whether to use sampling instead of greedy decoding when generating new tokens."""
+    early_stopping: Optional[Union[bool, "AutomaticSpeechRecognitionEarlyStoppingEnum"]] = None
+    """Controls the stopping condition for beam-based methods."""
+    epsilon_cutoff: Optional[float] = None
+    """If set to float strictly between 0 and 1, only tokens with a conditional probability
+    greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
+    3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
+    Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
+    """
+    eta_cutoff: Optional[float] = None
+    """Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
+    float strictly between 0 and 1, a token is only considered if it is greater than either
+    eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
+    term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
+    the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
+    See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
+    for more details.
+    """
+    max_length: Optional[int] = None
+    """The maximum length (in tokens) of the generated text, including the input."""
+    max_new_tokens: Optional[int] = None
+    """The maximum number of tokens to generate. Takes precedence over max_length."""
+    min_length: Optional[int] = None
+    """The minimum length (in tokens) of the generated text, including the input."""
+    min_new_tokens: Optional[int] = None
+    """The minimum number of tokens to generate. Takes precedence over min_length."""
+    num_beam_groups: Optional[int] = None
+    """Number of groups to divide num_beams into in order to ensure diversity among different
+    groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
+    """
+    num_beams: Optional[int] = None
+    """Number of beams to use for beam search."""
+    penalty_alpha: Optional[float] = None
+    """The value balances the model confidence and the degeneration penalty in contrastive
+    search decoding.
+    """
+    temperature: Optional[float] = None
+    """The value used to modulate the next token probabilities."""
+    top_k: Optional[int] = None
+    """The number of highest probability vocabulary tokens to keep for top-k-filtering."""
+    top_p: Optional[float] = None
+    """If set to float < 1, only the smallest set of most probable tokens with probabilities
+    that add up to top_p or higher are kept for generation.
+    """
+    typical_p: Optional[float] = None
+    """Local typicality measures how similar the conditional probability of predicting a target
+    token next is to the expected conditional probability of predicting a random token next,
+    given the partial text already generated. If set to float < 1, the smallest set of the
+    most locally typical tokens with probabilities that add up to typical_p or higher are
+    kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
+    """
+    use_cache: Optional[bool] = None
+    """Whether the model should use the past last key/values attentions to speed up decoding"""
+@dataclass
+class AutomaticSpeechRecognitionParameters(BaseInferenceType):
+    """Additional inference parameters for Automatic Speech Recognition"""
+    # Both fields are optional and default to None.
+    return_timestamps: Optional[bool] = None
+    """Whether to output corresponding timestamps with the generated text"""
+    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
+    generate_kwargs: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
+    """Parametrization of the text generation process"""
+@dataclass
+class AutomaticSpeechRecognitionInput(BaseInferenceType):
+    """Inputs for Automatic Speech Recognition inference"""
+    # `inputs` is required; `parameters` is optional and defaults to None.
+    inputs: str
+    """The input audio data as a base64-encoded string. If no `parameters` are provided, you can
+    also provide the audio data as a raw bytes payload.
+    """
+    parameters: Optional[AutomaticSpeechRecognitionParameters] = None
+    """Additional inference parameters for Automatic Speech Recognition"""
+# One element of `AutomaticSpeechRecognitionOutput.chunks`; both fields are required.
+@dataclass
+class AutomaticSpeechRecognitionOutputChunk(BaseInferenceType):
+    text: str
+    """A chunk of text identified by the model"""
+    timestamps: List[float]
+    """The start and end timestamps corresponding with the text"""
+@dataclass
+class AutomaticSpeechRecognitionOutput(BaseInferenceType):
+    """Outputs of inference for the Automatic Speech Recognition task"""
+    # `text` is required; `chunks` is optional and defaults to None.
+    text: str
+    """The recognized text."""
+    chunks: Optional[List[AutomaticSpeechRecognitionOutputChunk]] = None
+    """When returnTimestamps is enabled, chunks contains a list of audio chunks identified by
+    the model.
+    """

.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/types/depth_estimation.py ADDED Viewed

	@@ -0,0 +1,29 @@

+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+#   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+#   - specs:  https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from dataclasses import dataclass
+from typing import Any, Dict, Optional
+from .base import BaseInferenceType
+@dataclass
+class DepthEstimationInput(BaseInferenceType):
+    """Inputs for Depth Estimation inference"""
+    # `inputs` is required and typed `Any`; `parameters` is a free-form dict defaulting to None.
+    inputs: Any
+    """The input image data"""
+    parameters: Optional[Dict[str, Any]] = None
+    """Additional inference parameters for Depth Estimation"""
+@dataclass
+class DepthEstimationOutput(BaseInferenceType):
+    """Outputs of inference for the Depth Estimation task"""
+    # Both fields are required and typed `Any`.
+    depth: Any
+    """The predicted depth as an image"""
+    predicted_depth: Any
+    """The predicted depth as a tensor"""

.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/types/fill_mask.py ADDED Viewed

	@@ -0,0 +1,48 @@

+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+#   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+#   - specs:  https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from dataclasses import dataclass
+from typing import Any, List, Optional
+from .base import BaseInferenceType
+@dataclass
+class FillMaskParameters(BaseInferenceType):
+    """Additional inference parameters for Fill Mask"""
+    # Both fields are optional and default to None.
+    targets: Optional[List[str]] = None
+    """When passed, the model will limit the scores to the passed targets instead of looking up
+    in the whole vocabulary. If the provided targets are not in the model vocab, they will be
+    tokenized and the first resulting token will be used (with a warning, and that might be
+    slower).
+    """
+    top_k: Optional[int] = None
+    """When passed, overrides the number of predictions to return."""
+@dataclass
+class FillMaskInput(BaseInferenceType):
+    """Inputs for Fill Mask inference"""
+    # `inputs` is required; `parameters` is optional and defaults to None.
+    inputs: str
+    """The text with masked tokens"""
+    parameters: Optional[FillMaskParameters] = None
+    """Additional inference parameters for Fill Mask"""
+@dataclass
+class FillMaskOutputElement(BaseInferenceType):
+    """Outputs of inference for the Fill Mask task"""
+    score: float
+    """The corresponding probability"""
+    sequence: str
+    """The corresponding input with the mask token prediction."""
+    token: int
+    """The predicted token id (to replace the masked one)."""
+    # NOTE(review): `token_str` is required and typed `Any`, and has no description of its own.
+    # The description below follows `fill_mask_output_token_str`; how the two fields relate
+    # cannot be told from this file — confirm against the upstream schema.
+    token_str: Any
+    fill_mask_output_token_str: Optional[str] = None
+    """The predicted token (to replace the masked one)."""

.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/types/image_segmentation.py ADDED Viewed

	@@ -0,0 +1,52 @@

+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+#   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+#   - specs:  https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from dataclasses import dataclass
+from typing import Literal, Optional
+from .base import BaseInferenceType
+# Values accepted by `ImageSegmentationParameters.subtask`.
+ImageSegmentationSubtask = Literal["instance", "panoptic", "semantic"]
+@dataclass
+class ImageSegmentationParameters(BaseInferenceType):
+    """Additional inference parameters for Image Segmentation"""
+    # Every field is optional and defaults to None.
+    mask_threshold: Optional[float] = None
+    """Threshold to use when turning the predicted masks into binary values."""
+    overlap_mask_area_threshold: Optional[float] = None
+    """Mask overlap threshold to eliminate small, disconnected segments."""
+    subtask: Optional["ImageSegmentationSubtask"] = None
+    """Segmentation task to be performed, depending on model capabilities."""
+    threshold: Optional[float] = None
+    """Probability threshold to filter out predicted masks."""
+@dataclass
+class ImageSegmentationInput(BaseInferenceType):
+    """Inputs for Image Segmentation inference"""
+    # `inputs` is required; `parameters` is optional and defaults to None.
+    inputs: str
+    """The input image data as a base64-encoded string. If no `parameters` are provided, you can
+    also provide the image data as a raw bytes payload.
+    """
+    parameters: Optional[ImageSegmentationParameters] = None
+    """Additional inference parameters for Image Segmentation"""
+@dataclass
+class ImageSegmentationOutputElement(BaseInferenceType):
+    """Outputs of inference for the Image Segmentation task
+    A predicted mask / segment
+    """
+    # `label` and `mask` are required; `score` is optional and defaults to None.
+    label: str
+    """The label of the predicted segment."""
+    mask: str
+    """The corresponding mask as a black-and-white image (base64-encoded)."""
+    score: Optional[float] = None
+    """The score or confidence degree the model has."""

.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/types/image_to_image.py ADDED Viewed

	@@ -0,0 +1,55 @@

+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+#   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+#   - specs:  https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from dataclasses import dataclass
+from typing import Any, Optional
+from .base import BaseInferenceType
+@dataclass
+class ImageToImageTargetSize(BaseInferenceType):
+    """The size in pixel of the output image."""
+    # Both dimensions are required integers.
+    height: int
+    width: int
+@dataclass
+class ImageToImageParameters(BaseInferenceType):
+    """Additional inference parameters for Image To Image"""
+    # Every field is optional and defaults to None.
+    guidance_scale: Optional[float] = None
+    """For diffusion models. A higher guidance scale value encourages the model to generate
+    images closely linked to the text prompt at the expense of lower image quality.
+    """
+    negative_prompt: Optional[str] = None
+    """One prompt to guide what NOT to include in image generation."""
+    num_inference_steps: Optional[int] = None
+    """For diffusion models. The number of denoising steps. More denoising steps usually lead to
+    a higher quality image at the expense of slower inference.
+    """
+    target_size: Optional[ImageToImageTargetSize] = None
+    """The size in pixel of the output image."""
+@dataclass
+class ImageToImageInput(BaseInferenceType):
+    """Inputs for Image To Image inference.
+    Carries the image (`inputs`, declared without a default) and an optional `parameters`
+    object that defaults to `None`.
+    """
+    # NOTE(review): annotated as `str`, yet the description below also allows a raw bytes
+    # payload when no `parameters` are sent — confirm how callers are expected to pass bytes.
+    inputs: str
+    """The input image data as a base64-encoded string. If no `parameters` are provided, you can
+    also provide the image data as a raw bytes payload.
+    """
+    parameters: Optional[ImageToImageParameters] = None
+    """Additional inference parameters for Image To Image"""
+@dataclass
+class ImageToImageOutput(BaseInferenceType):
+    """Outputs of inference for the Image To Image task.
+    Holds a single field, `image`, declared without a default.
+    """
+    # NOTE(review): typed as `Any`, while the description below says the payload is raw bytes.
+    image: Any
+    """The output image returned as raw bytes in the payload."""

.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/types/image_to_text.py ADDED Viewed

	@@ -0,0 +1,102 @@

+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+#   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+#   - specs:  https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from dataclasses import dataclass
+from typing import Any, Literal, Optional, Union
+from .base import BaseInferenceType
+# String literal accepted by `ImageToTextGenerationParameters.early_stopping` in addition
+# to a plain bool.
+ImageToTextEarlyStoppingEnum = Literal["never"]
+@dataclass
+class ImageToTextGenerationParameters(BaseInferenceType):
+    """Parametrization of the text generation process.
+    Every field is optional and defaults to `None`.
+    """
+    do_sample: Optional[bool] = None
+    """Whether to use sampling instead of greedy decoding when generating new tokens."""
+    # Accepts either a bool or the `ImageToTextEarlyStoppingEnum` literal; the alias is
+    # referenced through a string (forward-reference style) annotation.
+    early_stopping: Optional[Union[bool, "ImageToTextEarlyStoppingEnum"]] = None
+    """Controls the stopping condition for beam-based methods."""
+    epsilon_cutoff: Optional[float] = None
+    """If set to float strictly between 0 and 1, only tokens with a conditional probability
+    greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
+    3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
+    Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
+    """
+    eta_cutoff: Optional[float] = None
+    """Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
+    float strictly between 0 and 1, a token is only considered if it is greater than either
+    eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
+    term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
+    the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
+    See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
+    for more details.
+    """
+    max_length: Optional[int] = None
+    """The maximum length (in tokens) of the generated text, including the input."""
+    max_new_tokens: Optional[int] = None
+    """The maximum number of tokens to generate. Takes precedence over max_length."""
+    min_length: Optional[int] = None
+    """The minimum length (in tokens) of the generated text, including the input."""
+    min_new_tokens: Optional[int] = None
+    """The minimum number of tokens to generate. Takes precedence over min_length."""
+    num_beam_groups: Optional[int] = None
+    """Number of groups to divide num_beams into in order to ensure diversity among different
+    groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
+    """
+    num_beams: Optional[int] = None
+    """Number of beams to use for beam search."""
+    penalty_alpha: Optional[float] = None
+    """The value balances the model confidence and the degeneration penalty in contrastive
+    search decoding.
+    """
+    temperature: Optional[float] = None
+    """The value used to modulate the next token probabilities."""
+    top_k: Optional[int] = None
+    """The number of highest probability vocabulary tokens to keep for top-k-filtering."""
+    top_p: Optional[float] = None
+    """If set to float < 1, only the smallest set of most probable tokens with probabilities
+    that add up to top_p or higher are kept for generation.
+    """
+    typical_p: Optional[float] = None
+    """Local typicality measures how similar the conditional probability of predicting a target
+    token next is to the expected conditional probability of predicting a random token next,
+    given the partial text already generated. If set to float < 1, the smallest set of the
+    most locally typical tokens with probabilities that add up to typical_p or higher are
+    kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
+    """
+    use_cache: Optional[bool] = None
+    """Whether the model should use the past last key/values attentions to speed up decoding"""
+@dataclass
+class ImageToTextParameters(BaseInferenceType):
+    """Additional inference parameters for Image To Text.
+    Both fields are optional and default to `None`.
+    """
+    # NOTE(review): `ImageToTextGenerationParameters` (the type of `generate_kwargs`) declares
+    # its own `max_new_tokens`; which value applies when both are set is not visible here.
+    max_new_tokens: Optional[int] = None
+    """The amount of maximum tokens to generate."""
+    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
+    generate_kwargs: Optional[ImageToTextGenerationParameters] = None
+    """Parametrization of the text generation process"""
+@dataclass
+class ImageToTextInput(BaseInferenceType):
+    """Inputs for Image To Text inference.
+    `inputs` is declared without a default; `parameters` is optional and defaults to `None`.
+    """
+    # Typed as `Any`: the annotation places no constraint on how the image is represented.
+    inputs: Any
+    """The input image data"""
+    parameters: Optional[ImageToTextParameters] = None
+    """Additional inference parameters for Image To Text"""
+@dataclass
+class ImageToTextOutput(BaseInferenceType):
+    """Outputs of inference for the Image To Text task.
+    Declares `generated_text` (no default) and the optional
+    `image_to_text_output_generated_text`, which defaults to `None`.
+    """
+    # NOTE(review): this field has no description of its own; the "The generated text." string
+    # below follows `image_to_text_output_generated_text`. The pair of similarly named fields
+    # looks like a code-generation artifact — confirm against the JSON schema spec.
+    generated_text: Any
+    image_to_text_output_generated_text: Optional[str] = None
+    """The generated text."""

.venv/lib/python3.11/site-packages/huggingface_hub/inference/_generated/types/object_detection.py ADDED Viewed

	@@ -0,0 +1,59 @@

+# Inference code generated from the JSON schema spec in @huggingface/tasks.
+#
+# See:
+#   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
+#   - specs:  https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
+from dataclasses import dataclass
+from typing import Optional
+from .base import BaseInferenceType
+@dataclass
+class ObjectDetectionParameters(BaseInferenceType):
+    """Additional inference parameters for Object Detection.
+    The single field, `threshold`, is optional and defaults to `None`.
+    """
+    threshold: Optional[float] = None
+    """The probability necessary to make a prediction."""
+@dataclass
+class ObjectDetectionInput(BaseInferenceType):
+    """Inputs for Object Detection inference.
+    Carries the image (`inputs`, declared without a default) and an optional `parameters`
+    object that defaults to `None`.
+    """
+    # NOTE(review): annotated as `str`, yet the description below also allows a raw bytes
+    # payload when no `parameters` are sent — confirm how callers are expected to pass bytes.
+    inputs: str
+    """The input image data as a base64-encoded string. If no `parameters` are provided, you can
+    also provide the image data as a raw bytes payload.
+    """
+    parameters: Optional[ObjectDetectionParameters] = None
+    """Additional inference parameters for Object Detection"""
+@dataclass
+class ObjectDetectionBoundingBox(BaseInferenceType):
+    """The predicted bounding box. Coordinates are relative to the top left corner of the input
+    image.
+    All four coordinates are integers declared without defaults.
+    """
+    # NOTE: fields are declared max-before-min on each axis (xmax, xmin, ymax, ymin), and
+    # `@dataclass` orders positional constructor arguments by declaration; keyword arguments
+    # avoid mixing them up.
+    xmax: int
+    """The x-coordinate of the bottom-right corner of the bounding box."""
+    xmin: int
+    """The x-coordinate of the top-left corner of the bounding box."""
+    ymax: int
+    """The y-coordinate of the bottom-right corner of the bounding box."""
+    ymin: int
+    """The y-coordinate of the top-left corner of the bounding box."""
+@dataclass
+class ObjectDetectionOutputElement(BaseInferenceType):
+    """Outputs of inference for the Object Detection task.
+    One predicted bounding box with its label and score; `box`, `label` and `score` are all
+    declared without defaults.
+    """
+    box: ObjectDetectionBoundingBox
+    """The predicted bounding box. Coordinates are relative to the top left corner of the input
+    image.
+    """
+    label: str
+    """The predicted label for the bounding box."""
+    score: float
+    """The associated score / probability."""