| | """Utilities to efficiently compute the SHA 256 hash of a bunch of bytes.""" |
| |
|
| | from typing import BinaryIO, Optional |
| |
|
| | from .insecure_hashlib import sha1, sha256 |
| |
|
| |
|
def sha_fileobj(fileobj: BinaryIO, chunk_size: Optional[int] = None) -> bytes:
    """
    Hash the content of a binary file object with sha256, reading it piece by piece.

    The stream is consumed from its current position until `read` returns an empty
    result, so only one chunk is held in memory at a time.

    Args:
        fileobj (file-like object):
            The binary stream to hash, typically obtained with `open(path, "rb")`.
        chunk_size (`int`, *optional*):
            How many bytes to request from `fileobj` per read. Defaults to 1MB
            (`1024 * 1024` bytes) when not provided.

    Returns:
        `bytes`: the raw (non-hexadecimal) sha256 digest of the data read from `fileobj`.
    """
    read_size = 1024 * 1024 if chunk_size is None else chunk_size

    hasher = sha256()
    exhausted = False
    while not exhausted:
        block = fileobj.read(read_size)
        # Feeding the final empty read to the hasher is a no-op, so the digest is unaffected.
        hasher.update(block)
        exhausted = not block
    return hasher.digest()
| |
|
| |
|
def git_hash(data: bytes) -> str:
    """
    Compute the git blob identifier (sha1) of a bytes payload, exactly as git does.

    Git hashes a blob as the sha1 of `b"blob <size>\\0"` followed by the content itself,
    which is what `git hash-object` outputs. See https://git-scm.com/docs/git-hash-object
    for more details.

    Note: this is only meaningful for regular files. For LFS files, the real git hash is
    computed on the pointer file content rather than on the actual file content. For
    simplicity, LFS files are compared through the sha256 of their content instead.

    Args:
        data (`bytes`):
            The content to compute the git-hash for.

    Returns:
        `str`: the git-hash of `data` as a hexadecimal string.

    Example:
    ```python
    >>> from huggingface_hub.utils.sha import git_hash
    >>> git_hash(b"Hello, World!")
    'b45ef6fec89518d314f546fd6c3025367b721684'
    ```
    """
    hasher = sha1()
    # Blob header: object type, decimal payload length, then a NUL separator.
    hasher.update(b"blob %d\0" % len(data))
    # The payload is fed separately so no concatenated copy of `data` is built.
    hasher.update(data)
    return hasher.hexdigest()
| |
|