Shinichie's picture
Upload folder using huggingface_hub
6e17fd0 verified
import base64
import datetime
import hashlib
import os
import shutil
import sys
from model.data import ModelId
def get_local_miners_dir(base_dir: str) -> str:
    """Return the directory under ``base_dir`` that holds all miners' models.

    Args:
        base_dir: Root directory of the local store.

    Returns:
        ``base_dir`` joined with the fixed ``"models"`` sub-directory name.
    """
    miners_root = os.path.join(base_dir, "models")
    return miners_root
def get_local_miner_dir(base_dir: str, hotkey: str) -> str:
    """Return the directory for a single miner inside the miners directory.

    Args:
        base_dir: Root directory of the local store.
        hotkey: Miner hotkey, used verbatim as the directory name.

    Returns:
        The miners directory for ``base_dir`` joined with ``hotkey``.
    """
    miners_root = get_local_miners_dir(base_dir)
    return os.path.join(miners_root, hotkey)
# Hugging Face stores models under models--namespace--name/snapshots/commit when downloading.
def get_local_model_dir(base_dir: str, hotkey: str, model_id: ModelId) -> str:
    """Return the directory for one model of one miner.

    Args:
        base_dir: Root directory of the local store.
        hotkey: Miner hotkey that owns the model.
        model_id: Identifier whose ``namespace`` and ``name`` form the
            ``models--<namespace>--<name>`` directory name.

    Returns:
        The miner's directory joined with the ``models--<namespace>--<name>`` folder.
    """
    miner_dir = get_local_miner_dir(base_dir, hotkey)
    repo_dir_name = "--".join(("models", model_id.namespace, model_id.name))
    return os.path.join(miner_dir, repo_dir_name)
def get_local_model_snapshot_dir(base_dir: str, hotkey: str, model_id: ModelId) -> str:
    """Return the snapshot directory for a specific commit of a miner's model.

    Args:
        base_dir: Root directory of the local store.
        hotkey: Miner hotkey that owns the model.
        model_id: Identifier whose ``commit`` names the snapshot folder.

    Returns:
        ``<model dir>/snapshots/<commit>`` for the given miner and model.
    """
    model_dir = get_local_model_dir(base_dir, hotkey, model_id)
    return os.path.join(model_dir, "snapshots", model_id.commit)
def get_hf_download_path(local_path: str, model_id: ModelId) -> str:
    """Return the snapshot path for ``model_id`` beneath ``local_path``.

    Args:
        local_path: Directory the model was downloaded into.
        model_id: Identifier providing ``namespace``, ``name`` and ``commit``.

    Returns:
        ``<local_path>/models--<namespace>--<name>/snapshots/<commit>``.
    """
    repo_dir_name = "--".join(("models", model_id.namespace, model_id.name))
    snapshot_parts = (repo_dir_name, "snapshots", model_id.commit)
    return os.path.join(local_path, *snapshot_parts)
def get_newest_datetime_under_path(path: str) -> datetime.datetime:
    """Return the most recent modification time of any file under ``path``.

    The directory tree is walked recursively and the largest ``st_mtime`` of
    all files that can be stat'ed is kept.

    Args:
        path: Directory to scan.

    Returns:
        The newest file modification time as a naive local ``datetime``, or
        ``datetime.datetime.max`` when no file could be inspected (for example
        an empty or missing directory).
    """
    newest_mtime = None

    for cur_path, _dirnames, filenames in os.walk(path):
        for filename in filenames:
            file_path = os.path.join(cur_path, filename)
            try:
                mod_time = os.stat(file_path).st_mtime
            except OSError:
                # Best effort: the file may have vanished mid-walk or be a
                # broken symlink; skip it rather than abort the whole scan.
                continue
            # Keep the latest timestamp seen so far (newest, not oldest).
            if newest_mtime is None or mod_time > newest_mtime:
                newest_mtime = mod_time

    if newest_mtime is None:
        return datetime.datetime.max

    return datetime.datetime.fromtimestamp(newest_mtime)
def remove_dir_out_of_grace_by_datetime(path: str, grace_period_seconds: int, last_modified: datetime.datetime) -> bool:
    """Delete ``path`` when ``last_modified`` is older than the grace period.

    Args:
        path: Directory to remove.
        grace_period_seconds: How long, in seconds, a directory is kept after
            ``last_modified``.
        last_modified: Timestamp compared against ``datetime.datetime.now()``.

    Returns:
        True when removal was attempted (errors during removal are ignored),
        False when the directory is still within its grace period.
    """
    grace_window = datetime.timedelta(seconds=grace_period_seconds)
    cutoff = datetime.datetime.now() - grace_window

    # Still inside the grace period: leave the directory alone.
    if last_modified >= cutoff:
        return False

    shutil.rmtree(path=path, ignore_errors=True)
    return True
def remove_dir_out_of_grace(path: str, grace_period_seconds: int) -> bool:
    """Delete ``path`` when its files fall outside the grace period.

    The reference timestamp is whatever ``get_newest_datetime_under_path``
    reports for ``path``; the removal decision itself is delegated to
    ``remove_dir_out_of_grace_by_datetime``.

    Args:
        path: Directory to inspect and possibly remove.
        grace_period_seconds: Grace period in seconds.

    Returns:
        True when the directory was removed, False otherwise.
    """
    return remove_dir_out_of_grace_by_datetime(
        path,
        grace_period_seconds,
        get_newest_datetime_under_path(path),
    )
def realize_symlinks_in_directory(path: str) -> int:
    """Replace every symlinked file under ``path`` with the file it points to.

    The tree is walked recursively. For each entry that is itself a symbolic
    link, the link target is moved to the link's location, so the link is
    replaced by a real file and the target's old location is vacated.

    Args:
        path: Directory whose file symlinks should be realized.

    Returns:
        The number of symlinks that were realized.
    """
    realized_symlinks = 0

    for cur_path, _dirnames, filenames in os.walk(path):
        for filename in filenames:
            link_path = os.path.abspath(os.path.join(cur_path, filename))

            # Only act on entries that are symlinks themselves. Comparing the
            # absolute path with the resolved path would also flag regular
            # files that merely live beneath a symlinked ancestor directory.
            if not os.path.islink(link_path):
                continue

            real_path = os.path.realpath(link_path)
            # A link that cannot be resolved to a different location has
            # nothing to move.
            if real_path == link_path:
                continue

            shutil.move(real_path, link_path)
            realized_symlinks += 1

    return realized_symlinks
def get_hash_of_file(path: str) -> str:
    """Return the base64-encoded SHA-256 digest of the file at ``path``.

    Args:
        path: File to hash; it is opened in binary mode.

    Returns:
        The SHA-256 digest of the file contents, base64-encoded as a UTF-8 string.
    """
    chunk_size = 64 * 1024
    digest = hashlib.sha256()

    with open(path, "rb") as stream:
        # Read fixed-size chunks until read() yields the empty-bytes sentinel,
        # so the whole file is never held in memory at once.
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)

    encoded = base64.b64encode(digest.digest())
    return encoded.decode("utf-8")
def get_hash_of_directory(path: str) -> str:
    """Return a base64-encoded SHA-256 digest covering every file under ``path``.

    Each file's own hash (from ``get_hash_of_file``) is fed into a single
    SHA-256 in a deterministic traversal order: sub-directories and files are
    both visited in sorted name order. Only the per-file hashes are mixed in.

    Args:
        path: Directory to hash recursively.

    Returns:
        The combined digest, base64-encoded as a UTF-8 string.
    """
    combined = hashlib.sha256()

    for cur_path, dirnames, filenames in os.walk(path):
        # Sorting in place steers os.walk into sub-directories in a stable order.
        dirnames.sort()
        # Files of the current directory are likewise processed in sorted order.
        filenames.sort()
        for filename in filenames:
            file_digest = get_hash_of_file(os.path.join(cur_path, filename))
            combined.update(file_digest.encode())

    encoded = base64.b64encode(combined.digest())
    return encoded.decode("utf-8")