File size: 4,150 Bytes
6e17fd0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import base64
import datetime
import hashlib
import os
import shutil
import sys
from model.data import ModelId
def get_local_miners_dir(base_dir: str) -> str:
    """Return the "models" directory located directly under ``base_dir``."""
    miners_root = os.path.join(base_dir, "models")
    return miners_root
def get_local_miner_dir(base_dir: str, hotkey: str) -> str:
    """Return the per-miner directory: the miners directory joined with ``hotkey``."""
    miners_root = get_local_miners_dir(base_dir)
    return os.path.join(miners_root, hotkey)
# Hugging Face stores downloaded models under models--namespace--name/snapshots/commit.
def get_local_model_dir(base_dir: str, hotkey: str, model_id: ModelId) -> str:
    """Return the directory of one model inside the miner's directory.

    The final path component is ``models--<namespace>--<name>``, built from
    the ``namespace`` and ``name`` attributes of ``model_id``.
    """
    miner_dir = get_local_miner_dir(base_dir, hotkey)
    repo_dir_name = "--".join(("models", model_id.namespace, model_id.name))
    return os.path.join(miner_dir, repo_dir_name)
def get_local_model_snapshot_dir(base_dir: str, hotkey: str, model_id: ModelId) -> str:
    """Return ``<model dir>/snapshots/<commit>`` for the given miner and model.

    The model directory comes from ``get_local_model_dir`` and the last
    component is ``model_id.commit``.
    """
    model_dir = get_local_model_dir(base_dir, hotkey, model_id)
    return os.path.join(model_dir, "snapshots", model_id.commit)
def get_hf_download_path(local_path: str, model_id: ModelId) -> str:
    """Return ``<local_path>/models--<namespace>--<name>/snapshots/<commit>``.

    All three variable parts are read from ``model_id`` (``namespace``,
    ``name`` and ``commit``).
    """
    repo_dir_name = "--".join(("models", model_id.namespace, model_id.name))
    path_parts = (local_path, repo_dir_name, "snapshots", model_id.commit)
    return os.path.join(*path_parts)
def get_newest_datetime_under_path(path: str) -> datetime.datetime:
    """Return the reference modification time for the file tree under ``path``.

    Every file at any depth below ``path`` is stat'ed and the smallest
    ``st_mtime`` seen is converted to a naive local ``datetime``.

    NOTE(review): despite the function name, the comparison keeps the
    *smallest* mtime (the least recently modified file) and the empty case
    yields ``datetime.datetime.max``. That contract is preserved here because
    callers may rely on it; confirm whether "newest" was really intended.

    Args:
        path: Root directory to walk. A missing directory yields no files.

    Returns:
        The datetime of the smallest mtime found, or ``datetime.datetime.max``
        when no file could be stat'ed.
    """
    oldest_mtime = None

    for cur_path, _dirnames, filenames in os.walk(path):
        for filename in filenames:
            # Use a separate name so the ``path`` argument is not clobbered.
            file_path = os.path.join(cur_path, filename)
            try:
                mod_time = os.stat(file_path).st_mtime
            except OSError:
                # Best effort: the file may have vanished or be unreadable
                # (e.g. a dangling symlink). Only filesystem errors are
                # ignored, so KeyboardInterrupt/SystemExit still propagate.
                continue
            if oldest_mtime is None or mod_time < oldest_mtime:
                oldest_mtime = mod_time

    if oldest_mtime is None:
        return datetime.datetime.max

    return datetime.datetime.fromtimestamp(oldest_mtime)
def remove_dir_out_of_grace_by_datetime(path: str, grace_period_seconds: int, last_modified: datetime.datetime) -> bool:
    """Delete ``path`` when ``last_modified`` is older than the grace period.

    The directory tree is removed (errors ignored) only if ``last_modified``
    is strictly earlier than the current time minus ``grace_period_seconds``.
    Returns True when removal was attempted and False otherwise.
    """
    grace_window = datetime.timedelta(seconds=grace_period_seconds)
    cutoff = datetime.datetime.now() - grace_window
    is_expired = last_modified < cutoff
    if not is_expired:
        return False

    shutil.rmtree(path=path, ignore_errors=True)
    return True
def remove_dir_out_of_grace(path: str, grace_period_seconds: int) -> bool:
    """Delete ``path`` if its computed modification time is outside the grace period.

    The reference time comes from ``get_newest_datetime_under_path`` and the
    decision is delegated to ``remove_dir_out_of_grace_by_datetime``.
    Returns whether the directory was deleted.
    """
    return remove_dir_out_of_grace_by_datetime(
        path,
        grace_period_seconds,
        get_newest_datetime_under_path(path),
    )
def realize_symlinks_in_directory(path: str) -> int:
    """Replace every file symlink under ``path`` with the file it points to.

    Each symlinked file found while walking ``path`` has its resolved target
    moved onto the link's location, so the link becomes a regular file and
    the target disappears from its old location.

    Only entries that are themselves symlinks are touched. Comparing the
    absolute path with the fully resolved path is not enough on its own:
    when ``path`` (or one of its ancestors) is a symlink, every regular file
    would also look "different" and be counted and moved onto itself.

    Args:
        path: Root directory to walk. Symlinked sub-directories are not
            descended into (``os.walk`` default).

    Returns:
        The number of symlinks that were replaced by their targets.

    Raises:
        OSError: Propagated from ``shutil.move``, e.g. when a link's target
            does not exist.
    """
    realized_symlinks = 0

    for cur_path, _dirnames, filenames in os.walk(path):
        for filename in filenames:
            link_path = os.path.abspath(os.path.join(cur_path, filename))
            if not os.path.islink(link_path):
                continue

            # Resolve the link (and any chain of links) to the real file.
            real_path = os.path.realpath(link_path)
            if real_path == link_path:
                # Nothing to move if resolution leads back to the same path.
                continue

            shutil.move(real_path, link_path)
            realized_symlinks += 1

    return realized_symlinks
def get_hash_of_file(path: str) -> str:
    """Return the base64-encoded SHA-256 digest of the file at ``path``.

    The file is read in 64 KiB chunks so large files are never loaded into
    memory all at once.
    """
    chunk_size = 64 * 1024
    digest = hashlib.sha256()

    with open(path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)

    encoded = base64.b64encode(digest.digest())
    return encoded.decode("utf-8")
def get_hash_of_directory(path: str) -> str:
    """Return a base64-encoded SHA-256 digest summarising all files under ``path``.

    Each file's hash (from ``get_hash_of_file``) is fed, in a deterministic
    traversal order, into a single SHA-256 whose digest is returned.
    """
    combined = hashlib.sha256()

    for cur_path, dirnames, filenames in os.walk(path):
        # Sorting in place makes os.walk descend into sub-directories in a
        # stable order; files within a directory are visited sorted as well.
        dirnames.sort()
        filenames.sort()

        for filename in filenames:
            per_file_hash = get_hash_of_file(os.path.join(cur_path, filename))
            combined.update(per_file_hash.encode())

    encoded = base64.b64encode(combined.digest())
    return encoded.decode("utf-8")
|