Update tools/hf_backup.py
Browse files- tools/hf_backup.py +17 -14
tools/hf_backup.py
CHANGED
|
@@ -13,13 +13,16 @@ def list_backups(api: HfApi, repo_id: str, prefix: str):
|
|
| 13 |
backs.sort()
|
| 14 |
return backs
|
| 15 |
|
| 16 |
-
def
|
| 17 |
-
api = HfApi(token=args.token)
|
| 18 |
-
# 可选:确认数据集存在
|
| 19 |
try:
|
| 20 |
-
api.dataset_info(repo_id=
|
| 21 |
except Exception:
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# 上传
|
| 25 |
api.upload_file(
|
|
@@ -31,15 +34,15 @@ def upload(args):
|
|
| 31 |
|
| 32 |
# 保留最新 N 份
|
| 33 |
if args.max and args.max > 0:
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
|
| 44 |
def restore(args):
|
| 45 |
api = HfApi(token=args.token)
|
|
|
|
| 13 |
backs.sort()
|
| 14 |
return backs
|
| 15 |
|
| 16 |
+
def ensure_dataset(api: "HfApi", repo_id: str):
    """Make sure the dataset repo ``repo_id`` exists, creating it if needed.

    The repo is probed with ``api.dataset_info``. If that call raises, the
    dataset is created as a private dataset repo (switch ``private`` to
    ``False`` if a public repo is wanted).

    Args:
        api: Client used for both the probe and the creation call. Its own
            credentials are used; no token is passed per call.
        repo_id: Identifier of the dataset repo to check or create.
    """
    try:
        # Do not pass `use_auth_token=` here: clients that no longer accept
        # that keyword raise TypeError, which the handler below would swallow,
        # so create_repo would be attempted on every run even when the
        # dataset already exists.
        api.dataset_info(repo_id=repo_id)
    except Exception:
        # Any probe failure is treated as "missing"; exist_ok=True keeps the
        # creation call harmless if the repo turns out to exist after all.
        # NOTE(review): consider narrowing this to the library's
        # "repository not found" error once the file's imports are confirmed.
        api.create_repo(
            repo_id=repo_id,
            repo_type="dataset",
            private=True,
            exist_ok=True,
        )
|
| 22 |
+
|
| 23 |
+
def upload(args):
|
| 24 |
+
api = HfApi(token=args.token)
|
| 25 |
+
ensure_dataset(api, args.repo)
|
| 26 |
|
| 27 |
# 上传
|
| 28 |
api.upload_file(
|
|
|
|
| 34 |
|
| 35 |
# 保留最新 N 份
|
| 36 |
if args.max and args.max > 0:
|
| 37 |
+
backs = list_backups(api, args.repo, args.prefix)
|
| 38 |
+
if len(backs) > args.max:
|
| 39 |
+
to_del = backs[: len(backs) - args.max]
|
| 40 |
+
for f in to_del:
|
| 41 |
+
try:
|
| 42 |
+
api.delete_file(path_in_repo=f, repo_id=args.repo, repo_type="dataset")
|
| 43 |
+
except Exception:
|
| 44 |
+
# 删除失败不致命
|
| 45 |
+
pass
|
| 46 |
|
| 47 |
def restore(args):
|
| 48 |
api = HfApi(token=args.token)
|