#!/usr/bin/env python3
"""
Hugging Face 断点续传下载脚本
镜像站: hf-mirror.com
目标: MMInstruction/M3IT
"""
import os
import sys
# Use the hf-mirror.com mirror; set before huggingface_hub is imported below
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
from huggingface_hub import snapshot_download
from huggingface_hub import hf_hub_download
import huggingface_hub
# Repository id passed to snapshot_download.
REPO_ID = "MMInstruction/M3IT"
# Local directory the snapshot is written into (created if missing).
LOCAL_DIR = "/workspace/xiaobin/dataset"
# Repository type passed to snapshot_download.
REPO_TYPE = "dataset" # M3IT is a dataset
def download():
    """Download the configured repository snapshot into ``LOCAL_DIR``.

    Prints the mirror endpoint, repository id and destination directory,
    creates the destination directory if needed, then calls
    ``snapshot_download`` with ``REPO_ID`` / ``REPO_TYPE`` / ``LOCAL_DIR``.

    On any exception raised by the download, the error and a hint are
    printed to stderr and the process exits with status 1.
    """
    # Local import: only needed for the signature compatibility check below.
    import inspect

    print(f"镜像站: {os.environ['HF_ENDPOINT']}")
    print(f"下载仓库: {REPO_ID}")
    print(f"保存目录: {LOCAL_DIR}")
    print("-" * 50)
    os.makedirs(LOCAL_DIR, exist_ok=True)

    # NOTE(review): these two options are reported as deprecated by recent
    # huggingface_hub releases and may be absent from newer ones -- confirm
    # against the installed version. Passing a keyword the function no longer
    # accepts would raise TypeError, so forward them only when supported.
    legacy_options = {
        "local_dir_use_symlinks": False,  # copy real files, no symlinks
        "resume_download": True,  # resume interrupted downloads
    }
    try:
        accepted = inspect.signature(snapshot_download).parameters
    except (TypeError, ValueError):
        # Signature cannot be introspected: keep the original call unchanged.
        extra_kwargs = dict(legacy_options)
    else:
        takes_var_kwargs = any(
            param.kind is inspect.Parameter.VAR_KEYWORD
            for param in accepted.values()
        )
        extra_kwargs = {
            name: value
            for name, value in legacy_options.items()
            if takes_var_kwargs or name in accepted
        }

    try:
        snapshot_download(
            repo_id=REPO_ID,
            repo_type=REPO_TYPE,
            local_dir=LOCAL_DIR,
            ignore_patterns=["*.gitattributes"],
            **extra_kwargs,
        )
    except Exception as e:
        # Top-level boundary of the script: report on stderr and exit non-zero.
        print(f"\n出错: {e}", file=sys.stderr)
        print("提示: 如果是模型仓库,请将 REPO_TYPE 改为 'model' 后重试", file=sys.stderr)
        sys.exit(1)
    else:
        print("\n下载完成!")
if __name__ == "__main__":
    # Run the download only when executed as a script, not when imported.
    download()