Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| 从 Hugging Face Dataset 仓库恢复 OpenCode 数据到 ~/.local/share/opencode。 | |
| 需设置环境变量: HF_TOKEN, OPENCODE_DATASET_REPO。 | |
| """ | |
| import os | |
| import re | |
| import shutil | |
| import sys | |
def _normalize_repo_id(value):
    """Return ``namespace/repo_name`` for a dataset repo id or dataset URL.

    Args:
        value: Either a bare ``namespace/repo_name`` id, or a URL containing
            ``huggingface.co/datasets/namespace/repo_name``, optionally
            followed by further path segments (``/tree/main``), a query
            string, or a fragment. Surrounding whitespace and a single
            trailing slash are ignored. May be ``None``.

    Returns:
        The ``namespace/repo_name`` string, or ``None`` when *value* is
        empty or cannot be interpreted as a dataset repo reference.
    """
    if not value:
        return None
    value = value.strip()
    if not value:
        return None

    # URL form: take the two path segments right after ".../datasets/" and
    # require a delimiter (or end of string) after them, so page URLs such as
    # ".../datasets/ns/repo/tree/main" still resolve to "ns/repo".
    url_match = re.search(
        r"huggingface\.co/datasets/([\w.-]+/[\w.-]+)(?:[/?#]|$)", value
    )
    if url_match:
        return url_match.group(1)

    # Bare id form: exactly two segments, optionally with one trailing slash.
    bare_match = re.fullmatch(r"([\w.-]+/[\w.-]+)/?", value)
    if bare_match:
        return bare_match.group(1)

    # Not a recognisable repo reference. Do not pass it through verbatim:
    # a string that merely contains "/" (e.g. an unrelated URL) is not an id.
    return None
def _replace_path(src, dst):
    """Copy *src* to *dst* so that *dst* ends up mirroring *src*.

    A directory source replaces whatever currently exists at *dst*. A file
    source overwrites an existing file and replaces an existing real
    directory. Removal errors are raised instead of being ignored, so the
    actual cause surfaces rather than a later "file exists" failure.
    """
    src_is_dir = os.path.isdir(src)
    if os.path.isdir(dst) and not os.path.islink(dst):
        shutil.rmtree(dst)
    elif src_is_dir and os.path.lexists(dst):
        # A file or symlink is in the way of a directory; rmtree cannot
        # remove those, so unlink it explicitly.
        os.remove(dst)
    if src_is_dir:
        shutil.copytree(src, dst)
    else:
        shutil.copy2(src, dst)


def main():
    """Restore OpenCode data from a Hugging Face dataset repository.

    Reads ``HF_TOKEN`` and ``OPENCODE_DATASET_REPO`` from the environment,
    downloads a snapshot of the dataset into a temporary sibling directory
    (``~/.local/share/opencode.restore_tmp``) and copies every top-level
    entry except ``.gitattributes`` and the downloader's own metadata into
    ``~/.local/share/opencode``, replacing entries of the same name.

    The restore is skipped (with a message on stderr where useful) when the
    token or repo id is missing or invalid, when ``huggingface_hub`` is not
    installed, when listing the repo fails, or when the repo holds nothing
    but ``.gitattributes``.

    Returns:
        0 in every case that returns normally, including skipped restores.

    Raises:
        Any exception raised while downloading the snapshot or copying files
        propagates to the caller; the temporary directory is removed first.
    """
    token = os.environ.get("HF_TOKEN")
    if not token:
        return 0

    raw = os.environ.get("OPENCODE_DATASET_REPO")
    repo_id = _normalize_repo_id(raw)
    if not repo_id:
        if raw and raw.strip():
            # The variable was set but could not be understood; say so
            # instead of skipping silently.
            print(
                f"restore: invalid OPENCODE_DATASET_REPO ({raw.strip()!r}), skip restore",
                file=sys.stderr,
            )
        return 0

    try:
        from huggingface_hub import HfApi, snapshot_download
    except ImportError:
        print("restore: huggingface_hub not installed, skip restore", file=sys.stderr)
        return 0

    try:
        files = HfApi(token=token).list_repo_files(repo_id, repo_type="dataset")
    except Exception as e:
        print(f"restore: list repo failed ({e}), skip restore", file=sys.stderr)
        return 0
    if not files or set(files) <= {".gitattributes"}:
        # Nothing has been backed up yet.
        return 0

    data_dir = os.path.expanduser("~/.local/share/opencode")
    os.makedirs(data_dir, exist_ok=True)
    tmp_dir = data_dir + ".restore_tmp"
    # Drop leftovers from an interrupted earlier run so stale files are not
    # merged into this snapshot.
    shutil.rmtree(tmp_dir, ignore_errors=True)
    try:
        snapshot_download(
            repo_id=repo_id,
            repo_type="dataset",
            local_dir=tmp_dir,
            token=token,
        )
        # Per the huggingface_hub download guide, local_dir downloads keep
        # bookkeeping under <local_dir>/.cache/huggingface; that is download
        # metadata, not application data, so it must not be restored.
        shutil.rmtree(os.path.join(tmp_dir, ".cache", "huggingface"), ignore_errors=True)
        for name in os.listdir(tmp_dir):
            if name == ".gitattributes":
                continue
            src = os.path.join(tmp_dir, name)
            if name == ".cache" and os.path.isdir(src) and not os.listdir(src):
                # Only the downloader's metadata lived here; skip the now
                # empty directory.
                continue
            _replace_path(src, os.path.join(data_dir, name))
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
    return 0
# Script entry point: run the restore and hand its return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())