| | |
"""
Restore OpenCode data from a Hugging Face Dataset repository into ~/.local/share/opencode.
Requires the environment variables HF_TOKEN and OPENCODE_DATASET_REPO.
"""
| | import os |
| | import re |
| | import shutil |
| | import sys |
| |
|
| | def _normalize_repo_id(value): |
| | """接受 repo_id 或完整 URL,返回 namespace/repo_name。""" |
| | if not value or not value.strip(): |
| | return None |
| | value = value.strip() |
| | |
| | m = re.search(r"(?:huggingface\.co/datasets/|^)([\w.-]+/[\w.-]+)/?$", value) |
| | if m: |
| | return m.group(1) |
| | |
| | if "/" in value: |
| | return value |
| | return None |
| |
|
def _replace_entry(src, dst):
    """Copy ``src`` (a file or a directory tree) to ``dst``, replacing ``dst``."""
    src_is_dir = os.path.isdir(src)
    if os.path.isdir(dst) and not os.path.islink(dst):
        # An existing directory is replaced wholesale, never merged into.
        shutil.rmtree(dst)
    elif src_is_dir and os.path.lexists(dst):
        # A file or symlink is in the way of a directory: copytree would
        # otherwise fail with FileExistsError.
        os.remove(dst)
    if src_is_dir:
        shutil.copytree(src, dst)
    else:
        shutil.copy2(src, dst)


def main():
    """Restore OpenCode data from a Hugging Face dataset repo.

    Reads ``HF_TOKEN`` and ``OPENCODE_DATASET_REPO`` from the environment,
    downloads a snapshot of the dataset into a temporary directory next to
    ``~/.local/share/opencode`` and copies every top-level entry except
    ``.gitattributes`` into that data directory, replacing entries of the
    same name.

    The restore is skipped (returning 0) when either variable is missing or
    unusable, when ``huggingface_hub`` is not installed, when listing the
    repo fails, or when the repo holds nothing besides ``.gitattributes``.

    Returns:
        Always 0.

    Raises:
        Any exception raised by the snapshot download or by the file copy
        propagates to the caller; the temporary directory is removed first.
    """
    token = os.environ.get("HF_TOKEN")
    if not token:
        return 0
    repo_id = _normalize_repo_id(os.environ.get("OPENCODE_DATASET_REPO"))
    if not repo_id:
        return 0

    # Imported lazily so a missing dependency downgrades to a skipped restore.
    try:
        from huggingface_hub import HfApi, snapshot_download
    except ImportError:
        print("restore: huggingface_hub not installed, skip restore", file=sys.stderr)
        return 0

    try:
        files = HfApi(token=token).list_repo_files(repo_id, repo_type="dataset")
    except Exception as e:
        print(f"restore: list repo failed ({e}), skip restore", file=sys.stderr)
        return 0
    # A repo that only holds .gitattributes has nothing to restore.
    if set(files or ()) <= {".gitattributes"}:
        return 0

    data_dir = os.path.expanduser("~/.local/share/opencode")
    os.makedirs(data_dir, exist_ok=True)
    tmp_dir = data_dir + ".restore_tmp"
    # A previous run that was killed may have left the temp dir behind; start
    # clean so stale files are not merged into this restore.
    if os.path.isdir(tmp_dir):
        shutil.rmtree(tmp_dir, ignore_errors=True)
    try:
        snapshot_download(
            repo_id=repo_id,
            repo_type="dataset",
            local_dir=tmp_dir,
            token=token,
        )
        # snapshot_download keeps its own bookkeeping under
        # <local_dir>/.cache/huggingface; it is not part of the backup.
        shutil.rmtree(os.path.join(tmp_dir, ".cache", "huggingface"), ignore_errors=True)
        try:
            os.rmdir(os.path.join(tmp_dir, ".cache"))
        except OSError:
            # .cache is absent or still holds real repo content: leave it.
            pass
        for name in os.listdir(tmp_dir):
            if name == ".gitattributes":
                continue
            _replace_entry(os.path.join(tmp_dir, name), os.path.join(data_dir, name))
    finally:
        if os.path.isdir(tmp_dir):
            shutil.rmtree(tmp_dir, ignore_errors=True)
    return 0
| |
|
# Script entry point: run the restore and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
| |
|