fix: OPENCODE_DATASET_REPO 支持填完整 URL,自动解析为 repo_id
Browse files
Co-authored-by: Cursor <cursoragent@cursor.com>
- README.md +1 -1
- scripts/restore_from_dataset.py +17 -1
- scripts/save_to_dataset.py +16 -2
README.md
CHANGED
|
@@ -36,7 +36,7 @@ pinned: false
|
|
| 36 |
|
| 37 |
2. **在 Space 里配置**
|
| 38 |
- **Settings → Repository secrets**:新增 `HF_TOKEN`,值为你的 [Access Token](https://huggingface.co/settings/tokens)(需 **Write** 权限)。
|
| 39 |
-
- **Settings → Variables**:新增 `OPENCODE_DATASET_REPO`,值为
|
| 40 |
|
| 41 |
3. **行为**
|
| 42 |
- 启动时:从该 Dataset 拉取已有数据到 `~/.local/share/opencode`(若有)。
|
|
|
|
| 36 |
|
| 37 |
2. **在 Space 里配置**
|
| 38 |
- **Settings → Repository secrets**:新增 `HF_TOKEN`,值为你的 [Access Token](https://huggingface.co/settings/tokens)(需 **Write** 权限)。
|
| 39 |
+
- **Settings → Variables**:新增 `OPENCODE_DATASET_REPO`,值为 **repo_id**(如 `tao-shen/opencode-data`),或填完整 URL(如 `https://huggingface.co/datasets/tao-shen/opencode-data`)也会自动识别。
|
| 40 |
|
| 41 |
3. **行为**
|
| 42 |
- 启动时:从该 Dataset 拉取已有数据到 `~/.local/share/opencode`(若有)。
|
scripts/restore_from_dataset.py
CHANGED
|
@@ -4,12 +4,28 @@
|
|
| 4 |
需设置环境变量: HF_TOKEN, OPENCODE_DATASET_REPO。
|
| 5 |
"""
|
| 6 |
import os
|
|
|
|
| 7 |
import shutil
|
| 8 |
import sys
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
def main():
|
| 11 |
token = os.environ.get("HF_TOKEN")
|
| 12 |
-
|
|
|
|
| 13 |
data_dir = os.path.expanduser("~/.local/share/opencode")
|
| 14 |
|
| 15 |
if not token or not repo_id:
|
|
|
|
| 4 |
需设置环境变量: HF_TOKEN, OPENCODE_DATASET_REPO。
|
| 5 |
"""
|
| 6 |
import os
|
| 7 |
+
import re
|
| 8 |
import shutil
|
| 9 |
import sys
|
| 10 |
|
| 11 |
+
def _normalize_repo_id(value):
|
| 12 |
+
"""接受 repo_id 或完整 URL,返回 namespace/repo_name。"""
|
| 13 |
+
if not value or not value.strip():
|
| 14 |
+
return None
|
| 15 |
+
value = value.strip()
|
| 16 |
+
# 若是 URL,提取最后两段路径作为 namespace/repo_name
|
| 17 |
+
m = re.search(r"(?:huggingface\.co/datasets/|^)([\w.-]+/[\w.-]+)/?$", value)
|
| 18 |
+
if m:
|
| 19 |
+
return m.group(1)
|
| 20 |
+
# 已是 namespace/repo_name 形式
|
| 21 |
+
if "/" in value:
|
| 22 |
+
return value
|
| 23 |
+
return None
|
| 24 |
+
|
| 25 |
def main():
|
| 26 |
token = os.environ.get("HF_TOKEN")
|
| 27 |
+
raw = os.environ.get("OPENCODE_DATASET_REPO")
|
| 28 |
+
repo_id = _normalize_repo_id(raw)
|
| 29 |
data_dir = os.path.expanduser("~/.local/share/opencode")
|
| 30 |
|
| 31 |
if not token or not repo_id:
|
scripts/save_to_dataset.py
CHANGED
|
@@ -4,11 +4,25 @@
|
|
| 4 |
需设置环境变量: HF_TOKEN, OPENCODE_DATASET_REPO。
|
| 5 |
"""
|
| 6 |
import os
|
|
|
|
| 7 |
import sys
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
def main():
|
| 10 |
token = os.environ.get("HF_TOKEN")
|
| 11 |
-
|
|
|
|
| 12 |
data_dir = os.path.expanduser("~/.local/share/opencode")
|
| 13 |
|
| 14 |
if not token or not repo_id:
|
|
@@ -24,7 +38,7 @@ def main():
|
|
| 24 |
return 0
|
| 25 |
|
| 26 |
try:
|
| 27 |
-
api = HfApi()
|
| 28 |
api.upload_folder(
|
| 29 |
folder_path=data_dir,
|
| 30 |
path_in_repo=".",
|
|
|
|
| 4 |
需设置环境变量: HF_TOKEN, OPENCODE_DATASET_REPO。
|
| 5 |
"""
|
| 6 |
import os
|
| 7 |
+
import re
|
| 8 |
import sys
|
| 9 |
|
| 10 |
+
def _normalize_repo_id(value):
|
| 11 |
+
"""接受 repo_id 或完整 URL,返回 namespace/repo_name。"""
|
| 12 |
+
if not value or not value.strip():
|
| 13 |
+
return None
|
| 14 |
+
value = value.strip()
|
| 15 |
+
m = re.search(r"(?:huggingface\.co/datasets/|^)([\w.-]+/[\w.-]+)/?$", value)
|
| 16 |
+
if m:
|
| 17 |
+
return m.group(1)
|
| 18 |
+
if "/" in value:
|
| 19 |
+
return value
|
| 20 |
+
return None
|
| 21 |
+
|
| 22 |
def main():
|
| 23 |
token = os.environ.get("HF_TOKEN")
|
| 24 |
+
raw = os.environ.get("OPENCODE_DATASET_REPO")
|
| 25 |
+
repo_id = _normalize_repo_id(raw)
|
| 26 |
data_dir = os.path.expanduser("~/.local/share/opencode")
|
| 27 |
|
| 28 |
if not token or not repo_id:
|
|
|
|
| 38 |
return 0
|
| 39 |
|
| 40 |
try:
|
| 41 |
+
api = HfApi(token=token)
|
| 42 |
api.upload_folder(
|
| 43 |
folder_path=data_dir,
|
| 44 |
path_in_repo=".",
|