Spaces:
Sleeping
Sleeping
fix: always derive dataset from SPACE_ID, ignore OPENCLAW_DATASET_REPO on HF
Browse files
When running on HF Spaces (SPACE_ID is set), always use
{SPACE_ID}-data as the dataset name. This prevents duplicated
Spaces from inheriting the original's dataset via copied secrets.
OPENCLAW_DATASET_REPO is only used for local Docker without SPACE_ID.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- scripts/sync_hf.py +17 -9
scripts/sync_hf.py
CHANGED
|
@@ -80,25 +80,33 @@ SPACE_ID = os.environ.get("SPACE_ID", "") # e.g. "tao-shen/HuggingClaw"
|
|
| 80 |
SYNC_INTERVAL = int(os.environ.get("SYNC_INTERVAL", "60"))
|
| 81 |
AUTO_CREATE_DATASET = os.environ.get("AUTO_CREATE_DATASET", "false").lower() in ("true", "1", "yes")
|
| 82 |
|
| 83 |
-
# Dataset repo: always
|
| 84 |
-
#
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
# SPACE_ID = "username/SpaceName" → derive "username/SpaceName-data"
|
| 89 |
HF_REPO_ID = f"{SPACE_ID}-data"
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
# Fallback: no SPACE_ID (local Docker), derive from HF_TOKEN username
|
| 93 |
try:
|
| 94 |
_api = HfApi(token=HF_TOKEN)
|
| 95 |
_username = _api.whoami()["name"]
|
| 96 |
HF_REPO_ID = f"{_username}/HuggingClaw-data"
|
| 97 |
-
print(f"[SYNC]
|
| 98 |
del _api, _username
|
| 99 |
except Exception as e:
|
| 100 |
print(f"[SYNC] WARNING: Could not derive username from HF_TOKEN: {e}")
|
| 101 |
HF_REPO_ID = ""
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
# Setup logging
|
| 104 |
log_dir = OPENCLAW_HOME / "workspace"
|
|
|
|
| 80 |
SYNC_INTERVAL = int(os.environ.get("SYNC_INTERVAL", "60"))
|
| 81 |
AUTO_CREATE_DATASET = os.environ.get("AUTO_CREATE_DATASET", "false").lower() in ("true", "1", "yes")
|
| 82 |
|
| 83 |
+
# Dataset repo: always derive from SPACE_ID to ensure each Space uses its own dataset.
|
| 84 |
+
# OPENCLAW_DATASET_REPO is only used as fallback when SPACE_ID is not available (local Docker).
|
| 85 |
+
_user_repo = os.environ.get("OPENCLAW_DATASET_REPO", "")
|
| 86 |
+
if SPACE_ID:
|
| 87 |
+
# Always use SPACE_ID — prevents duplicated Spaces from sharing the original's dataset
|
|
|
|
| 88 |
HF_REPO_ID = f"{SPACE_ID}-data"
|
| 89 |
+
if _user_repo and _user_repo != HF_REPO_ID:
|
| 90 |
+
print(f"[SYNC] NOTE: Ignoring OPENCLAW_DATASET_REPO={_user_repo} — using SPACE_ID-derived: {HF_REPO_ID}")
|
| 91 |
+
else:
|
| 92 |
+
print(f"[SYNC] Dataset repo auto-derived from SPACE_ID: {HF_REPO_ID}")
|
| 93 |
+
elif _user_repo:
|
| 94 |
+
HF_REPO_ID = _user_repo
|
| 95 |
+
print(f"[SYNC] Using OPENCLAW_DATASET_REPO: {HF_REPO_ID}")
|
| 96 |
+
elif HF_TOKEN:
|
| 97 |
# Fallback: no SPACE_ID (local Docker), derive from HF_TOKEN username
|
| 98 |
try:
|
| 99 |
_api = HfApi(token=HF_TOKEN)
|
| 100 |
_username = _api.whoami()["name"]
|
| 101 |
HF_REPO_ID = f"{_username}/HuggingClaw-data"
|
| 102 |
+
print(f"[SYNC] Dataset repo auto-derived from HF_TOKEN: {HF_REPO_ID}")
|
| 103 |
del _api, _username
|
| 104 |
except Exception as e:
|
| 105 |
print(f"[SYNC] WARNING: Could not derive username from HF_TOKEN: {e}")
|
| 106 |
HF_REPO_ID = ""
|
| 107 |
+
else:
|
| 108 |
+
HF_REPO_ID = ""
|
| 109 |
+
del _user_repo
|
| 110 |
|
| 111 |
# Setup logging
|
| 112 |
log_dir = OPENCLAW_HOME / "workspace"
|