tao-shen Claude Opus 4.6 committed on
Commit
419ff91
·
1 Parent(s): 5a096f1

fix: always derive dataset from SPACE_ID, ignore OPENCLAW_DATASET_REPO on HF

Browse files

When running on HF Spaces (SPACE_ID is set), always use
{SPACE_ID}-data as the dataset name. This prevents duplicated
Spaces from inheriting the original's dataset via copied secrets.
OPENCLAW_DATASET_REPO is honored only when SPACE_ID is not set (e.g. local Docker).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. scripts/sync_hf.py +17 -9
scripts/sync_hf.py CHANGED
@@ -80,25 +80,33 @@ SPACE_ID = os.environ.get("SPACE_ID", "") # e.g. "tao-shen/HuggingClaw"
80
  SYNC_INTERVAL = int(os.environ.get("SYNC_INTERVAL", "60"))
81
  AUTO_CREATE_DATASET = os.environ.get("AUTO_CREATE_DATASET", "false").lower() in ("true", "1", "yes")
82
 
83
- # Dataset repo: always auto-derive from SPACE_ID when not explicitly set.
84
- # Format: {username}/{SpaceName}-data (e.g. "tao-shen/HuggingClaw-data")
85
- # This ensures each duplicated Space gets its own dataset automatically.
86
- HF_REPO_ID = os.environ.get("OPENCLAW_DATASET_REPO", "")
87
- if not HF_REPO_ID and SPACE_ID:
88
- # SPACE_ID = "username/SpaceName" → derive "username/SpaceName-data"
89
  HF_REPO_ID = f"{SPACE_ID}-data"
90
- print(f"[SYNC] OPENCLAW_DATASET_REPO not set auto-derived from SPACE_ID: {HF_REPO_ID}")
91
- elif not HF_REPO_ID and HF_TOKEN:
 
 
 
 
 
 
92
  # Fallback: no SPACE_ID (local Docker), derive from HF_TOKEN username
93
  try:
94
  _api = HfApi(token=HF_TOKEN)
95
  _username = _api.whoami()["name"]
96
  HF_REPO_ID = f"{_username}/HuggingClaw-data"
97
- print(f"[SYNC] OPENCLAW_DATASET_REPO not set — auto-derived from HF_TOKEN: {HF_REPO_ID}")
98
  del _api, _username
99
  except Exception as e:
100
  print(f"[SYNC] WARNING: Could not derive username from HF_TOKEN: {e}")
101
  HF_REPO_ID = ""
 
 
 
102
 
103
  # Setup logging
104
  log_dir = OPENCLAW_HOME / "workspace"
 
80
  SYNC_INTERVAL = int(os.environ.get("SYNC_INTERVAL", "60"))
81
  AUTO_CREATE_DATASET = os.environ.get("AUTO_CREATE_DATASET", "false").lower() in ("true", "1", "yes")
82
 
83
+ # Dataset repo: always derive from SPACE_ID to ensure each Space uses its own dataset.
84
+ # OPENCLAW_DATASET_REPO is only used as fallback when SPACE_ID is not available (local Docker).
85
+ _user_repo = os.environ.get("OPENCLAW_DATASET_REPO", "")
86
+ if SPACE_ID:
87
+ # Always use SPACE_ID — prevents duplicated Spaces from sharing the original's dataset
 
88
  HF_REPO_ID = f"{SPACE_ID}-data"
89
+ if _user_repo and _user_repo != HF_REPO_ID:
90
+ print(f"[SYNC] NOTE: Ignoring OPENCLAW_DATASET_REPO={_user_repo} — using SPACE_ID-derived: {HF_REPO_ID}")
91
+ else:
92
+ print(f"[SYNC] Dataset repo auto-derived from SPACE_ID: {HF_REPO_ID}")
93
+ elif _user_repo:
94
+ HF_REPO_ID = _user_repo
95
+ print(f"[SYNC] Using OPENCLAW_DATASET_REPO: {HF_REPO_ID}")
96
+ elif HF_TOKEN:
97
  # Fallback: no SPACE_ID (local Docker), derive from HF_TOKEN username
98
  try:
99
  _api = HfApi(token=HF_TOKEN)
100
  _username = _api.whoami()["name"]
101
  HF_REPO_ID = f"{_username}/HuggingClaw-data"
102
+ print(f"[SYNC] Dataset repo auto-derived from HF_TOKEN: {HF_REPO_ID}")
103
  del _api, _username
104
  except Exception as e:
105
  print(f"[SYNC] WARNING: Could not derive username from HF_TOKEN: {e}")
106
  HF_REPO_ID = ""
107
+ else:
108
+ HF_REPO_ID = ""
109
+ del _user_repo
110
 
111
  # Setup logging
112
  log_dir = OPENCLAW_HOME / "workspace"