tao-shen Claude Opus 4.6 committed on
Commit
7728a08
·
1 Parent(s): 419ff91

fix: restore OPENCLAW_DATASET_REPO priority over SPACE_ID derivation

Browse files

Revert to the original logic: a user-specified OPENCLAW_DATASET_REPO takes
priority. Auto-derivation from SPACE_ID (and, failing that, from the
HF_TOKEN username) is used only as a fallback. This restores the
linked-spaces association on the dataset page.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. scripts/sync_hf.py +7 -17
scripts/sync_hf.py CHANGED
@@ -80,33 +80,23 @@ SPACE_ID = os.environ.get("SPACE_ID", "") # e.g. "tao-shen/HuggingClaw"
80
  SYNC_INTERVAL = int(os.environ.get("SYNC_INTERVAL", "60"))
81
  AUTO_CREATE_DATASET = os.environ.get("AUTO_CREATE_DATASET", "false").lower() in ("true", "1", "yes")
82
 
83
- # Dataset repo: always derive from SPACE_ID to ensure each Space uses its own dataset.
84
- # OPENCLAW_DATASET_REPO is only used as fallback when SPACE_ID is not available (local Docker).
85
- _user_repo = os.environ.get("OPENCLAW_DATASET_REPO", "")
86
- if SPACE_ID:
87
- # Always use SPACE_ID — prevents duplicated Spaces from sharing the original's dataset
88
- HF_REPO_ID = f"{SPACE_ID}-data"
89
- if _user_repo and _user_repo != HF_REPO_ID:
90
- print(f"[SYNC] NOTE: Ignoring OPENCLAW_DATASET_REPO={_user_repo} — using SPACE_ID-derived: {HF_REPO_ID}")
91
- else:
92
- print(f"[SYNC] Dataset repo auto-derived from SPACE_ID: {HF_REPO_ID}")
93
- elif _user_repo:
94
- HF_REPO_ID = _user_repo
95
  print(f"[SYNC] Using OPENCLAW_DATASET_REPO: {HF_REPO_ID}")
 
 
 
96
  elif HF_TOKEN:
97
- # Fallback: no SPACE_ID (local Docker), derive from HF_TOKEN username
98
  try:
99
  _api = HfApi(token=HF_TOKEN)
100
  _username = _api.whoami()["name"]
101
  HF_REPO_ID = f"{_username}/HuggingClaw-data"
102
- print(f"[SYNC] Dataset repo auto-derived from HF_TOKEN: {HF_REPO_ID}")
103
  del _api, _username
104
  except Exception as e:
105
  print(f"[SYNC] WARNING: Could not derive username from HF_TOKEN: {e}")
106
  HF_REPO_ID = ""
107
- else:
108
- HF_REPO_ID = ""
109
- del _user_repo
110
 
111
  # Setup logging
112
  log_dir = OPENCLAW_HOME / "workspace"
 
80
  SYNC_INTERVAL = int(os.environ.get("SYNC_INTERVAL", "60"))
81
  AUTO_CREATE_DATASET = os.environ.get("AUTO_CREATE_DATASET", "false").lower() in ("true", "1", "yes")
82
 
83
+ # Dataset repo: user-specified OPENCLAW_DATASET_REPO takes priority; otherwise auto-derive from SPACE_ID, then from the HF_TOKEN username.
84
+ HF_REPO_ID = os.environ.get("OPENCLAW_DATASET_REPO", "")
85
+ if HF_REPO_ID:
 
 
 
 
 
 
 
 
 
86
  print(f"[SYNC] Using OPENCLAW_DATASET_REPO: {HF_REPO_ID}")
87
+ elif SPACE_ID:
88
+ HF_REPO_ID = f"{SPACE_ID}-data"
89
+ print(f"[SYNC] OPENCLAW_DATASET_REPO not set — auto-derived from SPACE_ID: {HF_REPO_ID}")
90
  elif HF_TOKEN:
 
91
  try:
92
  _api = HfApi(token=HF_TOKEN)
93
  _username = _api.whoami()["name"]
94
  HF_REPO_ID = f"{_username}/HuggingClaw-data"
95
+ print(f"[SYNC] OPENCLAW_DATASET_REPO not set — auto-derived from HF_TOKEN: {HF_REPO_ID}")
96
  del _api, _username
97
  except Exception as e:
98
  print(f"[SYNC] WARNING: Could not derive username from HF_TOKEN: {e}")
99
  HF_REPO_ID = ""
 
 
 
100
 
101
  # Setup logging
102
  log_dir = OPENCLAW_HOME / "workspace"