Spaces:
Paused
Paused
fix: exclude credentials dir, sanitize ALL json files recursively to fix HF 400 error
Browse files- scripts/sync_hf.py +50 -6
scripts/sync_hf.py
CHANGED
|
@@ -183,10 +183,8 @@ class OpenClawFullSync:
|
|
| 183 |
# Copy the directory, skip symlinks and excluded patterns
|
| 184 |
self._copy_for_upload(OPENCLAW_HOME, staging_path)
|
| 185 |
|
| 186 |
-
# Sanitize
|
| 187 |
-
|
| 188 |
-
if config_staged.exists():
|
| 189 |
-
self._sanitize_config_file(config_staged)
|
| 190 |
|
| 191 |
# Log staging dir size
|
| 192 |
total_size = 0
|
|
@@ -200,6 +198,10 @@ class OpenClawFullSync:
|
|
| 200 |
print(f"[SYNC] staging: {os.path.relpath(fp, staging_path)} ({sz} bytes)")
|
| 201 |
print(f"[SYNC] Staging: {file_count} files, {total_size} bytes total")
|
| 202 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
# Upload the sanitized staging directory
|
| 204 |
self.api.upload_folder(
|
| 205 |
folder_path=str(staging_path),
|
|
@@ -234,8 +236,9 @@ class OpenClawFullSync:
|
|
| 234 |
def _copy_for_upload(self, src: Path, dst: Path):
|
| 235 |
"""Copy directory for upload, skipping symlinks and excluded items."""
|
| 236 |
skip_names = {".git", "node_modules", "__pycache__", ".cache",
|
| 237 |
-
"extensions", ".DS_Store", "logs", "temp", "tmp"
|
| 238 |
-
|
|
|
|
| 239 |
# Skip files larger than 10MB to avoid timeout
|
| 240 |
MAX_FILE_SIZE = 10 * 1024 * 1024
|
| 241 |
|
|
@@ -260,6 +263,47 @@ class OpenClawFullSync:
|
|
| 260 |
elif item.is_dir():
|
| 261 |
self._copy_for_upload(item, dst / item.name)
|
| 262 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
def _sanitize_config_file(self, config_path: Path):
|
| 264 |
"""Sanitize openclaw.json to remove secrets before upload."""
|
| 265 |
try:
|
|
|
|
| 183 |
# Copy the directory, skip symlinks and excluded patterns
|
| 184 |
self._copy_for_upload(OPENCLAW_HOME, staging_path)
|
| 185 |
|
| 186 |
+
# Sanitize ALL JSON files to redact tokens/keys/secrets
|
| 187 |
+
self._sanitize_all_json(staging_path)
|
|
|
|
|
|
|
| 188 |
|
| 189 |
# Log staging dir size
|
| 190 |
total_size = 0
|
|
|
|
| 198 |
print(f"[SYNC] staging: {os.path.relpath(fp, staging_path)} ({sz} bytes)")
|
| 199 |
print(f"[SYNC] Staging: {file_count} files, {total_size} bytes total")
|
| 200 |
|
| 201 |
+
if file_count == 0:
|
| 202 |
+
print("[SYNC] Nothing to upload (staging is empty).")
|
| 203 |
+
return
|
| 204 |
+
|
| 205 |
# Upload the sanitized staging directory
|
| 206 |
self.api.upload_folder(
|
| 207 |
folder_path=str(staging_path),
|
|
|
|
| 236 |
def _copy_for_upload(self, src: Path, dst: Path):
|
| 237 |
"""Copy directory for upload, skipping symlinks and excluded items."""
|
| 238 |
skip_names = {".git", "node_modules", "__pycache__", ".cache",
|
| 239 |
+
"extensions", ".DS_Store", "logs", "temp", "tmp",
|
| 240 |
+
"credentials"} # credentials are injected from env at runtime
|
| 241 |
+
skip_exts = {".lock", ".tmp", ".socket", ".pid", ".pyc", ".log", ".bak"}
|
| 242 |
# Skip files larger than 10MB to avoid timeout
|
| 243 |
MAX_FILE_SIZE = 10 * 1024 * 1024
|
| 244 |
|
|
|
|
| 263 |
elif item.is_dir():
|
| 264 |
self._copy_for_upload(item, dst / item.name)
|
| 265 |
|
| 266 |
+
def _sanitize_all_json(self, staging_path: Path):
|
| 267 |
+
"""Sanitize ALL JSON files to redact tokens, keys, secrets."""
|
| 268 |
+
SECRET_KEYS = {"apiKey", "apikey", "api_key", "token", "secret",
|
| 269 |
+
"password", "access_token", "bot_token"}
|
| 270 |
+
count = 0
|
| 271 |
+
for json_file in staging_path.rglob("*.json"):
|
| 272 |
+
try:
|
| 273 |
+
with open(json_file, "r") as f:
|
| 274 |
+
data = json.load(f)
|
| 275 |
+
changed = self._redact_secrets(data, SECRET_KEYS)
|
| 276 |
+
if changed:
|
| 277 |
+
with open(json_file, "w") as f:
|
| 278 |
+
json.dump(data, f, indent=2)
|
| 279 |
+
count += 1
|
| 280 |
+
except Exception:
|
| 281 |
+
pass
|
| 282 |
+
if count:
|
| 283 |
+
print(f"[SYNC] Sanitized {count} JSON files (redacted secrets)")
|
| 284 |
+
|
| 285 |
+
def _redact_secrets(self, obj, secret_keys, depth=0):
|
| 286 |
+
"""Recursively redact values of secret-looking keys."""
|
| 287 |
+
if depth > 10:
|
| 288 |
+
return False
|
| 289 |
+
changed = False
|
| 290 |
+
if isinstance(obj, dict):
|
| 291 |
+
for key in obj:
|
| 292 |
+
if key.lower() in {k.lower() for k in secret_keys}:
|
| 293 |
+
val = obj[key]
|
| 294 |
+
if isinstance(val, str) and len(val) > 3 and not val.startswith("<"):
|
| 295 |
+
obj[key] = "<REDACTED>"
|
| 296 |
+
changed = True
|
| 297 |
+
elif isinstance(obj[key], (dict, list)):
|
| 298 |
+
if self._redact_secrets(obj[key], secret_keys, depth + 1):
|
| 299 |
+
changed = True
|
| 300 |
+
elif isinstance(obj, list):
|
| 301 |
+
for item in obj:
|
| 302 |
+
if isinstance(item, (dict, list)):
|
| 303 |
+
if self._redact_secrets(item, secret_keys, depth + 1):
|
| 304 |
+
changed = True
|
| 305 |
+
return changed
|
| 306 |
+
|
| 307 |
def _sanitize_config_file(self, config_path: Path):
|
| 308 |
"""Sanitize openclaw.json to remove secrets before upload."""
|
| 309 |
try:
|