tao-shen committed on
Commit
dae020b
·
1 Parent(s): 171726e

fix: exclude credentials dir, sanitize ALL json files recursively to fix HF 400 error

Browse files
Files changed (1) hide show
  1. scripts/sync_hf.py +50 -6
scripts/sync_hf.py CHANGED
@@ -183,10 +183,8 @@ class OpenClawFullSync:
183
  # Copy the directory, skip symlinks and excluded patterns
184
  self._copy_for_upload(OPENCLAW_HOME, staging_path)
185
 
186
- # Sanitize openclaw.json in the staging copy
187
- config_staged = staging_path / "openclaw.json"
188
- if config_staged.exists():
189
- self._sanitize_config_file(config_staged)
190
 
191
  # Log staging dir size
192
  total_size = 0
@@ -200,6 +198,10 @@ class OpenClawFullSync:
200
  print(f"[SYNC] staging: {os.path.relpath(fp, staging_path)} ({sz} bytes)")
201
  print(f"[SYNC] Staging: {file_count} files, {total_size} bytes total")
202
 
 
 
 
 
203
  # Upload the sanitized staging directory
204
  self.api.upload_folder(
205
  folder_path=str(staging_path),
@@ -234,8 +236,9 @@ class OpenClawFullSync:
234
  def _copy_for_upload(self, src: Path, dst: Path):
235
  """Copy directory for upload, skipping symlinks and excluded items."""
236
  skip_names = {".git", "node_modules", "__pycache__", ".cache",
237
- "extensions", ".DS_Store", "logs", "temp", "tmp"}
238
- skip_exts = {".lock", ".tmp", ".socket", ".pid", ".pyc", ".log"}
 
239
  # Skip files larger than 10MB to avoid timeout
240
  MAX_FILE_SIZE = 10 * 1024 * 1024
241
 
@@ -260,6 +263,47 @@ class OpenClawFullSync:
260
  elif item.is_dir():
261
  self._copy_for_upload(item, dst / item.name)
262
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
263
  def _sanitize_config_file(self, config_path: Path):
264
  """Sanitize openclaw.json to remove secrets before upload."""
265
  try:
 
183
  # Copy the directory, skip symlinks and excluded patterns
184
  self._copy_for_upload(OPENCLAW_HOME, staging_path)
185
 
186
+ # Sanitize ALL JSON files to redact tokens/keys/secrets
187
+ self._sanitize_all_json(staging_path)
 
 
188
 
189
  # Log staging dir size
190
  total_size = 0
 
198
  print(f"[SYNC] staging: {os.path.relpath(fp, staging_path)} ({sz} bytes)")
199
  print(f"[SYNC] Staging: {file_count} files, {total_size} bytes total")
200
 
201
+ if file_count == 0:
202
+ print("[SYNC] Nothing to upload (staging is empty).")
203
+ return
204
+
205
  # Upload the sanitized staging directory
206
  self.api.upload_folder(
207
  folder_path=str(staging_path),
 
236
  def _copy_for_upload(self, src: Path, dst: Path):
237
  """Copy directory for upload, skipping symlinks and excluded items."""
238
  skip_names = {".git", "node_modules", "__pycache__", ".cache",
239
+ "extensions", ".DS_Store", "logs", "temp", "tmp",
240
+ "credentials"} # credentials are injected from env at runtime
241
+ skip_exts = {".lock", ".tmp", ".socket", ".pid", ".pyc", ".log", ".bak"}
242
  # Skip files larger than 10MB to avoid timeout
243
  MAX_FILE_SIZE = 10 * 1024 * 1024
244
 
 
263
  elif item.is_dir():
264
  self._copy_for_upload(item, dst / item.name)
265
 
266
def _sanitize_all_json(self, staging_path: Path):
    """Redact secret values in every ``*.json`` file under *staging_path*.

    Each JSON file found recursively is parsed, passed to
    ``self._redact_secrets`` together with the set of secret key names
    below, and rewritten (indent=2) only when something was redacted.

    Sanitization is best-effort: a file that cannot be read, parsed or
    rewritten is reported on stdout and left as-is; the remaining files
    are still processed.

    Args:
        staging_path: Root of the staging copy to scan.
    """
    SECRET_KEYS = {"apiKey", "apikey", "api_key", "token", "secret",
                   "password", "access_token", "bot_token"}
    count = 0
    for json_file in staging_path.rglob("*.json"):
        # rglob matches on name only; skip directories and other
        # non-regular entries that merely end in ".json".
        if not json_file.is_file():
            continue
        try:
            # JSON is read and written as UTF-8 explicitly rather than
            # relying on the platform's default encoding.
            with open(json_file, "r", encoding="utf-8") as f:
                data = json.load(f)
            if self._redact_secrets(data, SECRET_KEYS):
                # Serialize first so a serialization failure cannot leave
                # a truncated file behind.
                payload = json.dumps(data, indent=2)
                with open(json_file, "w", encoding="utf-8") as f:
                    f.write(payload)
                count += 1
        except (OSError, ValueError, RecursionError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            # Report the failure instead of hiding it: this file was NOT
            # sanitized and may still contain secrets.
            print(f"[SYNC] Warning: could not sanitize {json_file}: {exc}")
    if count:
        print(f"[SYNC] Sanitized {count} JSON files (redacted secrets)")
284
+
285
+ def _redact_secrets(self, obj, secret_keys, depth=0):
286
+ """Recursively redact values of secret-looking keys."""
287
+ if depth > 10:
288
+ return False
289
+ changed = False
290
+ if isinstance(obj, dict):
291
+ for key in obj:
292
+ if key.lower() in {k.lower() for k in secret_keys}:
293
+ val = obj[key]
294
+ if isinstance(val, str) and len(val) > 3 and not val.startswith("<"):
295
+ obj[key] = "<REDACTED>"
296
+ changed = True
297
+ elif isinstance(obj[key], (dict, list)):
298
+ if self._redact_secrets(obj[key], secret_keys, depth + 1):
299
+ changed = True
300
+ elif isinstance(obj, list):
301
+ for item in obj:
302
+ if isinstance(item, (dict, list)):
303
+ if self._redact_secrets(item, secret_keys, depth + 1):
304
+ changed = True
305
+ return changed
306
+
307
  def _sanitize_config_file(self, config_path: Path):
308
  """Sanitize openclaw.json to remove secrets before upload."""
309
  try: