tao-shen committed on
Commit
972704a
·
1 Parent(s): 6e0fe23

feat: Migrate dataset backups to gzipped tar archives and ensure backward compatibility during restore and rotation.

Browse files
scripts/restore_from_dataset.py CHANGED
@@ -28,8 +28,8 @@ def main() -> None:
28
  api = HfApi(token=token)
29
  files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
30
 
31
- # Filter for our backup pattern
32
- backups = sorted([f for f in files if f.startswith("state/backup-") and f.endswith(".tar")], reverse=True)
33
 
34
  if not backups:
35
  # Fallback to legacy filename if no rolling backups exist
@@ -51,7 +51,8 @@ def main() -> None:
51
  token=token,
52
  )
53
 
54
- with tarfile.open(tar_path, "r") as tf:
 
55
  tf.extractall(state_dir)
56
 
57
  print(f"[restore_from_dataset] Successfully restored from {backup_file}")
 
28
  api = HfApi(token=token)
29
  files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
30
 
31
+ # Filter for our backup pattern (support both .tar and .tar.gz)
32
+ backups = sorted([f for f in files if f.startswith("state/backup-") and (f.endswith(".tar") or f.endswith(".tar.gz"))], reverse=True)
33
 
34
  if not backups:
35
  # Fallback to legacy filename if no rolling backups exist
 
51
  token=token,
52
  )
53
 
54
+ # Auto-detect compression from the archive contents ("r:*" opens with transparent compression)
55
+ with tarfile.open(tar_path, "r:*") as tf:
56
  tf.extractall(state_dir)
57
 
58
  print(f"[restore_from_dataset] Successfully restored from {backup_file}")
scripts/save_to_dataset.py CHANGED
@@ -65,13 +65,13 @@ def main() -> None:
65
 
66
  # Generate timestamped filename
67
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
68
- filename = f"state/backup-{timestamp}.tar"
69
 
70
  with tempfile.TemporaryDirectory() as tmpdir:
71
- tar_path = os.path.join(tmpdir, "openclaw.tar")
72
 
73
  try:
74
- with tarfile.open(tar_path, "w") as tf:
75
  # Filter to exclude lock files or temp files if needed, but allow extensions
76
  def exclude_filter(info: tarfile.TarInfo) -> tarfile.TarInfo | None:
77
  if info.name.endswith(".lock"):
@@ -98,7 +98,8 @@ def main() -> None:
98
  # 2. Rotation: Delete old backups, keep last 5
99
  try:
100
  files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
101
- backups = sorted([f for f in files if f.startswith("state/backup-") and f.endswith(".tar")])
 
102
 
103
  if len(backups) > 5:
104
  # Delete oldest
 
65
 
66
  # Generate timestamped filename
67
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
68
+ filename = f"state/backup-{timestamp}.tar.gz"
69
 
70
  with tempfile.TemporaryDirectory() as tmpdir:
71
+ tar_path = os.path.join(tmpdir, "openclaw.tar.gz")
72
 
73
  try:
74
+ with tarfile.open(tar_path, "w:gz") as tf:
75
  # Filter to exclude lock files or temp files if needed, but allow extensions
76
  def exclude_filter(info: tarfile.TarInfo) -> tarfile.TarInfo | None:
77
  if info.name.endswith(".lock"):
 
98
  # 2. Rotation: Delete old backups, keep last 5
99
  try:
100
  files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
101
+ # Match both .tar and .tar.gz for backward compatibility during transition
102
+ backups = sorted([f for f in files if f.startswith("state/backup-") and (f.endswith(".tar") or f.endswith(".tar.gz"))])
103
 
104
  if len(backups) > 5:
105
  # Delete oldest