Claude committed on
Commit
caee4c3
·
1 Parent(s): 055138d

feat: refactor backup with configurable intervals and retention

Browse files

- Fix env var: HF_BACKUP_REPO -> HF_DATASET
- Add WebDAV env vars: WEBDAV_URL, WEBDAV_USERNAME, WEBDAV_PASSWORD
- Add interval config: BACKUP_WEBDAV_INTERVAL (default 1440min/24h),
BACKUP_HF_INTERVAL (default 10min)
- WebDAV: timestamped files, keep max 10, delete oldest
- HF Dataset: fixed filename, auto-overwrite
- Scheduler uses configurable intervals

Files changed (1) hide show
  1. backup-manager.py +57 -22
backup-manager.py CHANGED
@@ -91,6 +91,15 @@ class WebDAVClient:
91
  except:
92
  return False
93
 
 
 
 
 
 
 
 
 
 
94
 
95
  # G2.2/G2.3: 备份管理器
96
  class BackupManager:
@@ -106,10 +115,12 @@ class BackupManager:
106
  self.webdav_user = os.environ.get("WEBDAV_USERNAME", "")
107
  self.webdav_pass = os.environ.get("WEBDAV_PASSWORD", "")
108
  self.webdav_path = os.environ.get("WEBDAV_BACKUP_PATH", "/openclaw-backups")
109
-
 
110
  # HF Dataset 配置
111
  self.hf_token = os.environ.get("HF_TOKEN", "")
112
- self.hf_repo = os.environ.get("HF_BACKUP_REPO", "")
 
113
 
114
  self.webdav: Optional[WebDAVClient] = None
115
  self.hf_api: Optional[HfApi] = None
@@ -207,21 +218,33 @@ class BackupManager:
207
  else:
208
  print(f"[Backup] WebDAV upload failed")
209
  success = False
210
-
 
 
 
 
 
 
 
 
 
 
 
 
211
  # 备份到 HF Dataset
212
  if self.hf_api and self.hf_repo:
213
- print(f"[Backup] Uploading to HF Dataset: {archive_name}")
214
  try:
215
  # 确保repo存在
216
  try:
217
  self.hf_api.repo_info(repo_id=self.hf_repo, repo_type="dataset")
218
  except:
219
  create_repo(self.hf_repo, repo_type="dataset", token=self.hf_token, private=True)
220
-
221
  # 上传文件
222
  upload_file(
223
  path_or_fileobj=io.BytesIO(archive_data),
224
- path_in_repo=archive_name,
225
  repo_id=self.hf_repo,
226
  repo_type="dataset",
227
  token=self.hf_token
@@ -291,13 +314,25 @@ class BackupManager:
291
  print(f"[Backup] WebDAV incremental upload successful")
292
  else:
293
  success = False
294
-
 
 
 
 
 
 
 
 
 
 
 
 
295
  if self.hf_api and self.hf_repo:
296
- print(f"[Backup] Uploading incremental to HF Dataset: {archive_name}")
297
  try:
298
  upload_file(
299
  path_or_fileobj=io.BytesIO(archive_data),
300
- path_in_repo=archive_name,
301
  repo_id=self.hf_repo,
302
  repo_type="dataset",
303
  token=self.hf_token
@@ -369,7 +404,7 @@ class BackupManager:
369
  try:
370
  # 列出repo中的文件
371
  files = self.hf_api.list_repo_files(self.hf_repo, repo_type="dataset")
372
- backups = [f for f in files if f.startswith("openclaw_full_")]
373
 
374
  if not backups:
375
  return False
@@ -430,7 +465,7 @@ class BackupManager:
430
  """应用 HF Dataset 增量备份"""
431
  try:
432
  files = self.hf_api.list_repo_files(self.hf_repo, repo_type="dataset")
433
- incr_backups = sorted([f for f in files if f.startswith("openclaw_incr_")])
434
 
435
  for backup_name in incr_backups:
436
  print(f"[Backup] Applying incremental: {backup_name}")
@@ -449,17 +484,17 @@ class BackupManager:
449
 
450
  # G2.5: 调度器
451
  def scheduler(self):
452
- """后台调度器 - 每小时增量 + 每24小时全量"""
453
- print("[Backup] Starting backup scheduler...")
454
-
455
- # 每小时增量备份
456
- schedule.every().hour.do(self.incremental_backup)
457
-
458
- # 每24小时全量备份
459
- schedule.every().day.at("02:00").do(self.full_backup)
460
-
461
- print("[Backup] Scheduler started (hourly incr, daily full at 02:00)")
462
-
463
  while True:
464
  schedule.run_pending()
465
  time.sleep(60)
 
91
  except:
92
  return False
93
 
94
def delete(self, path: str) -> bool:
    """Delete the remote file at ``path``.

    Args:
        path: Remote path, resolved to a full URL through ``self._url``.

    Returns:
        True when the server answers 200, 204 or 404; False for any other
        status or when the request raises.
    """
    # Bound the request so an unresponsive server cannot block the caller
    # forever. NOTE(review): assumes self.session is a requests-style
    # session whose delete() accepts ``timeout`` - confirm.
    timeout_seconds = 30
    try:
        resp = self.session.delete(self._url(path), timeout=timeout_seconds)
    except Exception as e:
        # Best-effort operation: report the failure and let the caller go on.
        print(f"[WebDAV] DELETE error: {e}")
        return False
    # 404 is accepted on purpose: the file is already gone, which is the
    # state the caller asked for.
    return resp.status_code in (200, 204, 404)
102
+
103
 
104
  # G2.2/G2.3: 备份管理器
105
  class BackupManager:
 
115
  self.webdav_user = os.environ.get("WEBDAV_USERNAME", "")
116
  self.webdav_pass = os.environ.get("WEBDAV_PASSWORD", "")
117
  self.webdav_path = os.environ.get("WEBDAV_BACKUP_PATH", "/openclaw-backups")
118
+ self.webdav_interval = int(os.environ.get("BACKUP_WEBDAV_INTERVAL", "1440")) # minutes, default 24h
119
+
120
  # HF Dataset 配置
121
  self.hf_token = os.environ.get("HF_TOKEN", "")
122
+ self.hf_repo = os.environ.get("HF_DATASET", "")
123
+ self.hf_interval = int(os.environ.get("BACKUP_HF_INTERVAL", "10")) # minutes, default 10min
124
 
125
  self.webdav: Optional[WebDAVClient] = None
126
  self.hf_api: Optional[HfApi] = None
 
218
  else:
219
  print(f"[Backup] WebDAV upload failed")
220
  success = False
221
+
222
+ # Keep only latest 10 WebDAV backups
223
+ try:
224
+ items = self.webdav.propfind(self.webdav_path)
225
+ full_backups = sorted([i for i in items if "openclaw_full_" in i.get("href", "")])
226
+ while len(full_backups) > 10:
227
+ oldest = full_backups.pop(0)
228
+ oldest_name = oldest.split("/")[-1]
229
+ self.webdav.delete(f"{self.webdav_path}/{oldest_name}")
230
+ print(f"[Backup] Cleaned old WebDAV backup: {oldest_name}")
231
+ except Exception as e:
232
+ print(f"[Backup] WebDAV cleanup error: {e}")
233
+
234
  # 备份到 HF Dataset
235
  if self.hf_api and self.hf_repo:
236
+ print(f"[Backup] Uploading to HF Dataset: openclaw_full_latest.tar.gz")
237
  try:
238
  # 确保repo存在
239
  try:
240
  self.hf_api.repo_info(repo_id=self.hf_repo, repo_type="dataset")
241
  except:
242
  create_repo(self.hf_repo, repo_type="dataset", token=self.hf_token, private=True)
243
+
244
  # 上传文件
245
  upload_file(
246
  path_or_fileobj=io.BytesIO(archive_data),
247
+ path_in_repo="openclaw_full_latest.tar.gz",
248
  repo_id=self.hf_repo,
249
  repo_type="dataset",
250
  token=self.hf_token
 
314
  print(f"[Backup] WebDAV incremental upload successful")
315
  else:
316
  success = False
317
+
318
+ # Keep only latest 10 WebDAV incremental backups
319
+ try:
320
+ items = self.webdav.propfind(self.webdav_path)
321
+ incr_backups = sorted([i for i in items if "openclaw_incr_" in i.get("href", "")])
322
+ while len(incr_backups) > 10:
323
+ oldest = incr_backups.pop(0)
324
+ oldest_name = oldest.split("/")[-1]
325
+ self.webdav.delete(f"{self.webdav_path}/{oldest_name}")
326
+ print(f"[Backup] Cleaned old WebDAV incremental backup: {oldest_name}")
327
+ except Exception as e:
328
+ print(f"[Backup] WebDAV cleanup error: {e}")
329
+
330
  if self.hf_api and self.hf_repo:
331
+ print(f"[Backup] Uploading incremental to HF Dataset: openclaw_incr_latest.tar.gz")
332
  try:
333
  upload_file(
334
  path_or_fileobj=io.BytesIO(archive_data),
335
+ path_in_repo="openclaw_incr_latest.tar.gz",
336
  repo_id=self.hf_repo,
337
  repo_type="dataset",
338
  token=self.hf_token
 
404
  try:
405
  # 列出repo中的文件
406
  files = self.hf_api.list_repo_files(self.hf_repo, repo_type="dataset")
407
+ backups = [f for f in files if f.startswith("openclaw_full_latest")]
408
 
409
  if not backups:
410
  return False
 
465
  """应用 HF Dataset 增量备份"""
466
  try:
467
  files = self.hf_api.list_repo_files(self.hf_repo, repo_type="dataset")
468
+ incr_backups = sorted([f for f in files if f.startswith("openclaw_incr_latest")])
469
 
470
  for backup_name in incr_backups:
471
  print(f"[Backup] Applying incremental: {backup_name}")
 
484
 
485
  # G2.5: 调度器
486
  def scheduler(self):
487
+ """后台调度器 - 按配置间隔执行备份"""
488
+ print(f"[Backup] Starting scheduler: WebDAV every {self.webdav_interval}min, HF every {self.hf_interval}min")
489
+
490
+ # WebDAV: full backup at configured interval
491
+ if self.webdav:
492
+ schedule.every(self.webdav_interval).minutes.do(self.full_backup)
493
+
494
+ # HF Dataset: incremental at configured interval
495
+ if self.hf_api and self.hf_repo:
496
+ schedule.every(self.hf_interval).minutes.do(self.incremental_backup)
497
+
498
  while True:
499
  schedule.run_pending()
500
  time.sleep(60)