Spaces:
Running
Running
Claude committed on
Commit ·
caee4c3
1
Parent(s): 055138d
feat: refactor backup with configurable intervals and retention
Browse files
- Fix env var: HF_BACKUP_REPO -> HF_DATASET
- Add WebDAV env vars: WEBDAV_URL, WEBDAV_USERNAME, WEBDAV_PASSWORD
- Add interval config: BACKUP_WEBDAV_INTERVAL (default 1440min/24h),
BACKUP_HF_INTERVAL (default 10min)
- WebDAV: timestamped files, keep max 10, delete oldest
- HF Dataset: fixed filename, auto-overwrite
- Scheduler uses configurable intervals
- backup-manager.py +57 -22
backup-manager.py
CHANGED
|
@@ -91,6 +91,15 @@ class WebDAVClient:
|
|
| 91 |
except:
|
| 92 |
return False
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
# G2.2/G2.3: 备份管理器
|
| 96 |
class BackupManager:
|
|
@@ -106,10 +115,12 @@ class BackupManager:
|
|
| 106 |
self.webdav_user = os.environ.get("WEBDAV_USERNAME", "")
|
| 107 |
self.webdav_pass = os.environ.get("WEBDAV_PASSWORD", "")
|
| 108 |
self.webdav_path = os.environ.get("WEBDAV_BACKUP_PATH", "/openclaw-backups")
|
| 109 |
-
|
|
|
|
| 110 |
# HF Dataset 配置
|
| 111 |
self.hf_token = os.environ.get("HF_TOKEN", "")
|
| 112 |
-
self.hf_repo = os.environ.get("
|
|
|
|
| 113 |
|
| 114 |
self.webdav: Optional[WebDAVClient] = None
|
| 115 |
self.hf_api: Optional[HfApi] = None
|
|
@@ -207,21 +218,33 @@ class BackupManager:
|
|
| 207 |
else:
|
| 208 |
print(f"[Backup] WebDAV upload failed")
|
| 209 |
success = False
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
# 备份到 HF Dataset
|
| 212 |
if self.hf_api and self.hf_repo:
|
| 213 |
-
print(f"[Backup] Uploading to HF Dataset:
|
| 214 |
try:
|
| 215 |
# 确保repo存在
|
| 216 |
try:
|
| 217 |
self.hf_api.repo_info(repo_id=self.hf_repo, repo_type="dataset")
|
| 218 |
except:
|
| 219 |
create_repo(self.hf_repo, repo_type="dataset", token=self.hf_token, private=True)
|
| 220 |
-
|
| 221 |
# 上传文件
|
| 222 |
upload_file(
|
| 223 |
path_or_fileobj=io.BytesIO(archive_data),
|
| 224 |
-
path_in_repo=
|
| 225 |
repo_id=self.hf_repo,
|
| 226 |
repo_type="dataset",
|
| 227 |
token=self.hf_token
|
|
@@ -291,13 +314,25 @@ class BackupManager:
|
|
| 291 |
print(f"[Backup] WebDAV incremental upload successful")
|
| 292 |
else:
|
| 293 |
success = False
|
| 294 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
if self.hf_api and self.hf_repo:
|
| 296 |
-
print(f"[Backup] Uploading incremental to HF Dataset:
|
| 297 |
try:
|
| 298 |
upload_file(
|
| 299 |
path_or_fileobj=io.BytesIO(archive_data),
|
| 300 |
-
path_in_repo=
|
| 301 |
repo_id=self.hf_repo,
|
| 302 |
repo_type="dataset",
|
| 303 |
token=self.hf_token
|
|
@@ -369,7 +404,7 @@ class BackupManager:
|
|
| 369 |
try:
|
| 370 |
# 列出repo中的文件
|
| 371 |
files = self.hf_api.list_repo_files(self.hf_repo, repo_type="dataset")
|
| 372 |
-
backups = [f for f in files if f.startswith("
|
| 373 |
|
| 374 |
if not backups:
|
| 375 |
return False
|
|
@@ -430,7 +465,7 @@ class BackupManager:
|
|
| 430 |
"""应用 HF Dataset 增量备份"""
|
| 431 |
try:
|
| 432 |
files = self.hf_api.list_repo_files(self.hf_repo, repo_type="dataset")
|
| 433 |
-
incr_backups = sorted([f for f in files if f.startswith("
|
| 434 |
|
| 435 |
for backup_name in incr_backups:
|
| 436 |
print(f"[Backup] Applying incremental: {backup_name}")
|
|
@@ -449,17 +484,17 @@ class BackupManager:
|
|
| 449 |
|
| 450 |
# G2.5: 调度器
|
| 451 |
def scheduler(self):
|
| 452 |
-
"""后台调度器 -
|
| 453 |
-
print("[Backup] Starting
|
| 454 |
-
|
| 455 |
-
#
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
while True:
|
| 464 |
schedule.run_pending()
|
| 465 |
time.sleep(60)
|
|
|
|
| 91 |
except:
|
| 92 |
return False
|
| 93 |
|
| 94 |
+
def delete(self, path: str) -> bool:
|
| 95 |
+
"""Delete a file"""
|
| 96 |
+
try:
|
| 97 |
+
resp = self.session.delete(self._url(path))
|
| 98 |
+
return resp.status_code in [200, 204, 404]
|
| 99 |
+
except Exception as e:
|
| 100 |
+
print(f"[WebDAV] DELETE error: {e}")
|
| 101 |
+
return False
|
| 102 |
+
|
| 103 |
|
| 104 |
# G2.2/G2.3: 备份管理器
|
| 105 |
class BackupManager:
|
|
|
|
| 115 |
self.webdav_user = os.environ.get("WEBDAV_USERNAME", "")
|
| 116 |
self.webdav_pass = os.environ.get("WEBDAV_PASSWORD", "")
|
| 117 |
self.webdav_path = os.environ.get("WEBDAV_BACKUP_PATH", "/openclaw-backups")
|
| 118 |
+
self.webdav_interval = int(os.environ.get("BACKUP_WEBDAV_INTERVAL", "1440")) # minutes, default 24h
|
| 119 |
+
|
| 120 |
# HF Dataset 配置
|
| 121 |
self.hf_token = os.environ.get("HF_TOKEN", "")
|
| 122 |
+
self.hf_repo = os.environ.get("HF_DATASET", "")
|
| 123 |
+
self.hf_interval = int(os.environ.get("BACKUP_HF_INTERVAL", "10")) # minutes, default 10min
|
| 124 |
|
| 125 |
self.webdav: Optional[WebDAVClient] = None
|
| 126 |
self.hf_api: Optional[HfApi] = None
|
|
|
|
| 218 |
else:
|
| 219 |
print(f"[Backup] WebDAV upload failed")
|
| 220 |
success = False
|
| 221 |
+
|
| 222 |
+
# Keep only latest 10 WebDAV backups
|
| 223 |
+
try:
|
| 224 |
+
items = self.webdav.propfind(self.webdav_path)
|
| 225 |
+
full_backups = sorted([i for i in items if "openclaw_full_" in i.get("href", "")])
|
| 226 |
+
while len(full_backups) > 10:
|
| 227 |
+
oldest = full_backups.pop(0)
|
| 228 |
+
oldest_name = oldest.split("/")[-1]
|
| 229 |
+
self.webdav.delete(f"{self.webdav_path}/{oldest_name}")
|
| 230 |
+
print(f"[Backup] Cleaned old WebDAV backup: {oldest_name}")
|
| 231 |
+
except Exception as e:
|
| 232 |
+
print(f"[Backup] WebDAV cleanup error: {e}")
|
| 233 |
+
|
| 234 |
# 备份到 HF Dataset
|
| 235 |
if self.hf_api and self.hf_repo:
|
| 236 |
+
print(f"[Backup] Uploading to HF Dataset: openclaw_full_latest.tar.gz")
|
| 237 |
try:
|
| 238 |
# 确保repo存在
|
| 239 |
try:
|
| 240 |
self.hf_api.repo_info(repo_id=self.hf_repo, repo_type="dataset")
|
| 241 |
except:
|
| 242 |
create_repo(self.hf_repo, repo_type="dataset", token=self.hf_token, private=True)
|
| 243 |
+
|
| 244 |
# 上传文件
|
| 245 |
upload_file(
|
| 246 |
path_or_fileobj=io.BytesIO(archive_data),
|
| 247 |
+
path_in_repo="openclaw_full_latest.tar.gz",
|
| 248 |
repo_id=self.hf_repo,
|
| 249 |
repo_type="dataset",
|
| 250 |
token=self.hf_token
|
|
|
|
| 314 |
print(f"[Backup] WebDAV incremental upload successful")
|
| 315 |
else:
|
| 316 |
success = False
|
| 317 |
+
|
| 318 |
+
# Keep only latest 10 WebDAV incremental backups
|
| 319 |
+
try:
|
| 320 |
+
items = self.webdav.propfind(self.webdav_path)
|
| 321 |
+
incr_backups = sorted([i for i in items if "openclaw_incr_" in i.get("href", "")])
|
| 322 |
+
while len(incr_backups) > 10:
|
| 323 |
+
oldest = incr_backups.pop(0)
|
| 324 |
+
oldest_name = oldest.split("/")[-1]
|
| 325 |
+
self.webdav.delete(f"{self.webdav_path}/{oldest_name}")
|
| 326 |
+
print(f"[Backup] Cleaned old WebDAV incremental backup: {oldest_name}")
|
| 327 |
+
except Exception as e:
|
| 328 |
+
print(f"[Backup] WebDAV cleanup error: {e}")
|
| 329 |
+
|
| 330 |
if self.hf_api and self.hf_repo:
|
| 331 |
+
print(f"[Backup] Uploading incremental to HF Dataset: openclaw_incr_latest.tar.gz")
|
| 332 |
try:
|
| 333 |
upload_file(
|
| 334 |
path_or_fileobj=io.BytesIO(archive_data),
|
| 335 |
+
path_in_repo="openclaw_incr_latest.tar.gz",
|
| 336 |
repo_id=self.hf_repo,
|
| 337 |
repo_type="dataset",
|
| 338 |
token=self.hf_token
|
|
|
|
| 404 |
try:
|
| 405 |
# 列出repo中的文件
|
| 406 |
files = self.hf_api.list_repo_files(self.hf_repo, repo_type="dataset")
|
| 407 |
+
backups = [f for f in files if f.startswith("openclaw_full_latest")]
|
| 408 |
|
| 409 |
if not backups:
|
| 410 |
return False
|
|
|
|
| 465 |
"""应用 HF Dataset 增量备份"""
|
| 466 |
try:
|
| 467 |
files = self.hf_api.list_repo_files(self.hf_repo, repo_type="dataset")
|
| 468 |
+
incr_backups = sorted([f for f in files if f.startswith("openclaw_incr_latest")])
|
| 469 |
|
| 470 |
for backup_name in incr_backups:
|
| 471 |
print(f"[Backup] Applying incremental: {backup_name}")
|
|
|
|
| 484 |
|
| 485 |
# G2.5: 调度器
|
| 486 |
def scheduler(self):
|
| 487 |
+
"""后台调度器 - 按配置间隔执行备份"""
|
| 488 |
+
print(f"[Backup] Starting scheduler: WebDAV every {self.webdav_interval}min, HF every {self.hf_interval}min")
|
| 489 |
+
|
| 490 |
+
# WebDAV: full backup at configured interval
|
| 491 |
+
if self.webdav:
|
| 492 |
+
schedule.every(self.webdav_interval).minutes.do(self.full_backup)
|
| 493 |
+
|
| 494 |
+
# HF Dataset: incremental at configured interval
|
| 495 |
+
if self.hf_api and self.hf_repo:
|
| 496 |
+
schedule.every(self.hf_interval).minutes.do(self.incremental_backup)
|
| 497 |
+
|
| 498 |
while True:
|
| 499 |
schedule.run_pending()
|
| 500 |
time.sleep(60)
|