Spaces:
Running
Running
feat(sync): skip backup when no state changes detected
Browse files
Add pg_stat_database activity marker + PAPERCLIP_HOME filesystem
marker. State persisted to /tmp; backup skipped when both match the
last successful upload's snapshot.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- paperclip-sync.py +87 -0
paperclip-sync.py
CHANGED
|
@@ -49,6 +49,7 @@ SYNC_MAX_FILE_BYTES = int(os.environ.get('SYNC_MAX_FILE_BYTES', '52428800')) #
|
|
| 49 |
PAPERCLIP_HOME = os.environ.get('PAPERCLIP_HOME', '/paperclip')
|
| 50 |
# Status file for dashboard
|
| 51 |
STATUS_FILE = Path('/tmp/sync-status.json')
|
|
|
|
| 52 |
|
| 53 |
# ============================================================================
|
| 54 |
# Helper Functions
|
|
@@ -390,6 +391,81 @@ def sync_from_hf() -> bool:
|
|
| 390 |
logger.error(f'Failed to restore from HF: {e}')
|
| 391 |
return False
|
| 392 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
# ============================================================================
|
| 394 |
# Main Sync Operations
|
| 395 |
# ============================================================================
|
|
@@ -398,6 +474,16 @@ def sync_to_backup() -> bool:
|
|
| 398 |
"""Full backup operation: dump DB β create tarball β upload to HF"""
|
| 399 |
logger.info('Syncing backup to HF Dataset...')
|
| 400 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
status = read_status()
|
| 402 |
|
| 403 |
try:
|
|
@@ -430,6 +516,7 @@ def sync_to_backup() -> bool:
|
|
| 430 |
|
| 431 |
if success:
|
| 432 |
logger.info('Backup synced OK')
|
|
|
|
| 433 |
else:
|
| 434 |
logger.warning('Backup sync failed')
|
| 435 |
|
|
|
|
| 49 |
PAPERCLIP_HOME = os.environ.get('PAPERCLIP_HOME', '/paperclip')
|
| 50 |
# Status file for dashboard
|
| 51 |
STATUS_FILE = Path('/tmp/sync-status.json')
|
| 52 |
+
# Change-detection state: holds the marker saved after the last successful backup
STATE_FILE = Path('/tmp/huggingclip-sync-state.json')
|
| 53 |
|
| 54 |
# ============================================================================
|
| 55 |
# Helper Functions
|
|
|
|
| 391 |
logger.error(f'Failed to restore from HF: {e}')
|
| 392 |
return False
|
| 393 |
|
| 394 |
+
# ============================================================================
|
| 395 |
+
# Change detection helpers
|
| 396 |
+
# ============================================================================
|
| 397 |
+
|
| 398 |
+
def _get_db_marker() -> int:
    """Return a cumulative DB activity count read from pg_stat_database.

    Runs a single ``psql`` query that sums the transaction counters
    (``xact_commit``, ``xact_rollback``) and the tuple counters
    (``tup_inserted``, ``tup_updated``, ``tup_deleted``) for the database
    named in ``DATABASE_URL``.

    Returns:
        The summed counters as an int, or -1 when the URL cannot be parsed,
        the query fails or times out, or psql prints no value.
    """
    db = parse_db_url(DATABASE_URL)
    if not db:
        return -1
    try:
        env = os.environ.copy()
        if db['password']:
            # Pass the password via the environment, never on the command line.
            env['PGPASSWORD'] = db['password']
        # psql does not interpolate variables for -c commands, so the name is
        # embedded as a literal; double any single quote so it cannot
        # terminate the SQL string early.
        db_literal = str(db['database']).replace("'", "''")
        # NOTE(review): xact_commit counts every committed transaction, so
        # this probe may itself bump the counter if psql connects to the
        # same database -- confirm the skip path is actually reachable.
        query = (
            'SELECT xact_commit + xact_rollback + tup_inserted + tup_updated + tup_deleted '
            f"FROM pg_stat_database WHERE datname = '{db_literal}'"
        )
        result = subprocess.run(
            [
                'psql',
                f'--host={db["host"]}',
                f'--port={db["port"]}',
                f'--username={db["user"]}',
                '--no-password', '--tuples-only', '--no-align',
                '-c',
                query,
            ],
            env=env, capture_output=True, text=True, timeout=10,
        )
        output = result.stdout.strip()
        if result.returncode == 0 and output:
            return int(output)
        logger.debug(
            f'DB marker query returned no value (exit {result.returncode}): '
            f'{result.stderr.strip()}'
        )
    except Exception as e:
        # Best effort: a failed probe must never break the sync itself.
        logger.debug(f'Could not read DB marker: {e}')
    return -1
|
| 426 |
+
|
| 427 |
+
|
| 428 |
+
def _fs_marker(root: str) -> tuple[int, int, int]:
    """Summarize the regular files under *root* as a cheap change marker.

    Returns:
        A ``(file_count, total_bytes, newest_mtime_ns)`` tuple covering every
        file found recursively below *root*; ``(0, 0, 0)`` when *root* does
        not exist. Files whose ``stat`` fails are left out of the summary.
    """
    base = Path(root)
    file_count = 0
    total_bytes = 0
    newest_mtime_ns = 0
    if base.exists():
        for entry in base.rglob("*"):
            if entry.is_file():
                try:
                    info = entry.stat()
                except OSError:
                    # The file vanished or is unreadable; leave it out.
                    continue
                file_count += 1
                total_bytes += int(info.st_size)
                newest_mtime_ns = max(newest_mtime_ns, int(info.st_mtime_ns))
    return (file_count, total_bytes, newest_mtime_ns)
|
| 444 |
+
|
| 445 |
+
|
| 446 |
+
def _current_marker() -> tuple:
    """Build the 4-item change marker: DB activity count plus filesystem summary."""
    db_activity = _get_db_marker()
    fs_summary = _fs_marker(PAPERCLIP_HOME)
    return (db_activity, *fs_summary)
|
| 448 |
+
|
| 449 |
+
|
| 450 |
+
def _load_sync_state():
    """Load the marker saved after the last successful backup.

    Returns:
        The stored marker as a 4-item tuple of ints, or ``None`` when the
        state file is missing, unreadable, not valid JSON, or does not hold
        a well-formed marker.
    """
    try:
        raw = STATE_FILE.read_text()
    except FileNotFoundError:
        # No previous successful backup recorded yet.
        return None
    except Exception as e:
        logger.debug(f'Could not read sync state: {e}')
        return None

    try:
        marker = json.loads(raw).get('marker')
    except Exception as e:
        # Corrupt or unexpected content: treat as "no previous state".
        logger.debug(f'Could not parse sync state: {e}')
        return None

    # Only accept the exact shape this module writes (a list of four ints);
    # a 4-character string or 4-key dict must not pass as a marker.
    if (
        isinstance(marker, list)
        and len(marker) == 4
        and all(isinstance(value, int) for value in marker)
    ):
        return tuple(marker)
    return None
|
| 460 |
+
|
| 461 |
+
|
| 462 |
+
def _save_sync_state(marker: tuple) -> None:
    """Persist *marker* to the state file as JSON.

    Best effort: any failure is logged at debug level and not raised.
    """
    try:
        payload = {'marker': [*marker]}
        STATE_FILE.write_text(json.dumps(payload))
    except Exception as err:
        logger.debug(f'Could not save sync state: {err}')
|
| 467 |
+
|
| 468 |
+
|
| 469 |
# ============================================================================
|
| 470 |
# Main Sync Operations
|
| 471 |
# ============================================================================
|
|
|
|
| 474 |
"""Full backup operation: dump DB β create tarball β upload to HF"""
|
| 475 |
logger.info('Syncing backup to HF Dataset...')
|
| 476 |
|
| 477 |
+
last_marker = _load_sync_state()
|
| 478 |
+
current_marker = _current_marker()
|
| 479 |
+
if last_marker is not None and current_marker == last_marker:
|
| 480 |
+
status = read_status()
|
| 481 |
+
status['status'] = 'synced'
|
| 482 |
+
status['message'] = 'No state changes detected.'
|
| 483 |
+
write_status(status)
|
| 484 |
+
logger.info('No state changes detected β skipping backup.')
|
| 485 |
+
return True
|
| 486 |
+
|
| 487 |
status = read_status()
|
| 488 |
|
| 489 |
try:
|
|
|
|
| 516 |
|
| 517 |
if success:
|
| 518 |
logger.info('Backup synced OK')
|
| 519 |
+
_save_sync_state(current_marker)
|
| 520 |
else:
|
| 521 |
logger.warning('Backup sync failed')
|
| 522 |
|