somratpro Claude Sonnet 4.6 committed on
Commit
fd8bbbe
·
1 Parent(s): c996923

feat(sync): skip backup when no state changes detected

Browse files

Add pg_stat_database activity marker + PAPERCLIP_HOME filesystem
marker. State persisted to /tmp; backup skipped when both match the
last successful upload's snapshot.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. paperclip-sync.py +87 -0
paperclip-sync.py CHANGED
@@ -49,6 +49,7 @@ SYNC_MAX_FILE_BYTES = int(os.environ.get('SYNC_MAX_FILE_BYTES', '52428800')) #
49
  PAPERCLIP_HOME = os.environ.get('PAPERCLIP_HOME', '/paperclip')
50
  # Status file for dashboard
51
  STATUS_FILE = Path('/tmp/sync-status.json')
 
52
 
53
  # ============================================================================
54
  # Helper Functions
@@ -390,6 +391,81 @@ def sync_from_hf() -> bool:
390
  logger.error(f'Failed to restore from HF: {e}')
391
  return False
392
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
393
  # ============================================================================
394
  # Main Sync Operations
395
  # ============================================================================
@@ -398,6 +474,16 @@ def sync_to_backup() -> bool:
398
  """Full backup operation: dump DB β†’ create tarball β†’ upload to HF"""
399
  logger.info('Syncing backup to HF Dataset...')
400
 
 
 
 
 
 
 
 
 
 
 
401
  status = read_status()
402
 
403
  try:
@@ -430,6 +516,7 @@ def sync_to_backup() -> bool:
430
 
431
  if success:
432
  logger.info('Backup synced OK')
 
433
  else:
434
  logger.warning('Backup sync failed')
435
 
 
49
  PAPERCLIP_HOME = os.environ.get('PAPERCLIP_HOME', '/paperclip')
50
  # Status file for dashboard
51
  STATUS_FILE = Path('/tmp/sync-status.json')
52
+ STATE_FILE = Path('/tmp/huggingclip-sync-state.json')
53
 
54
  # ============================================================================
55
  # Helper Functions
 
391
  logger.error(f'Failed to restore from HF: {e}')
392
  return False
393
 
394
+ # ============================================================================
395
+ # Change detection helpers
396
+ # ============================================================================
397
+
398
def _get_db_marker() -> int:
    """Return a cumulative write-activity counter for the configured database.

    Runs ``psql`` against the database described by ``DATABASE_URL`` and sums
    the ``tup_inserted``, ``tup_updated`` and ``tup_deleted`` columns of that
    database's ``pg_stat_database`` row.

    Transaction counters (``xact_commit`` / ``xact_rollback``) are
    deliberately excluded: per the PostgreSQL statistics documentation they
    count every committed or rolled-back transaction, read-only ones
    included, so this probe itself (and any dump of the database) would
    advance them and the marker would never compare equal between runs.

    Returns:
        The summed counter, or -1 when the URL cannot be parsed, no database
        name is configured, ``psql`` fails or times out, or its output is not
        an integer.
    """
    db = parse_db_url(DATABASE_URL)
    if not db:
        return -1
    try:
        db_name = db['database']
        if not db_name:
            return -1
        env = os.environ.copy()
        if db['password']:
            env['PGPASSWORD'] = db['password']
        result = subprocess.run(
            [
                'psql',
                f'--host={db["host"]}',
                f'--port={db["port"]}',
                f'--username={db["user"]}',
                # Connect to the target database explicitly; without this
                # psql falls back to a database named after the user.
                f'--dbname={db_name}',
                '--no-password', '--tuples-only', '--no-align',
                '-c',
                # current_database() avoids splicing the name into the SQL.
                'SELECT tup_inserted + tup_updated + tup_deleted '
                'FROM pg_stat_database WHERE datname = current_database()',
            ],
            env=env, capture_output=True, text=True, timeout=10,
        )
        output = result.stdout.strip()
        if result.returncode == 0 and output:
            return int(output)
        logger.debug(
            f'DB marker query returned no value (exit code {result.returncode})'
        )
    except Exception as e:
        # Best-effort probe: report the failure but never break the sync run.
        logger.debug(f'Could not read DB activity marker: {e}')
    return -1
426
+
427
+
428
def _fs_marker(root: str) -> tuple[int, int, int]:
    """Summarise the regular files under *root* as a cheap change marker.

    Args:
        root: Directory to scan recursively.

    Returns:
        ``(file_count, total_bytes, newest_mtime_ns)`` accumulated over every
        regular file found below *root*, or ``(0, 0, 0)`` when *root* does
        not exist.  Entries that cannot be stat'ed are skipped.
    """
    from stat import S_ISREG  # function-scope import keeps the block self-contained

    base = Path(root)
    if not base.exists():
        return (0, 0, 0)
    file_count = total_bytes = newest_mtime_ns = 0
    for entry in base.rglob("*"):
        # stat() is called once, inside the try, so an OSError on a single
        # entry (permission denied, file removed mid-scan) skips that entry
        # instead of aborting the whole scan.
        try:
            info = entry.stat()
        except OSError:
            continue
        if not S_ISREG(info.st_mode):
            continue
        file_count += 1
        total_bytes += int(info.st_size)
        newest_mtime_ns = max(newest_mtime_ns, int(info.st_mtime_ns))
    return (file_count, total_bytes, newest_mtime_ns)
444
+
445
+
446
def _current_marker() -> tuple:
    """Build the combined change marker for the current state.

    The result is the DB activity marker followed by the three filesystem
    marker values computed for PAPERCLIP_HOME.
    """
    db_part = _get_db_marker()
    fs_part = _fs_marker(PAPERCLIP_HOME)
    return (db_part, *fs_part)
448
+
449
+
450
def _load_sync_state():
    """Read the marker stored in STATE_FILE.

    Returns the stored ``'marker'`` entry as a tuple when the file exists,
    parses as JSON and the entry is non-empty with exactly four items.
    Returns None in every other case, including any read or parse error.
    """
    try:
        if not STATE_FILE.exists():
            return None
        payload = json.loads(STATE_FILE.read_text())
        stored = payload.get('marker')
        return tuple(stored) if stored and len(stored) == 4 else None
    except Exception:
        # Unreadable or malformed state is treated the same as no state.
        return None
460
+
461
+
462
def _save_sync_state(marker: tuple) -> None:
    """Write *marker* to STATE_FILE as JSON under the ``'marker'`` key.

    Failures are logged at debug level and otherwise ignored.
    """
    try:
        serialized = json.dumps({'marker': list(marker)})
        STATE_FILE.write_text(serialized)
    except Exception as err:
        logger.debug(f'Could not save sync state: {err}')
467
+
468
+
469
  # ============================================================================
470
  # Main Sync Operations
471
  # ============================================================================
 
474
  """Full backup operation: dump DB β†’ create tarball β†’ upload to HF"""
475
  logger.info('Syncing backup to HF Dataset...')
476
 
477
+ last_marker = _load_sync_state()
478
+ current_marker = _current_marker()
479
+ if last_marker is not None and current_marker == last_marker:
480
+ status = read_status()
481
+ status['status'] = 'synced'
482
+ status['message'] = 'No state changes detected.'
483
+ write_status(status)
484
+ logger.info('No state changes detected β€” skipping backup.')
485
+ return True
486
+
487
  status = read_status()
488
 
489
  try:
 
516
 
517
  if success:
518
  logger.info('Backup synced OK')
519
+ _save_sync_state(current_marker)
520
  else:
521
  logger.warning('Backup sync failed')
522