# ─────────────────────────────────────────────────────────────
# 📁 PATH: sync.py (root HF Space repo)
# Updated for Free Models - No API Key Leakage
# ─────────────────────────────────────────────────────────────
import os
import sys
import tarfile
import time
from datetime import datetime

from huggingface_hub import HfApi, hf_hub_download

api = HfApi()
repo_id = os.getenv("HF_DATASET")
token = os.getenv("HF_TOKEN")

# Backup files
MAIN_BACKUP = "openclaw_backup.tar.gz"
BROWSER_BACKUP = "browser_backup.tar.gz"

# Items to skip from main backup (these are recreated or too large)
SKIP_ITEMS = {
    "openclaw.json",   # Recreated from env vars on each start
    "browsers",        # Backed up separately (large file)
    "wechat-data",     # Can be large, optional
    "cache",           # Temporary cache
    ".cache",          # Hidden cache
}

# Items to always backup (core data)
CORE_ITEMS = {
    "sessions",        # Chat history
    "workspace",       # User files and memory
    "agents",          # Agent configurations
    "memory",          # Long-term memory
    "credentials",     # But only if they don't contain keys
}


def should_skip(item_name: str) -> bool:
    """Check if an item should be skipped from backup.

    Skips anything in SKIP_ITEMS, hidden entries (dot-prefixed) and
    temporary files (``*.tmp``).
    """
    return (
        item_name in SKIP_ITEMS
        or item_name.startswith('.')
        or item_name.endswith('.tmp')
    )


def _safe_extractall(archive_path: str, dest: str) -> None:
    """Extract a ``.tar.gz`` archive into *dest*, refusing path traversal.

    Downloaded archives are untrusted input: a crafted member name like
    ``../../etc/passwd`` must never escape *dest*.  On Python 3.12+ the
    ``filter="data"`` argument (PEP 706) enforces this natively; on older
    versions we validate each member's resolved path by hand.

    Raises:
        RuntimeError: if a member would extract outside *dest* (pre-3.12 path).
    """
    with tarfile.open(archive_path, "r:gz") as tar:
        try:
            tar.extractall(path=dest, filter="data")
        except TypeError:
            # Pre-3.12 tarfile has no `filter=` kwarg — check members manually.
            dest_root = os.path.realpath(dest)
            for member in tar.getmembers():
                target = os.path.realpath(os.path.join(dest, member.name))
                if target != dest_root and not target.startswith(dest_root + os.sep):
                    raise RuntimeError(f"Unsafe path in archive: {member.name}")
            tar.extractall(path=dest)


def restore():
    """Restore from latest backup on startup.

    Best-effort: a missing dataset, missing credentials or absent backup
    file is normal on first deploy, so failures are reported but never
    raised to the caller.
    """
    if not repo_id or not token:
        print("⚠️ Skip Restore: HF_DATASET or HF_TOKEN not configured")
        print("   → Fresh installation (normal for first deploy)")
        return

    # Restore main backup (sessions, workspace, memory)
    try:
        print(f"📥 Downloading {MAIN_BACKUP} from {repo_id}...")
        path = hf_hub_download(
            repo_id=repo_id,
            filename=MAIN_BACKUP,
            repo_type="dataset",
            token=token,
        )
        _safe_extractall(path, "/root/.openclaw/")
        print(f"✅ Restored from {MAIN_BACKUP}")
    except Exception as e:
        print(f"ℹ️ No previous backup found: {e}")
        print("   → This is normal for first deployment")

    # Restore browser binaries (if they exist in dataset)
    try:
        print("📥 Checking for browser backup...")
        browser_path = hf_hub_download(
            repo_id=repo_id,
            filename=BROWSER_BACKUP,
            repo_type="dataset",
            token=token,
        )
        _safe_extractall(browser_path, "/root/.openclaw/")
        print("✅ Browser binaries restored")
    except Exception:
        print("ℹ️ No browser backup found - will install fresh if needed")


def backup():
    """Backup current state (runs every hour).

    Two archives are produced:
      1. Core data (sessions, workspace, memory) — uploaded every run.
      2. Browser binaries — uploaded only once (skipped when the archive
         already exists in the dataset), since they are large and static.

    Local archives are deleted after a successful upload to avoid
    accumulating ~100MB+ files in the container between runs.
    """
    if not repo_id or not token:
        print("⚠️ Skip Backup: HF_DATASET or HF_TOKEN not configured")
        return

    base_dir = "/root/.openclaw"

    # ── Backup 1: Core data (sessions, workspace, memory) ──────
    if not os.path.exists(base_dir):
        print(f"⚠️ {base_dir} does not exist - nothing to backup")
        return

    try:
        print("📦 Creating backup archive...")
        items_backed_up = []
        items_skipped = []

        with tarfile.open(MAIN_BACKUP, "w:gz") as tar:
            for item in os.listdir(base_dir):
                if should_skip(item):
                    items_skipped.append(item)
                    continue
                full_path = os.path.join(base_dir, item)
                # Guard against entries vanishing between listdir and add.
                if os.path.exists(full_path):
                    tar.add(full_path, arcname=item)
                    items_backed_up.append(item)

        print(f"   ✅ Backed up: {', '.join(items_backed_up)}")
        if items_skipped:
            print(f"   ⏭️ Skipped: {', '.join(items_skipped)}")

        # Upload to Hugging Face Dataset
        print(f"📤 Uploading {MAIN_BACKUP} to {repo_id}...")
        api.upload_file(
            path_or_fileobj=MAIN_BACKUP,
            path_in_repo=MAIN_BACKUP,
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
        )
        # Remove the local archive once it is safely in the dataset.
        os.remove(MAIN_BACKUP)
        print(f"✅ Backup complete - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    except Exception as e:
        print(f"❌ Backup error: {e}")

    # ── Backup 2: Browser binaries (only once, not every time) ──
    try:
        browsers_dir = "/root/.openclaw/browsers"
        if not os.path.exists(browsers_dir):
            return

        # Check if browser backup already exists in dataset
        try:
            hf_hub_download(
                repo_id=repo_id,
                filename=BROWSER_BACKUP,
                repo_type="dataset",
                token=token,
            )
            print(" Browser backup already exists - skipping upload")
        except Exception:
            # First time - upload browser binaries
            print("📦 First time: uploading browser binaries (~150MB)...")
            with tarfile.open(BROWSER_BACKUP, "w:gz") as tar:
                tar.add(browsers_dir, arcname="browsers")
            api.upload_file(
                path_or_fileobj=BROWSER_BACKUP,
                path_in_repo=BROWSER_BACKUP,
                repo_id=repo_id,
                repo_type="dataset",
                token=token,
            )
            os.remove(BROWSER_BACKUP)
            print("✅ Browser backup uploaded (will not upload again)")
    except Exception as e:
        print(f"⚠️ Browser backup skipped: {e}")


def list_backups():
    """List available ``.tar.gz`` backups in the configured dataset."""
    if not repo_id or not token:
        print("⚠️ HF_DATASET or HF_TOKEN not configured")
        return

    try:
        files = api.list_repo_files(repo_id=repo_id, repo_type="dataset", token=token)
        backup_files = [f for f in files if f.endswith('.tar.gz')]
        if backup_files:
            print(f"\n📋 Available backups in {repo_id}:")
            for f in sorted(backup_files, reverse=True):
                print(f"   - {f}")
        else:
            print(f"📋 No backups found in {repo_id}")
    except Exception as e:
        print(f"❌ Error listing backups: {e}")


if __name__ == "__main__":
    # CLI dispatch: `python sync.py [backup|list|restore]`, default = restore.
    if len(sys.argv) > 1:
        if sys.argv[1] == "backup":
            backup()
        elif sys.argv[1] == "list":
            list_backups()
        elif sys.argv[1] == "restore":
            restore()
    else:
        restore()