#!/bin/bash
#
# PicoClaw dataset sync daemon.
#
# On start-up the dataset repository is cloned (or updated) into
# $PICOCLAW_HOME/backup and its contents are restored into the workspace.
# Afterwards the workspace is mirrored back into that repository, committed
# and pushed every SYNC_INTERVAL seconds.
#
# Environment (all optional):
#   DATASET_REPO             git URL of the dataset repository
#   DATASET_BRANCH           branch to sync with (default: main)
#   SYNC_INTERVAL            delay between sync cycles, passed to sleep(1)
#   MAX_RETRIES              attempts for the initial sync and for each push
#   BACKUP_RETENTION_DAYS    age limit for top-level *.bak / snapshot entries
#   SNAPSHOT_RETENTION_DAYS  age limit for pre-sync workspace snapshots
#   MIN_DISK_FREE_MB         minimum free space required to run a sync
#   PICOCLAW_HOME            base directory (default: $HOME/.picoclaw)
#   GITHUB_TOKEN             token stored for HTTPS access to github.com
#   GIT_AUTHOR_NAME / GIT_AUTHOR_EMAIL  identity used for commits

# Strict mode: exit on error, undefined variable, or failed pipeline stage.
set -euo pipefail

# ============================================
# CONFIGURATION
# ============================================
DATASET_REPO="${DATASET_REPO:-https://github.com/personalbotai/picoclaw-memory.git}"
DATASET_BRANCH="${DATASET_BRANCH:-main}"  # Branch selection (default: main)
SYNC_INTERVAL="${SYNC_INTERVAL:-300}"     # seconds
MAX_RETRIES="${MAX_RETRIES:-3}"
BACKUP_RETENTION_DAYS="${BACKUP_RETENTION_DAYS:-7}"
SNAPSHOT_RETENTION_DAYS="${SNAPSHOT_RETENTION_DAYS:-1}"
MIN_DISK_FREE_MB="${MIN_DISK_FREE_MB:-1024}"  # 1GB minimum

# ============================================
# PATH RESOLUTION
# ============================================
if [ -z "${HOME:-}" ]; then
  export HOME="/root"
fi

PICOCLAW_HOME="${PICOCLAW_HOME:-$HOME/.picoclaw}"
WORKSPACE_DIR="$PICOCLAW_HOME/workspace"
CONFIG_FILE="$PICOCLAW_HOME/config.json"
BACKUP_DIR="$PICOCLAW_HOME/backup"
LOG_FILE="$PICOCLAW_HOME/sync.log"
STATE_FILE="$PICOCLAW_HOME/sync.state"

# PID of the background sleep started by pause(); empty when not sleeping.
SLEEP_PID=""

# ============================================
# LOGGING & UTILS
# ============================================

#######################################
# Print a timestamped message to stdout and append it to LOG_FILE.
# Appending is best-effort so that an unwritable log file can never abort
# the daemon under `set -e`.
# Arguments: $1 - level (INFO/WARN/ERROR), $2 - message
#######################################
log() {
  local level="$1"
  local msg="$2"
  local line
  line="[$(date '+%Y-%m-%d %H:%M:%S')] [$level] $msg"
  printf '%s\n' "$line"
  { printf '%s\n' "$line" >> "$LOG_FILE"; } 2>/dev/null || true
}

#######################################
# Log an error and terminate the script with status 1.
# Arguments: $1 - message
#######################################
die() {
  log "ERROR" "$1"
  exit 1
}

#######################################
# Sleep in the background and wait for it, so that a trapped signal
# interrupts the wait immediately instead of after the sleep finishes.
# Arguments: $1 - duration accepted by sleep(1)
#######################################
pause() {
  sleep "$1" &
  SLEEP_PID=$!
  wait "$SLEEP_PID" || true
  SLEEP_PID=""
}

#######################################
# Succeed when the given PID is numeric, is not this process and is alive.
# Arguments: $1 - PID read from STATE_FILE (may be empty)
#######################################
lock_held_by_other() {
  local owner="$1"
  [[ "$owner" =~ ^[0-9]+$ && "$owner" != "$$" ]] && kill -0 "$owner" 2>/dev/null
}

#######################################
# Print the PID recorded in STATE_FILE, or nothing when unreadable.
#######################################
read_lock_owner() {
  if [[ -f "$STATE_FILE" ]]; then
    cat -- "$STATE_FILE" 2>/dev/null || true
  fi
}

#######################################
# Record this process as the running sync in STATE_FILE. When another live
# process holds the file, wait 10 seconds once and give up if it is still
# there. A file naming a dead PID (or this PID) is simply overwritten.
# Returns: 0 when the lock was taken, 1 otherwise.
#######################################
acquire_lock() {
  local owner
  owner=$(read_lock_owner)
  if lock_held_by_other "$owner"; then
    log "WARN" "Another sync process (PID $owner) is running, waiting..."
    pause 10
    owner=$(read_lock_owner)
    if lock_held_by_other "$owner"; then
      log "ERROR" "Another sync process still running, aborting."
      return 1
    fi
  fi
  if ! echo "$$" > "$STATE_FILE"; then
    log "ERROR" "Cannot write state file: $STATE_FILE"
    return 1
  fi
  return 0
}

#######################################
# Remove STATE_FILE, but only when it was written by this process.
#######################################
release_lock() {
  local owner
  owner=$(read_lock_owner)
  if [[ "$owner" == "$$" ]]; then
    rm -f -- "$STATE_FILE"
  fi
}

# ============================================
# GRACEFUL SHUTDOWN
# ============================================

#######################################
# Signal handler: stop a pending sleep, drop our lock and exit cleanly.
#######################################
shutdown() {
  log "INFO" "Received shutdown signal, exiting gracefully..."
  if [[ -n "$SLEEP_PID" ]]; then
    kill "$SLEEP_PID" 2>/dev/null || true
  fi
  release_lock
  exit 0
}
trap shutdown SIGTERM SIGINT

# ============================================
# GIT SETUP (non-destructive)
# ============================================

#######################################
# Set the commit identity in the backup repository's local config.
# Does nothing when the repository does not exist yet.
#######################################
configure_repo_identity() {
  [[ -d "$BACKUP_DIR/.git" ]] || return 0
  git -C "$BACKUP_DIR" config user.name "${GIT_AUTHOR_NAME:-picoclaw}" \
    && git -C "$BACKUP_DIR" config user.email "${GIT_AUTHOR_EMAIL:-picoclaw@example.com}"
}

#######################################
# Prepare git: create the backup directory, apply the local identity when a
# repository is already present, and store the GitHub token (if any) for the
# credential helper.
# Globals: BACKUP_DIR, GITHUB_TOKEN, GIT_AUTHOR_NAME, GIT_AUTHOR_EMAIL, HOME
#######################################
setup_git() {
  log "INFO" "Setting up git configuration..."

  mkdir -p "$BACKUP_DIR"

  # Use local config only (not global) for the identity. On a first run the
  # repository does not exist yet; initial_sync applies it after cloning.
  configure_repo_identity

  # Configure credential helper if token exists. The helper has to be global
  # because the initial clone runs before any repository-local config exists.
  if [[ -n "${GITHUB_TOKEN:-}" ]]; then
    local cred_file="$HOME/.git-credentials"
    local kept=""
    git config --global credential.helper store
    # Keep credentials for other hosts instead of clobbering the whole file.
    if [[ -f "$cred_file" ]]; then
      kept=$(grep -v '@github\.com$' "$cred_file" || true)
    fi
    (
      # The file contains a secret: never create it group/world readable.
      umask 077
      {
        if [[ -n "$kept" ]]; then
          printf '%s\n' "$kept"
        fi
        printf 'https://%s:%s@github.com\n' \
          "${GIT_AUTHOR_NAME:-picoclaw}" "$GITHUB_TOKEN"
      } > "$cred_file"
    )
    chmod 600 "$cred_file"
  fi
}

# ============================================
# HEALTH CHECKS
# ============================================

#######################################
# Check that the filesystem holding BACKUP_DIR has at least
# MIN_DISK_FREE_MB megabytes available.
# Returns: 1 when free space is below the minimum; 0 otherwise, including
#          when the free space cannot be determined (a warning is logged).
#######################################
check_disk_space() {
  local free_kb free_mb
  # -P keeps each filesystem on a single line, -k forces 1024-byte units.
  free_kb=$(df -Pk "$BACKUP_DIR" 2>/dev/null | awk 'NR==2 {print $4}') || free_kb=""
  if [[ ! "$free_kb" =~ ^[0-9]+$ ]]; then
    log "WARN" "Could not determine free disk space for $BACKUP_DIR"
    return 0
  fi
  free_mb=$((free_kb / 1024))
  if [ "$free_mb" -lt "$MIN_DISK_FREE_MB" ]; then
    log "WARN" "Low disk space: ${free_mb}MB free (min: ${MIN_DISK_FREE_MB}MB)"
    return 1
  fi
  return 0
}

#######################################
# Check that the workspace directory exists.
# Returns: 0 when present, 1 (with a warning) otherwise.
#######################################
check_workspace() {
  if [ ! -d "$WORKSPACE_DIR" ]; then
    log "WARN" "Workspace directory not found: $WORKSPACE_DIR"
    return 1
  fi
  return 0
}

# ============================================
# BACKUP & RESTORE WITH RETRY
# ============================================

#######################################
# Bring the existing repository in BACKUP_DIR onto DATASET_BRANCH and pull.
# Returns: 0 on success, 1 when the caller should fall back to a fresh clone.
#######################################
update_existing_repo() {
  local current_branch
  current_branch=$(git -C "$BACKUP_DIR" branch --show-current 2>/dev/null) \
    || current_branch=""
  if [ "$current_branch" != "$DATASET_BRANCH" ]; then
    log "INFO" "Switching from branch '$current_branch' to '$DATASET_BRANCH'..."
    if ! git -C "$BACKUP_DIR" fetch origin "$DATASET_BRANCH"; then
      log "WARN" "Fetch failed, will try fresh clone"
      return 1
    fi
    if ! git -C "$BACKUP_DIR" checkout "$DATASET_BRANCH"; then
      log "WARN" "Checkout failed, will try fresh clone"
      return 1
    fi
  fi

  # Pull latest from specified branch
  if ! git -C "$BACKUP_DIR" pull origin "$DATASET_BRANCH"; then
    log "WARN" "Pull failed, will re-clone..."
    return 1
  fi
  return 0
}

#######################################
# Clone DATASET_REPO into a staging directory next to BACKUP_DIR and swap it
# into place only after the clone succeeded, so a failed clone (for example
# while offline) never destroys the existing backup. Existing workspace
# snapshots are carried over into the new directory.
# Returns: 0 on success, 1 on failure.
#######################################
fresh_clone() {
  local staging snap
  log "INFO" "Cloning dataset repository (branch: $DATASET_BRANCH)..."
  if ! staging=$(mktemp -d "${BACKUP_DIR}.clone.XXXXXX"); then
    log "ERROR" "Failed to create staging directory for clone"
    return 1
  fi
  if ! git clone --branch "$DATASET_BRANCH" "$DATASET_REPO" "$staging"; then
    rm -rf -- "$staging"
    log "ERROR" "Failed to clone repository"
    return 1
  fi
  if [[ -d "$BACKUP_DIR" ]]; then
    for snap in "$BACKUP_DIR"/workspace.bak_*; do
      [[ -e "$snap" ]] || continue  # glob matched nothing
      mv -- "$snap" "$staging/" \
        || log "WARN" "Could not carry over snapshot: $snap"
    done
    rm -rf -- "${BACKUP_DIR:?}"
  fi
  if ! mv -- "$staging" "$BACKUP_DIR"; then
    log "ERROR" "Failed to move fresh clone into $BACKUP_DIR"
    return 1
  fi
  return 0
}

#######################################
# Body of the initial sync, run while the lock is held: prune old backups,
# update or clone the repository, then restore workspace and config.
# Returns: 0 on success, 1 when no usable repository could be obtained.
#######################################
restore_from_remote() {
  # Cleanup old backups. Restricted to top-level entries so that files
  # inside the mirrored workspace or .git are never touched.
  if [[ -d "$BACKUP_DIR" ]]; then
    find "$BACKUP_DIR" -mindepth 1 -maxdepth 1 \
      \( -name '*.bak' -o -name 'workspace.bak_*' \) \
      -mtime "+$BACKUP_RETENTION_DAYS" -exec rm -rf -- {} + 2>/dev/null || true
  fi

  if [ -d "$BACKUP_DIR/.git" ]; then
    log "INFO" "Existing backup found, checking branch..."
    update_existing_repo || fresh_clone || return 1
  else
    fresh_clone || return 1
  fi

  configure_repo_identity \
    || log "WARN" "Could not set git identity in $BACKUP_DIR"

  # Restore workspace
  if [ -d "$BACKUP_DIR/workspace" ]; then
    log "INFO" "Restoring workspace from backup..."
    mkdir -p "$WORKSPACE_DIR"
    local restored=0
    if command -v rsync >/dev/null 2>&1; then
      if rsync -av --delete --exclude='.git' \
        "$BACKUP_DIR/workspace/" "$WORKSPACE_DIR/"; then
        restored=1
      else
        log "WARN" "rsync failed, falling back to cp"
      fi
    fi
    if (( restored == 0 )); then
      # Best effort, as before: a failed copy is reported but not fatal.
      cp -r "$BACKUP_DIR/workspace/." "$WORKSPACE_DIR/" 2>/dev/null \
        || log "WARN" "Workspace restore via cp failed"
    fi
  fi

  if [ -f "$BACKUP_DIR/config.json" ]; then
    log "INFO" "Restoring config from backup..."
    cp "$BACKUP_DIR/config.json" "$CONFIG_FILE" 2>/dev/null \
      || log "WARN" "Could not restore config to $CONFIG_FILE"
  fi
  return 0
}

#######################################
# Initial sync: take the lock, fetch the dataset repository and restore the
# workspace from it. Failures are returned rather than fatal so that the
# caller's retry loop can actually retry.
# Returns: 0 on success, 1 on failure.
#######################################
initial_sync() {
  log "INFO" "Starting initial sync (branch: $DATASET_BRANCH)..."

  # Prevent concurrent sync
  acquire_lock || return 1

  local rc=0
  restore_from_remote || rc=$?

  # Always drop the lock; a retry in the same process would otherwise find
  # its own PID in the state file.
  release_lock

  if (( rc == 0 )); then
    log "INFO" "Initial sync completed (branch: $DATASET_BRANCH)."
  fi
  return "$rc"
}

# ============================================
# SYNC NOW (WITH BRANCH SUPPORT)
# ============================================

#######################################
# Copy the workspace into a timestamped snapshot under BACKUP_DIR and prune
# snapshots older than SNAPSHOT_RETENTION_DAYS. Entirely best-effort.
#######################################
snapshot_workspace() {
  local timestamp temp_backup
  timestamp=$(date '+%Y%m%d_%H%M%S')
  temp_backup="$BACKUP_DIR/workspace.bak_$timestamp"

  if command -v rsync >/dev/null 2>&1; then
    rsync -av --exclude='.git' "$WORKSPACE_DIR/" "$temp_backup/" 2>/dev/null \
      || log "WARN" "Workspace snapshot failed"
  else
    cp -r "$WORKSPACE_DIR/." "$temp_backup/" 2>/dev/null \
      || log "WARN" "Workspace snapshot failed"
  fi
  # rsync -a copies the source directory's mtime; reset it so that age-based
  # pruning measures the snapshot's age, not the workspace's.
  if [[ -d "$temp_backup" ]]; then
    touch -- "$temp_backup" 2>/dev/null || true
  fi

  # Cleanup old backups. `find -delete` cannot remove non-empty directories,
  # so whole snapshot trees are removed with rm -rf.
  find "$BACKUP_DIR" -mindepth 1 -maxdepth 1 -name 'workspace.bak_*' \
    -mtime "+$SNAPSHOT_RETENTION_DAYS" -exec rm -rf -- {} + 2>/dev/null || true
}

#######################################
# Make sure local snapshots are ignored by git via .git/info/exclude so
# that `git add -A` does not commit a full workspace copy on every cycle.
#######################################
ensure_snapshots_excluded() {
  local exclude_file="$BACKUP_DIR/.git/info/exclude"
  local pattern='/workspace.bak_*'
  mkdir -p "${exclude_file%/*}" 2>/dev/null || return 0
  if ! grep -qxF -- "$pattern" "$exclude_file" 2>/dev/null; then
    printf '%s\n' "$pattern" >> "$exclude_file" || true
  fi
}

#######################################
# Push DATASET_BRANCH to origin, retrying up to MAX_RETRIES times with a
# linearly growing delay between attempts.
# Returns: 0 as soon as one push succeeds, 1 when every attempt failed.
#######################################
push_with_retry() {
  local attempt
  for (( attempt = 1; attempt <= MAX_RETRIES; attempt++ )); do
    if git -C "$BACKUP_DIR" push origin "$DATASET_BRANCH"; then
      log "INFO" "Sync completed and pushed to branch '$DATASET_BRANCH' successfully"
      return 0
    fi
    log "WARN" "Push to branch '$DATASET_BRANCH' failed (attempt $attempt/$MAX_RETRIES), retrying..."
    if (( attempt < MAX_RETRIES )); then
      pause $((attempt * 5))
    fi
  done
  log "ERROR" "Push to branch '$DATASET_BRANCH' failed after $MAX_RETRIES attempts"
  return 1
}

#######################################
# Commit whatever changed in BACKUP_DIR and push it. Commits left unpushed
# by an earlier failed cycle are pushed as well.
# Returns: 0 on success or when there is nothing to do, 1 on failure.
#######################################
commit_and_push() {
  local current_branch remote_url ahead
  local committed=0

  # Ensure we're on the correct branch
  current_branch=$(git -C "$BACKUP_DIR" branch --show-current 2>/dev/null) \
    || current_branch=""
  if [ "$current_branch" != "$DATASET_BRANCH" ]; then
    log "INFO" "Switching to branch '$DATASET_BRANCH'..."
    if ! git -C "$BACKUP_DIR" checkout "$DATASET_BRANCH"; then
      log "ERROR" "Failed to checkout branch $DATASET_BRANCH"
      return 1
    fi
  fi

  # Security check: prevent pushing to upstream
  remote_url=$(git -C "$BACKUP_DIR" remote get-url origin 2>/dev/null) \
    || remote_url=""
  if [[ "$remote_url" == *"sipeed/picoclaw"* ]]; then
    log "ERROR" "SECURITY: Attempted to push to upstream (sipeed/picoclaw), aborting!"
    return 1
  fi

  ensure_snapshots_excluded

  if ! git -C "$BACKUP_DIR" add -A; then
    log "ERROR" "git add failed"
    return 1
  fi

  # `diff --cached --quiet` exits 0 when nothing is staged.
  if git -C "$BACKUP_DIR" diff --cached --quiet; then
    log "INFO" "No changes detected"
  elif git -C "$BACKUP_DIR" commit -m "Auto-sync: $(date '+%Y-%m-%d %H:%M:%S')"; then
    committed=1
  else
    log "WARN" "Commit failed (possibly empty)"
    return 0
  fi

  # Push when the local branch is ahead of its remote-tracking ref. If that
  # ref is missing, push only when this cycle produced a commit.
  ahead=$(git -C "$BACKUP_DIR" rev-list --count \
    "origin/${DATASET_BRANCH}..HEAD" 2>/dev/null) || ahead=$committed
  if (( ahead > 0 )); then
    push_with_retry || return 1
  fi
  return 0
}

#######################################
# One sync cycle: snapshot the workspace, mirror it (and the config file)
# into BACKUP_DIR, then commit and push.
# Returns: 0 on success or when there is nothing to sync, 1 on failure.
#######################################
sync_now() {
  log "INFO" "Starting sync operation..."

  check_workspace || return 0

  # Create the directory first so the disk check has a path to inspect.
  if ! mkdir -p "$BACKUP_DIR"; then
    log "ERROR" "Cannot create backup directory: $BACKUP_DIR"
    return 1
  fi

  if ! check_disk_space; then
    log "ERROR" "Insufficient disk space, skipping sync"
    return 1
  fi

  # Backup current workspace before sync
  snapshot_workspace

  # Sync to backup directory
  mkdir -p "$BACKUP_DIR/workspace"
  if command -v rsync >/dev/null 2>&1; then
    if ! rsync -av --delete --exclude='.git' \
      "$WORKSPACE_DIR/" "$BACKUP_DIR/workspace/"; then
      log "ERROR" "rsync failed"
      return 1
    fi
  else
    rm -rf -- "$BACKUP_DIR/workspace"
    if ! cp -r "$WORKSPACE_DIR" "$BACKUP_DIR/workspace" 2>/dev/null; then
      log "ERROR" "cp failed"
      return 1
    fi
    # Match the rsync path, which excludes .git directories from the mirror.
    find "$BACKUP_DIR/workspace" -name '.git' -prune \
      -exec rm -rf -- {} + 2>/dev/null || true
  fi

  if [ -f "$CONFIG_FILE" ]; then
    cp "$CONFIG_FILE" "$BACKUP_DIR/config.json" 2>/dev/null \
      || log "WARN" "Could not back up config file"
  fi

  # Git operations
  if [ ! -d "$BACKUP_DIR/.git" ]; then
    log "WARN" "Backup not a git repo, skipping commit"
    return 0
  fi

  commit_and_push
}

# ============================================
# MAIN LOOP
# ============================================

#######################################
# Entry point: validate settings, set up git, run the initial sync with
# retries, then sync forever.
#######################################
main() {
  # The log file lives here; create it before the first log call.
  mkdir -p "$PICOCLAW_HOME"

  # These settings are used in arithmetic or as find(1) ages.
  local setting
  for setting in MAX_RETRIES MIN_DISK_FREE_MB BACKUP_RETENTION_DAYS \
    SNAPSHOT_RETENTION_DAYS; do
    [[ "${!setting}" =~ ^[0-9]+$ ]] \
      || die "$setting must be a non-negative integer (got '${!setting}')"
  done
  (( MAX_RETRIES >= 1 )) || die "MAX_RETRIES must be at least 1"

  log "INFO" "=== PicoClaw Dataset Sync Daemon v2.1 (Branch Support) ==="
  log "INFO" "Repository: $DATASET_REPO"
  log "INFO" "Branch: $DATASET_BRANCH"
  log "INFO" "Backup dir: $BACKUP_DIR"
  log "INFO" "Workspace: $WORKSPACE_DIR"
  log "INFO" "Sync interval: ${SYNC_INTERVAL}s"

  setup_git

  # Initial sync with retry. Success is tracked explicitly: the loop counter
  # alone cannot tell a success on the last attempt from a failure.
  local attempt
  local synced=0
  for (( attempt = 1; attempt <= MAX_RETRIES; attempt++ )); do
    if initial_sync; then
      synced=1
      break
    fi
    log "WARN" "Initial sync failed (attempt $attempt/$MAX_RETRIES), retrying in 10s..."
    if (( attempt < MAX_RETRIES )); then
      pause 10
    fi
  done

  (( synced == 1 )) || die "Initial sync failed after $MAX_RETRIES attempts"

  # Main sync loop
  while true; do
    if sync_now; then
      log "INFO" "Sync cycle completed, sleeping ${SYNC_INTERVAL}s"
      pause "$SYNC_INTERVAL"
    else
      log "WARN" "Sync cycle failed, retrying in 30s..."
      pause 30
    fi
  done
}

main "$@"