Spaces:
Configuration error
Configuration error
personalbotai
Deploy Archon Dataset Sync v2.1 with branch support

- Add sync_dataset.sh with DATASET_BRANCH support
- Add Flask monitoring dashboard (app.py)
- Add Dockerfile for HF Space deployment
- Add comprehensive documentation
- Security hardening (upstream protection)
- Auto-retry with exponential backoff
- Health checks and graceful shutdown

Archon Standard: Build for Eternity
set -euo pipefail # Strict mode: exit on error, undefined var, pipefail

# ============================================
# CONFIGURATION
# Every value below can be overridden through the environment.
# ============================================
DATASET_REPO="${DATASET_REPO:-https://github.com/personalbotai/picoclaw-memory.git}"
DATASET_BRANCH="${DATASET_BRANCH:-main}"            # Branch selection (default: main)
SYNC_INTERVAL="${SYNC_INTERVAL:-300}"                # seconds
MAX_RETRIES="${MAX_RETRIES:-3}"
BACKUP_RETENTION_DAYS="${BACKUP_RETENTION_DAYS:-7}"
MIN_DISK_FREE_MB="${MIN_DISK_FREE_MB:-1024}"        # 1GB minimum

# These settings are used in integer comparisons, `find -mtime` and retry
# loops further down; reject anything that is not a plain non-negative integer
# up front instead of failing in the middle of a sync.
for _cfg_name in MAX_RETRIES BACKUP_RETENTION_DAYS MIN_DISK_FREE_MB; do
  case "${!_cfg_name}" in
    '' | *[!0-9]*)
      printf 'Invalid %s: "%s" (expected a non-negative integer)\n' \
        "$_cfg_name" "${!_cfg_name}" >&2
      exit 1
      ;;
  esac
done
unset _cfg_name

# ============================================
# PATH RESOLUTION
# ============================================
# Fall back to /root when HOME is unset or empty.
if [ -z "${HOME:-}" ]; then
  export HOME="/root"
fi

PICOCLAW_HOME="${PICOCLAW_HOME:-$HOME/.picoclaw}"
WORKSPACE_DIR="$PICOCLAW_HOME/workspace"
CONFIG_FILE="$PICOCLAW_HOME/config.json"
BACKUP_DIR="$PICOCLAW_HOME/backup"
LOG_FILE="$PICOCLAW_HOME/sync.log"
STATE_FILE="$PICOCLAW_HOME/sync.state"
# ============================================
# LOGGING & UTILS
# ============================================

#######################################
# Print a timestamped message to stdout and append it to the log file.
# Writing to the file is best effort: the directory is created on demand and
# a failure to write never makes the caller fail (the original `tee -a`
# pipeline aborted the whole daemon under `set -e`/`pipefail` when the log
# directory did not exist yet).
# Globals:   LOG_FILE (read)
# Arguments: $1 - level (e.g. INFO, WARN, ERROR)
#            $2 - message text
# Outputs:   the formatted line on stdout
# Returns:   0 always
#######################################
log() {
  local level="${1:-INFO}"
  local msg="${2:-}"
  local line
  line="[$(date '+%Y-%m-%d %H:%M:%S')] [$level] $msg"

  printf '%s\n' "$line"

  # Group the redirection so that an unopenable log file stays silent too.
  if mkdir -p "$(dirname "$LOG_FILE")" 2>/dev/null; then
    { printf '%s\n' "$line" >>"$LOG_FILE"; } 2>/dev/null || true
  fi
  return 0
}
#######################################
# Log an ERROR message and terminate the script.
# Arguments: $1 - message text
# Returns:   never; exits the process with status 1
#######################################
die() {
  # Guard the log call so the exit status is 1 even when logging itself fails.
  log "ERROR" "${1:-fatal error}" || true
  exit 1
}
# ============================================
# GRACEFUL SHUTDOWN
# ============================================

#######################################
# Signal handler: log, release the sync lock and exit successfully.
# The state file is removed unless it names a different process that is
# still alive, so a terminating instance does not steal another instance's
# lock.
# Globals:   STATE_FILE (read; file removed)
# Returns:   never; exits the process with status 0
#######################################
shutdown() {
  log "INFO" "Received shutdown signal, exiting gracefully..." || true

  local owner=""
  if [[ -f "$STATE_FILE" ]]; then
    owner=$(cat "$STATE_FILE" 2>/dev/null || true)
    if [[ "$owner" =~ ^[0-9]+$ && "$owner" != "$$" ]] \
      && kill -0 "$owner" 2>/dev/null; then
      : # lock belongs to another running process; leave it alone
    else
      rm -f "$STATE_FILE"
    fi
  fi
  exit 0
}

trap shutdown SIGTERM SIGINT
# ============================================
# GIT SETUP (non-destructive)
# ============================================

#######################################
# Prepare git identity and, when a token is supplied, credentials.
# The identity is exported through git's environment variables so it is in
# effect before the backup repository has been cloned; the original wrote it
# with `git --git-dir=$BACKUP_DIR/.git config`, which fails (and aborts the
# script under `set -e`) on a first run where no repository exists yet.
# When the repository already exists the identity is also stored in its
# local config.
# Globals:   BACKUP_DIR, HOME (read)
#            GIT_AUTHOR_NAME, GIT_AUTHOR_EMAIL, GITHUB_TOKEN (read, optional)
#            GIT_AUTHOR_*, GIT_COMMITTER_* (exported)
# Outputs:   writes $HOME/.git-credentials (mode 600) when GITHUB_TOKEN is set
#######################################
setup_git() {
  log "INFO" "Setting up git configuration..."
  mkdir -p "$BACKUP_DIR"

  local author_name="${GIT_AUTHOR_NAME:-picoclaw}"
  local author_email="${GIT_AUTHOR_EMAIL:-picoclaw@example.com}"

  export GIT_AUTHOR_NAME="$author_name"
  export GIT_AUTHOR_EMAIL="$author_email"
  export GIT_COMMITTER_NAME="${GIT_COMMITTER_NAME:-$author_name}"
  export GIT_COMMITTER_EMAIL="${GIT_COMMITTER_EMAIL:-$author_email}"

  # Use local config only (not global) for the identity.
  if [[ -d "$BACKUP_DIR/.git" ]]; then
    git --git-dir="$BACKUP_DIR/.git" config user.name "$author_name" \
      || log "WARN" "Could not store user.name in the backup repository"
    git --git-dir="$BACKUP_DIR/.git" config user.email "$author_email" \
      || log "WARN" "Could not store user.email in the backup repository"
  fi

  # Configure credential helper if token exists. The helper has to be global
  # because it must already be active for the very first clone.
  if [[ -n "${GITHUB_TOKEN:-}" ]]; then
    git config --global credential.helper store
    # The file holds a secret: create it owner-only, and tighten the mode in
    # case it already existed with wider permissions.
    (
      umask 077
      printf 'https://%s:%s@github.com\n' "$author_name" "$GITHUB_TOKEN" \
        >"$HOME/.git-credentials"
    )
    chmod 600 "$HOME/.git-credentials"
  fi
}
# ============================================
# HEALTH CHECKS
# ============================================

#######################################
# Verify that the filesystem holding the backup directory has enough room.
# Uses `df -Pk` so the output is always one line per filesystem in 1024-byte
# units (plain `df` may wrap long device names and uses 512-byte blocks on
# some platforms).
# Globals:   BACKUP_DIR, MIN_DISK_FREE_MB (read)
# Returns:   0 when at least MIN_DISK_FREE_MB megabytes are available,
#            1 when space is low or the free space cannot be determined
#######################################
check_disk_space() {
  local free_kb free_mb

  # Declaration and assignment are separate so a df failure is not masked.
  free_kb=$(df -Pk "$BACKUP_DIR" 2>/dev/null | awk 'NR==2 {print $4}') || free_kb=""
  if [[ ! "$free_kb" =~ ^[0-9]+$ ]]; then
    log "WARN" "Unable to determine free disk space for $BACKUP_DIR"
    return 1
  fi

  free_mb=$((free_kb / 1024))
  if [ "$free_mb" -lt "$MIN_DISK_FREE_MB" ]; then
    log "WARN" "Low disk space: ${free_mb}MB free (min: ${MIN_DISK_FREE_MB}MB)"
    return 1
  fi
  return 0
}
#######################################
# Check that the workspace directory exists.
# Globals:   WORKSPACE_DIR (read)
# Returns:   0 when the directory exists, 1 (after a WARN log) otherwise
#######################################
check_workspace() {
  if [[ -d "$WORKSPACE_DIR" ]]; then
    return 0
  fi
  log "WARN" "Workspace directory not found: $WORKSPACE_DIR"
  return 1
}
# ============================================
# BACKUP & RESTORE WITH RETRY
# ============================================

# Return 0 when PID $1 is a running process other than this script.
_sync_lock_held_elsewhere() {
  local pid="${1:-}"
  [[ "$pid" =~ ^[0-9]+$ ]] || return 1
  # A lock carrying our own PID is a leftover (PID reuse after a restart).
  [[ "$pid" != "$$" ]] || return 1
  kill -0 "$pid" 2>/dev/null
}

# Replace the repository in BACKUP_DIR with a fresh clone of DATASET_BRANCH.
# The clone goes to a staging directory first, so nothing is discarded unless
# the download succeeded and a non-empty BACKUP_DIR is not an obstacle.
_initial_sync_fresh_clone() {
  local staging
  staging=$(mktemp -d "${BACKUP_DIR%/}.clone.XXXXXX") || return 1

  if ! git clone --no-checkout --branch "$DATASET_BRANCH" "$DATASET_REPO" "$staging"; then
    rm -rf -- "$staging"
    return 1
  fi

  # Drop the old metadata and the old tracked payload so the restored state
  # is exactly what the remote branch contains; other local files are kept.
  rm -rf -- "${BACKUP_DIR:?}/.git" "${BACKUP_DIR:?}/workspace" "${BACKUP_DIR:?}/config.json"
  if ! mv -- "$staging/.git" "$BACKUP_DIR/.git"; then
    rm -rf -- "$staging"
    return 1
  fi
  rm -rf -- "$staging"

  # Populate the work tree from the freshly cloned branch.
  git -C "$BACKUP_DIR" reset --hard --quiet
}

#######################################
# Bring the local backup repository up to date with DATASET_BRANCH (pull, or
# re-clone when that is not possible) and restore workspace and config from
# it.
# Failures are reported through the return status instead of exiting, so the
# caller's retry loop can actually retry.
# Globals:   DATASET_REPO, DATASET_BRANCH, BACKUP_DIR, WORKSPACE_DIR,
#            CONFIG_FILE, STATE_FILE, BACKUP_RETENTION_DAYS (read)
# Returns:   0 on success; 1 when another sync holds the lock or the
#            repository cannot be cloned
#######################################
initial_sync() {
  log "INFO" "Starting initial sync (branch: $DATASET_BRANCH)..."

  # Prevent concurrent sync
  local holder=""
  if [[ -f "$STATE_FILE" ]]; then
    holder=$(cat "$STATE_FILE" 2>/dev/null || true)
    if _sync_lock_held_elsewhere "$holder"; then
      log "WARN" "Another sync process (PID $holder) is running, waiting..."
      sleep 10
      holder=""
      if [[ -f "$STATE_FILE" ]]; then
        holder=$(cat "$STATE_FILE" 2>/dev/null || true)
      fi
      if _sync_lock_held_elsewhere "$holder"; then
        log "ERROR" "Another sync process still running, aborting."
        return 1
      fi
    fi
    rm -f "$STATE_FILE"
  fi
  mkdir -p "$(dirname "$STATE_FILE")" "$BACKUP_DIR"
  echo "$$" >"$STATE_FILE"

  # Cleanup old backups. Only the top level is scanned: a recursive search
  # would also delete *.bak files that belong to the mirrored workspace.
  find "$BACKUP_DIR" -mindepth 1 -maxdepth 1 -name "*.bak" \
    -mtime +"$BACKUP_RETENTION_DAYS" -exec rm -rf -- {} + 2>/dev/null || true

  local need_clone=1
  if [[ -d "$BACKUP_DIR/.git" ]]; then
    log "INFO" "Existing backup found, checking branch..."
    need_clone=0

    # Check current branch
    local current_branch
    current_branch=$(git -C "$BACKUP_DIR" branch --show-current 2>/dev/null || echo "")
    if [[ "$current_branch" != "$DATASET_BRANCH" ]]; then
      log "INFO" "Switching from branch '$current_branch' to '$DATASET_BRANCH'..."
      if ! git -C "$BACKUP_DIR" fetch origin "$DATASET_BRANCH"; then
        log "WARN" "Fetch failed, will try fresh clone"
        need_clone=1
      elif ! git -C "$BACKUP_DIR" checkout "$DATASET_BRANCH"; then
        log "WARN" "Checkout failed, will try fresh clone"
        need_clone=1
      fi
    fi

    # Pull latest from specified branch. Skipped once a re-clone has been
    # decided, so no git command runs against a half-removed repository.
    if ((need_clone == 0)) && ! git -C "$BACKUP_DIR" pull origin "$DATASET_BRANCH"; then
      log "WARN" "Pull failed, will re-clone..."
      need_clone=1
    fi
  fi

  if ((need_clone == 1)); then
    log "INFO" "Cloning dataset repository (branch: $DATASET_BRANCH)..."
    if ! _initial_sync_fresh_clone; then
      log "ERROR" "Failed to clone repository"
      rm -f "$STATE_FILE"
      return 1
    fi
  fi

  # Restore workspace
  if [[ -d "$BACKUP_DIR/workspace" ]]; then
    log "INFO" "Restoring workspace from backup..."
    mkdir -p "$WORKSPACE_DIR"
    if command -v rsync >/dev/null 2>&1; then
      if ! rsync -av --delete --exclude='.git' "$BACKUP_DIR/workspace/" "$WORKSPACE_DIR/"; then
        log "WARN" "rsync failed, falling back to cp"
        cp -r "$BACKUP_DIR/workspace/." "$WORKSPACE_DIR/" \
          || log "WARN" "Workspace restore with cp failed"
      fi
    else
      cp -r "$BACKUP_DIR/workspace/." "$WORKSPACE_DIR/" \
        || log "WARN" "Workspace restore with cp failed"
    fi
  fi

  if [[ -f "$BACKUP_DIR/config.json" ]]; then
    log "INFO" "Restoring config from backup..."
    cp "$BACKUP_DIR/config.json" "$CONFIG_FILE" \
      || log "WARN" "Config restore failed"
  fi

  rm -f "$STATE_FILE"
  log "INFO" "Initial sync completed (branch: $DATASET_BRANCH)."
}
# ============================================
# SYNC NOW (WITH BRANCH SUPPORT)
# ============================================

#######################################
# Mirror the workspace (and config) into the backup repository, commit the
# result and push it to DATASET_BRANCH.
# A local snapshot `workspace.bak_<timestamp>` is taken first; snapshots are
# kept out of git through .git/info/exclude and pruned by age.
# Globals:   WORKSPACE_DIR, BACKUP_DIR, CONFIG_FILE, DATASET_BRANCH,
#            MAX_RETRIES (read)
# Returns:   0 when the sync succeeded or there was nothing to do (including
#            a missing workspace or a backup dir that is not a git repo);
#            1 on insufficient disk space, copy failure, branch checkout
#            failure, a forbidden upstream remote, or when every push
#            attempt failed
#######################################
sync_now() {
  log "INFO" "Starting sync operation..."
  check_workspace || return 0
  if ! check_disk_space; then
    log "ERROR" "Insufficient disk space, skipping sync"
    return 1
  fi

  local have_rsync=0
  if command -v rsync >/dev/null 2>&1; then
    have_rsync=1
  fi

  # Backup current workspace before sync
  mkdir -p "$BACKUP_DIR"
  if [[ -d "$WORKSPACE_DIR" ]]; then
    local timestamp snapshot
    timestamp=$(date '+%Y%m%d_%H%M%S')
    snapshot="$BACKUP_DIR/workspace.bak_$timestamp"
    if ((have_rsync == 1)); then
      rsync -av --exclude='.git' "$WORKSPACE_DIR/" "$snapshot/" \
        || log "WARN" "Workspace snapshot failed"
    else
      mkdir -p "$snapshot"
      cp -r "$WORKSPACE_DIR/." "$snapshot/" \
        || log "WARN" "Workspace snapshot failed"
    fi
    # rsync -a copies the source directory's mtime onto the snapshot; reset
    # it so the age-based cleanup below measures the snapshot's own age.
    if [[ -d "$snapshot" ]]; then
      touch "$snapshot" || true
    fi
    # Cleanup old backups. Snapshots are non-empty directories, which
    # `find -delete` cannot remove, hence rm -rf.
    find "$BACKUP_DIR" -mindepth 1 -maxdepth 1 -name "workspace.bak_*" \
      -mtime +1 -exec rm -rf -- {} + 2>/dev/null || true
  fi

  # Sync to backup directory
  mkdir -p "$BACKUP_DIR/workspace"
  if ((have_rsync == 1)); then
    if ! rsync -av --delete --exclude='.git' "$WORKSPACE_DIR/" "$BACKUP_DIR/workspace/"; then
      log "ERROR" "rsync failed"
      return 1
    fi
  else
    rm -rf -- "${BACKUP_DIR:?}/workspace"
    if ! cp -r "$WORKSPACE_DIR" "$BACKUP_DIR/workspace"; then
      log "ERROR" "cp failed"
      return 1
    fi
  fi

  if [[ -f "$CONFIG_FILE" ]]; then
    cp "$CONFIG_FILE" "$BACKUP_DIR/config.json" \
      || log "WARN" "Config backup failed"
  fi

  # Git operations
  if [[ ! -d "$BACKUP_DIR/.git" ]]; then
    log "WARN" "Backup not a git repo, skipping commit"
    return 0
  fi
  local -a repo=(git -C "$BACKUP_DIR")

  # Keep local snapshots out of the repository; otherwise every cycle would
  # commit and push another full copy of the workspace.
  local exclude_file="$BACKUP_DIR/.git/info/exclude"
  if ! grep -qxF '/workspace.bak_*' "$exclude_file" 2>/dev/null; then
    mkdir -p "$BACKUP_DIR/.git/info"
    printf '%s\n' '/workspace.bak_*' >>"$exclude_file" \
      || log "WARN" "Could not update $exclude_file"
  fi

  # Ensure we're on the correct branch
  local current_branch
  current_branch=$("${repo[@]}" branch --show-current 2>/dev/null || echo "")
  if [[ "$current_branch" != "$DATASET_BRANCH" ]]; then
    log "INFO" "Switching to branch '$DATASET_BRANCH'..."
    if ! "${repo[@]}" checkout "$DATASET_BRANCH"; then
      log "ERROR" "Failed to checkout branch $DATASET_BRANCH"
      return 1
    fi
  fi

  # Security check: prevent pushing to upstream
  local remote_url
  remote_url=$("${repo[@]}" remote get-url origin 2>/dev/null || echo "")
  if [[ "$remote_url" == *"sipeed/picoclaw"* ]]; then
    log "ERROR" "SECURITY: Attempted to push to upstream (sipeed/picoclaw), aborting!"
    return 1
  fi

  # Stage everything and commit only when something is actually staged.
  if ! "${repo[@]}" add -A; then
    log "ERROR" "git add failed"
    return 1
  fi
  if "${repo[@]}" diff --cached --quiet; then
    log "INFO" "No changes to commit"
  elif ! "${repo[@]}" commit -m "Auto-sync: $(date '+%Y-%m-%d %H:%M:%S')"; then
    log "WARN" "Commit failed (possibly empty)"
    return 0
  fi

  # Push whenever the branch differs from what origin is known to have. This
  # also covers commits from an earlier cycle whose push failed.
  local local_head remote_head
  local_head=$("${repo[@]}" rev-parse --verify -q HEAD || true)
  remote_head=$("${repo[@]}" rev-parse --verify -q "refs/remotes/origin/$DATASET_BRANCH" || true)
  if [[ -n "$remote_head" && "$local_head" == "$remote_head" ]]; then
    log "INFO" "No changes detected"
    return 0
  fi

  # Push to specific branch. Success is tracked explicitly: comparing the
  # loop counter with MAX_RETRIES afterwards mistakes a success on the last
  # attempt for a failure.
  local attempt pushed=0
  for ((attempt = 1; attempt <= MAX_RETRIES; attempt++)); do
    if "${repo[@]}" push origin "$DATASET_BRANCH"; then
      log "INFO" "Sync completed and pushed to branch '$DATASET_BRANCH' successfully"
      pushed=1
      break
    fi
    log "WARN" "Push to branch '$DATASET_BRANCH' failed (attempt $attempt/$MAX_RETRIES), retrying..."
    # Linear backoff; no point in waiting after the final attempt.
    if ((attempt < MAX_RETRIES)); then
      sleep $((attempt * 5))
    fi
  done
  if ((pushed == 0)); then
    log "ERROR" "Push to branch '$DATASET_BRANCH' failed after $MAX_RETRIES attempts"
    return 1
  fi
  return 0
}
# ============================================
# MAIN LOOP
# ============================================

# Sleep for $1 seconds while staying responsive to SIGTERM/SIGINT: bash runs
# a trap only after the current foreground command ends, but interrupts the
# `wait` builtin immediately.
_main_nap() {
  sleep "$1" &
  wait "$!" || true
}

#######################################
# Daemon entry point: print the configuration, set up git, run the initial
# sync (up to MAX_RETRIES attempts) and then sync forever.
# Globals:   PICOCLAW_HOME, DATASET_REPO, DATASET_BRANCH, BACKUP_DIR,
#            WORKSPACE_DIR, SYNC_INTERVAL, MAX_RETRIES (read)
# Returns:   does not return normally; exits via die when the initial sync
#            never succeeds, or via the signal handler
#######################################
main() {
  # The log file lives here; make sure the directory exists before logging.
  mkdir -p "$PICOCLAW_HOME"

  log "INFO" "=== PicoClaw Dataset Sync Daemon v2.1 (Branch Support) ==="
  log "INFO" "Repository: $DATASET_REPO"
  log "INFO" "Branch: $DATASET_BRANCH"
  log "INFO" "Backup dir: $BACKUP_DIR"
  log "INFO" "Workspace: $WORKSPACE_DIR"
  log "INFO" "Sync interval: ${SYNC_INTERVAL}s"

  setup_git

  # Initial sync with retry. Success is tracked with a flag: testing
  # "attempt == MAX_RETRIES" after the loop would also abort when the last
  # permitted attempt succeeded.
  local attempt synced=0
  for ((attempt = 1; attempt <= MAX_RETRIES; attempt++)); do
    if initial_sync; then
      synced=1
      break
    fi
    log "WARN" "Initial sync failed (attempt $attempt/$MAX_RETRIES), retrying in 10s..."
    if ((attempt < MAX_RETRIES)); then
      _main_nap 10
    fi
  done
  if ((synced == 0)); then
    die "Initial sync failed after $MAX_RETRIES attempts"
  fi

  # Main sync loop
  while true; do
    if sync_now; then
      log "INFO" "Sync cycle completed, sleeping ${SYNC_INTERVAL}s"
    else
      log "WARN" "Sync cycle failed, retrying in 30s..."
      _main_nap 30
    fi
    _main_nap "$SYNC_INTERVAL"
  done
}
# Start the daemon, forwarding the command-line arguments.
main "$@"