zeroclaw / scripts /sync_dataset.sh
personalbotai
Move picoclaw_space to root for Hugging Face Spaces deployment
c1dcaaa
#!/bin/bash
#
# Mirrors the picoclaw workspace directory and config file into a git
# "dataset" repository on a fixed interval.
#
# Environment:
#   DATASET_REPO      Git URL of the backup repository. Unset -> the default
#                     below is used; set to an empty string -> sync is skipped.
#   SYNC_INTERVAL     Value handed to sleep between sync cycles (default 300).
#   PICOCLAW_HOME     Base directory (default: $HOME/.picoclaw).
#   GITHUB_TOKEN      Optional; read when git credentials are configured.
#   GIT_AUTHOR_NAME,
#   GIT_AUTHOR_EMAIL  Optional; read when the git identity is configured.
set -e

# Configuration
# A plain "-" expansion (not ":-") is used on purpose: only an UNSET variable
# receives the default. With ":-" an empty value was replaced as well, which
# made the "skip sync" check further down unreachable.
DATASET_REPO="${DATASET_REPO-https://github.com/personalbotai/picoclaw-memory.git}" # Default: personalbotai/picoclaw-memory
SYNC_INTERVAL="${SYNC_INTERVAL:-300}" # Default: 5 minutes

# Determine home directory
if [ -z "$HOME" ]; then
  echo "HOME not set, defaulting to /root"
  export HOME="/root"
fi

# All working paths hang off PICOCLAW_HOME.
PICOCLAW_HOME="${PICOCLAW_HOME:-$HOME/.picoclaw}"
WORKSPACE_DIR="$PICOCLAW_HOME/workspace"
CONFIG_FILE="$PICOCLAW_HOME/config.json"
BACKUP_DIR="$PICOCLAW_HOME/backup"

echo "Using PICOCLAW_HOME: $PICOCLAW_HOME"
echo "Backup Dir: $BACKUP_DIR"
echo "Workspace Dir: $WORKSPACE_DIR"

# Prevent git from asking for credentials interactively
export GIT_TERMINAL_PROMPT=0

# An explicitly empty DATASET_REPO turns the whole script into a no-op.
if [ -z "$DATASET_REPO" ]; then
  echo "DATASET_REPO environment variable not set. Skipping dataset sync."
  exit 0
fi

# Only a warning: a public repository may still be readable without auth.
if [ -z "$GITHUB_TOKEN" ] && [ ! -f "$HOME/.ssh/id_rsa" ] && [ ! -f "$HOME/.ssh/id_ed25519" ]; then
  echo "Warning: GITHUB_TOKEN not set and no SSH keys found. Sync might fail if repo is private or requires auth."
fi
# Setup Git for Dataset
#######################################
# Configures the global git identity and, when a token is available, stores
# an HTTPS credential for github.com via the "store" credential helper.
# Globals:
#   GIT_AUTHOR_NAME   (read) identity and credential user name,
#                     default "picoclaw"
#   GIT_AUTHOR_EMAIL  (read) identity e-mail, default "picoclaw@example.com"
#   GITHUB_TOKEN      (read) when non-empty, written to the credentials file
#   HOME              (read) location of .git-credentials
# Outputs:
#   One progress line on stdout. Overwrites $HOME/.git-credentials when
#   GITHUB_TOKEN is set.
#######################################
setup_git() {
  local author_name="${GIT_AUTHOR_NAME:-picoclaw}"
  local author_email="${GIT_AUTHOR_EMAIL:-picoclaw@example.com}"
  local cred_file="$HOME/.git-credentials"

  echo "Setting up git for dataset sync..."
  git config --global user.name "$author_name"
  git config --global user.email "$author_email"

  # Configure credential helper for GitHub if token is present
  if [ -n "${GITHUB_TOKEN:-}" ]; then
    git config --global credential.helper store
    # The same defaulted name as user.name is used here, so the credential
    # line never ends up with an empty user part when GIT_AUTHOR_NAME is unset.
    # NOTE(review): name and token are written verbatim; values containing
    # characters such as '@', ':' or '/' would presumably need
    # percent-encoding -- confirm against the credential-store format.
    (
      # Subshell keeps the tightened umask local; a new file is created 0600.
      umask 077
      printf 'https://%s:%s@github.com\n' "$author_name" "$GITHUB_TOKEN" > "$cred_file"
    )
    # umask does not affect a file that already existed, so fix the mode too.
    chmod 600 "$cred_file"
  fi
}
# Initial Clone/Pull
#######################################
# Brings the local backup checkout up to date (clone when there is no
# checkout yet, pull otherwise) and then copies its workspace/ directory and
# config.json back into the live locations. Every step is best-effort: a
# failure is logged and the function carries on.
# Globals:
#   BACKUP_DIR     (read) local checkout of the dataset repository
#   DATASET_REPO   (read) URL passed to git clone
#   WORKSPACE_DIR  (read) restore target for BACKUP_DIR/workspace
#   CONFIG_FILE    (read) restore target for BACKUP_DIR/config.json
# Outputs:
#   Progress and failure messages on stdout.
# Returns:
#   0 unless BACKUP_DIR or WORKSPACE_DIR cannot be created.
#######################################
initial_sync() {
  mkdir -p "$BACKUP_DIR"

  if [ ! -d "$BACKUP_DIR/.git" ]; then
    echo "Cloning dataset $DATASET_REPO..."
    git clone "$DATASET_REPO" "$BACKUP_DIR" || echo "Clone failed, continuing..."
  else
    echo "Pulling latest changes from dataset..."
    # git -C instead of cd: the caller's working directory stays untouched,
    # so later relative paths keep resolving the same way.
    git -C "$BACKUP_DIR" pull origin main || echo "Pull failed, continuing..."
  fi

  # Restore to workspace if backup has data
  if [ -d "$BACKUP_DIR/workspace" ]; then
    echo "Restoring workspace from backup..."
    mkdir -p "$WORKSPACE_DIR"
    # Use rsync for better synchronization if available, otherwise cp
    if command -v rsync > /dev/null 2>&1; then
      # Logged instead of fatal, in line with the other steps here.
      rsync -av --update "$BACKUP_DIR/workspace/" "$WORKSPACE_DIR/" \
        || echo "Workspace restore failed, continuing..."
    else
      # "dir/." copies hidden files as well (a "dir/"* glob skips them) and
      # does not fail on an empty directory.
      cp -R "$BACKUP_DIR/workspace/." "$WORKSPACE_DIR/" \
        || echo "Workspace restore failed, continuing..."
    fi
  fi

  if [ -f "$BACKUP_DIR/config.json" ]; then
    echo "Restoring config from backup..."
    cp "$BACKUP_DIR/config.json" "$CONFIG_FILE" \
      || echo "Config restore failed, continuing..."
  fi

  return 0
}
# Sync Loop
#######################################
# Runs one sync cycle immediately and then keeps repeating it, sleeping
# between cycles. The loop has no exit condition of its own.
# Globals:
#   SYNC_INTERVAL  (read) passed unchanged to sleep
# Outputs:
#   One start-up line on stdout, plus whatever sync_now prints.
#######################################
sync_loop() {
  printf '%s\n' "Starting sync loop (interval: ${SYNC_INTERVAL}s)..."

  # The first cycle happens before any waiting.
  sync_now

  until false; do
    sleep "$SYNC_INTERVAL"
    sync_now
  done
}
#######################################
# Performs one sync cycle: stages the live workspace and config file into the
# backup checkout, commits any change and pushes to origin/main.
# Failures of individual steps are logged and the cycle ends early, so a
# transient problem does not terminate a caller running under "set -e".
# Globals:
#   BACKUP_DIR     (read) local checkout of the dataset repository
#   WORKSPACE_DIR  (read) directory copied to BACKUP_DIR/workspace
#   CONFIG_FILE    (read) file copied to BACKUP_DIR/config.json
# Outputs:
#   Progress and failure messages on stdout.
# Returns:
#   0 unless BACKUP_DIR/workspace cannot be created.
#######################################
sync_now() {
  local remote_url local_head remote_head

  echo "Syncing data to dataset..."

  # The repository check runs BEFORE anything is staged: git clone refuses a
  # non-empty target directory, so staging files first would make the clone
  # attempted by initial_sync fail on every later cycle.
  if [ ! -d "$BACKUP_DIR/.git" ]; then
    echo "Backup directory is not a git repository. Attempting initial sync..."
    initial_sync
    if [ ! -d "$BACKUP_DIR/.git" ]; then
      return 0
    fi
  fi

  # Copy current state to backup dir
  mkdir -p "$BACKUP_DIR/workspace"
  if [ -d "$WORKSPACE_DIR" ]; then
    if command -v rsync > /dev/null 2>&1; then
      rsync -av --update --exclude='.git' "$WORKSPACE_DIR/" "$BACKUP_DIR/workspace/" \
        || echo "Workspace copy failed, continuing..."
    else
      # "dir/." also copies hidden files, which a "dir/"* glob skips.
      cp -R "$WORKSPACE_DIR/." "$BACKUP_DIR/workspace/" \
        || echo "Workspace copy failed, continuing..."
      # Same effect as rsync's --exclude='.git': nested repositories from the
      # workspace must not end up inside the backup checkout.
      find "${BACKUP_DIR:?}/workspace" -name .git -prune -exec rm -rf -- {} + || true
    fi
  fi

  if [ -f "$CONFIG_FILE" ]; then
    cp "$CONFIG_FILE" "$BACKUP_DIR/config.json" \
      || echo "Config copy failed, continuing..."
  fi

  # Security check: Prevent pushing to upstream repository
  remote_url=$(git -C "$BACKUP_DIR" remote get-url origin 2> /dev/null || echo "")
  if [[ "$remote_url" == *"sipeed/picoclaw"* ]]; then
    echo "SECURITY WARNING: Detected attempt to push to upstream (sipeed/picoclaw). Aborting push."
    return 0
  fi

  # Commit and Push
  if [[ -n "$(git -C "$BACKUP_DIR" status --porcelain)" ]]; then
    if ! git -C "$BACKUP_DIR" add .; then
      echo "Staging failed, will retry next time..."
      return 0
    fi
    if ! git -C "$BACKUP_DIR" commit -m "Auto-sync: $(date '+%Y-%m-%d %H:%M:%S')"; then
      echo "Commit failed, will retry next time..."
      return 0
    fi
  else
    # A clean tree can still hold commits from a cycle whose push failed;
    # compare HEAD with the remote-tracking ref so those get retried.
    local_head=$(git -C "$BACKUP_DIR" rev-parse -q --verify HEAD 2> /dev/null || true)
    remote_head=$(git -C "$BACKUP_DIR" rev-parse -q --verify refs/remotes/origin/main 2> /dev/null || true)
    if [[ -z "$local_head" || "$local_head" == "$remote_head" ]]; then
      echo "No changes to sync."
      return 0
    fi
    echo "Retrying push of previously committed changes..."
  fi

  # Success is only reported when the push actually went through.
  if git -C "$BACKUP_DIR" push origin main; then
    echo "Sync completed successfully."
  else
    echo "Push failed, will retry next time..."
  fi
  return 0
}
# Main execution
#######################################
# Entry point: configures git, performs the initial clone/pull and restore,
# then hands control to the periodic sync loop.
#######################################
main() {
  setup_git
  initial_sync
  sync_loop
}

main "$@"