File size: 20,742 Bytes
e605733
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
#!/bin/bash
#===============================================================================
# Apollo Training Server Setup Script
#===============================================================================
# This script sets up a fresh server for Apollo training:
# 1. Installs system dependencies
# 2. Sets up ClickHouse and imports blockchain data (epochs 844-850)
# 3. Sets up Neo4j for graph data
# 4. Installs Python dependencies
# 5. Generates/migrates cache for offline training
#
# Usage:
#   chmod +x scripts/setup_server.sh
#   ./scripts/setup_server.sh
#
# Requirements:
#   - Ubuntu 20.04/22.04 or similar Linux
#   - At least 300GB RAM, 48GB+ VRAM
#   - ~500GB free disk space for data
#   - Root/sudo access
#===============================================================================

set -e  # Exit on error (NOTE: does not catch failures inside pipelines)

# ANSI color codes used by the logging helpers below.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Logging helpers.
#   - printf '%b' expands the escape sequences in the color variables
#     (equivalent to `echo -e`, but portable and safe for messages that
#     begin with '-').
#   - "$*" joins all arguments, so multi-word messages work whether or not
#     the caller quoted them (the old "$1" silently dropped extra words).
#   - log_error writes to stderr so diagnostics survive stdout redirection.
log_info()    { printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "$*"; }
log_success() { printf '%b[SUCCESS]%b %s\n' "${GREEN}" "${NC}" "$*"; }
log_warn()    { printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$*"; }
log_error()   { printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$*" >&2; }

#===============================================================================
# Configuration
#===============================================================================
# All paths below can be overridden from the environment before invoking this
# script, e.g. `DATA_DIR=/mnt/data ./setup_server.sh`.
APOLLO_DIR="${APOLLO_DIR:-/workspace/apollo}"
DATA_DIR="${DATA_DIR:-/workspace/apollo/data}"
CACHE_DIR="${CACHE_DIR:-/workspace/apollo/data/cache}"
CLICKHOUSE_DATA_DIR="${CLICKHOUSE_DATA_DIR:-/var/lib/clickhouse}"

# Epochs to download (Solana epochs with memecoin data)
EPOCHS=(844 845 846 847 848 849 850)

# ClickHouse settings (native protocol on 9000, HTTP interface on 8123)
CLICKHOUSE_HOST="localhost"
CLICKHOUSE_PORT=9000
CLICKHOUSE_HTTP_PORT=8123

# Neo4j settings
NEO4J_HOST="localhost"
NEO4J_BOLT_PORT=7687
NEO4J_HTTP_PORT=7474
# NOTE(review): hard-coded default credential; it is written into .env later.
# Consider overriding via the environment on shared machines.
NEO4J_PASSWORD="apollo_neo4j_2024"

# Number of parallel workers for caching
CACHE_WORKERS=8

#===============================================================================
# Step 0: System Check
#===============================================================================
echo ""
echo "============================================================"
echo "  Apollo Training Server Setup"
echo "============================================================"
echo ""

log_info "Checking system requirements..."

# Check available memory. Guard against empty/non-numeric awk output: an
# unparseable value would make the numeric test below fail and abort the
# whole script under `set -e`.
TOTAL_MEM_GB=$(free -g | awk '/^Mem:/{print $2}')
case "$TOTAL_MEM_GB" in
    ''|*[!0-9]*) TOTAL_MEM_GB=0 ;;
esac
if [ "$TOTAL_MEM_GB" -lt 64 ]; then
    log_warn "System has ${TOTAL_MEM_GB}GB RAM. Recommended: 300GB+ for optimal performance."
fi

# Check available disk space on the filesystem that will hold DATA_DIR.
# The directory may not exist yet, so df failures fall back to "0" instead
# of aborting; the same non-numeric guard applies.
AVAILABLE_DISK_GB=$(df -BG "${DATA_DIR%/*}" 2>/dev/null | awk 'NR==2 {print $4}' | tr -d 'G' || echo "0")
case "$AVAILABLE_DISK_GB" in
    ''|*[!0-9]*) AVAILABLE_DISK_GB=0 ;;
esac
if [ "$AVAILABLE_DISK_GB" -lt 400 ]; then
    log_warn "Only ${AVAILABLE_DISK_GB}GB disk space available. Recommended: 500GB+"
fi

# Check for NVIDIA GPU (setup continues either way; training is just slow on CPU)
if command -v nvidia-smi &> /dev/null; then
    GPU_INFO=$(nvidia-smi --query-gpu=name,memory.total --format=csv,noheader | head -1)
    log_info "GPU detected: $GPU_INFO"
else
    log_warn "No NVIDIA GPU detected. Training will be slow without GPU."
fi

log_success "System check complete"

#===============================================================================
# Step 1: Install System Dependencies
#===============================================================================
echo ""
log_info "Step 1: Installing system dependencies..."

# Refresh the package index before installing anything.
sudo apt-get update

# Essential build tooling, Python, and download/compression utilities,
# installed in a single apt transaction.
system_packages=(
    curl
    wget
    git
    build-essential
    python3
    python3-pip
    python3-venv
    htop
    tmux
    unzip
    aria2
    pigz
    pv
)
sudo apt-get install -y "${system_packages[@]}"

log_success "System dependencies installed"

#===============================================================================
# Step 2: Install and Configure ClickHouse
#===============================================================================
echo ""
log_info "Step 2: Setting up ClickHouse..."

# Check if ClickHouse is already installed
if command -v clickhouse-server &> /dev/null; then
    log_info "ClickHouse already installed, checking version..."
    clickhouse-server --version
else
    log_info "Installing ClickHouse..."

    # Add ClickHouse repository.
    # NOTE(review): apt-key is deprecated on newer Ubuntu releases; if this
    # step fails, switch to the keyring/signed-by method from the ClickHouse
    # install docs.
    sudo apt-get install -y apt-transport-https ca-certificates dirmngr
    sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754
    echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee /etc/apt/sources.list.d/clickhouse.list

    # Install ClickHouse
    sudo apt-get update
    sudo DEBIAN_FRONTEND=noninteractive apt-get install -y clickhouse-server clickhouse-client
fi

# Configure ClickHouse for high performance
log_info "Configuring ClickHouse for optimal performance..."

sudo mkdir -p /etc/clickhouse-server/config.d/

# Drop-in override: memory caps, thread counts, network binding, log rotation.
cat << 'EOF' | sudo tee /etc/clickhouse-server/config.d/apollo.xml
<?xml version="1.0"?>
<clickhouse>
    <!-- Memory settings for large dataset -->
    <max_server_memory_usage_to_ram_ratio>0.8</max_server_memory_usage_to_ram_ratio>
    <max_memory_usage>200000000000</max_memory_usage>

    <!-- Performance settings -->
    <max_threads>32</max_threads>
    <max_concurrent_queries>100</max_concurrent_queries>

    <!-- Listen on all interfaces -->
    <listen_host>0.0.0.0</listen_host>

    <!-- Logging -->
    <logger>
        <level>warning</level>
        <log>/var/log/clickhouse-server/clickhouse-server.log</log>
        <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
        <size>100M</size>
        <count>3</count>
    </logger>
</clickhouse>
EOF

# Start ClickHouse
log_info "Starting ClickHouse server..."
sudo systemctl enable clickhouse-server
sudo systemctl restart clickhouse-server

# Wait for ClickHouse to accept queries. Previously the script continued
# silently if the server never came up, and the schema step later failed
# confusingly; now we abort here with a clear message.
log_info "Waiting for ClickHouse to be ready..."
CLICKHOUSE_READY=0
for i in {1..30}; do
    if clickhouse-client --query "SELECT 1" &>/dev/null; then
        CLICKHOUSE_READY=1
        log_success "ClickHouse is ready"
        break
    fi
    sleep 2
done
if [ "$CLICKHOUSE_READY" -ne 1 ]; then
    log_error "ClickHouse did not become ready after 60s. Check: journalctl -u clickhouse-server"
    exit 1
fi

#===============================================================================
# Step 3: Install and Configure Neo4j
#===============================================================================
echo ""
log_info "Step 3: Setting up Neo4j..."

# Check if Neo4j is already installed
if command -v neo4j &> /dev/null; then
    log_info "Neo4j already installed"
else
    log_info "Installing Neo4j..."

    # Add Neo4j repository.
    # NOTE(review): apt-key is deprecated on newer Ubuntu releases; use a
    # keyring file with signed-by if this step fails.
    wget -O - https://debian.neo4j.com/neotechnology.gpg.key | sudo apt-key add -
    echo 'deb https://debian.neo4j.com stable latest' | sudo tee /etc/apt/sources.list.d/neo4j.list

    # Install Neo4j
    sudo apt-get update
    sudo apt-get install -y neo4j
fi

# Configure Neo4j.
# WARNING: this overwrites the entire /etc/neo4j/neo4j.conf with our settings.
log_info "Configuring Neo4j..."

sudo tee /etc/neo4j/neo4j.conf > /dev/null << EOF
# Network
dbms.default_listen_address=0.0.0.0
dbms.connector.bolt.listen_address=:7687
dbms.connector.http.listen_address=:7474

# Memory (adjust based on available RAM)
dbms.memory.heap.initial_size=4g
dbms.memory.heap.max_size=16g
dbms.memory.pagecache.size=8g

# Security
dbms.security.auth_enabled=true
EOF

# Start Neo4j
log_info "Starting Neo4j..."
sudo systemctl enable neo4j
sudo systemctl restart neo4j

# Wait for the HTTP endpoint instead of a fixed 10-second sleep — the fixed
# sleep raced Neo4j startup on slow machines and the password call below
# then silently did nothing.
log_info "Waiting for Neo4j to be ready..."
for i in {1..30}; do
    if curl -s "http://${NEO4J_HOST}:${NEO4J_HTTP_PORT}" > /dev/null; then
        break
    fi
    sleep 2
done

# Set initial password. This only succeeds on a fresh install where the
# default neo4j/neo4j credentials are still active; otherwise it is a no-op
# (hence the deliberate `|| true`).
log_info "Setting Neo4j password..."
curl -s -X POST "http://localhost:7474/user/neo4j/password" \
    -H "Content-Type: application/json" \
    -d "{\"password\":\"${NEO4J_PASSWORD}\"}" \
    -u neo4j:neo4j 2>/dev/null || true

log_success "Neo4j configured with password: ${NEO4J_PASSWORD}"

#===============================================================================
# Step 4: Download Blockchain Data (Epochs 844-850)
#===============================================================================
echo ""
log_info "Step 4: Downloading blockchain data for epochs ${EPOCHS[*]}..."

mkdir -p "${DATA_DIR}/epochs"

#######################################
# Create the per-epoch download stub script.
# Globals:   DATA_DIR (read)
# Arguments: $1 - epoch number
# Outputs:   writes ${DATA_DIR}/epochs/epoch_<N>/download_epoch_<N>.sh
# Returns:   0 (download/import itself is a documented manual step)
# Note: uses absolute paths throughout so the caller's cwd is never changed
# (the previous version cd'd into each epoch dir and never returned).
#######################################
download_epoch() {
    local epoch=$1
    local epoch_dir="${DATA_DIR}/epochs/epoch_${epoch}"
    local script_path="${epoch_dir}/download_epoch_${epoch}.sh"

    log_info "Processing epoch ${epoch}..."

    # Skip epochs already marked done (the manual import step is expected
    # to create the .complete marker).
    if [ -f "${epoch_dir}/.complete" ]; then
        log_info "Epoch ${epoch} already downloaded, skipping..."
        return 0
    fi

    mkdir -p "${epoch_dir}"

    # Download epoch data (adjust URL based on your data source).
    # This is a placeholder - replace with actual data source.
    log_info "Downloading epoch ${epoch} data..."

    # Example: Download from S3 or other source
    # aria2c -x 16 -s 16 "https://your-data-source/solana/epoch_${epoch}.tar.gz" -o "epoch_${epoch}.tar.gz"

    # Generate a customizable download stub. The heredoc delimiter is
    # unquoted so the current epoch is baked in as the default argument
    # (the old stub required $1, but the final instructions tell the user
    # to run it with no arguments). Everything that must stay literal in
    # the generated script is backslash-escaped.
    cat << DOWNLOAD_SCRIPT > "${script_path}"
#!/bin/bash
# Customize this script with your actual data source
# Example sources:
# 1. Google BigQuery export
# 2. S3 bucket with parquet files
# 3. Direct RPC node dump
# 4. Third-party data provider (Helius, QuickNode, etc.)

EPOCH=\${1:-${epoch}}
echo "Download script for epoch \${EPOCH}"
echo "Please customize this script with your data source"

# Example: Download from S3
# aws s3 sync s3://your-bucket/solana/epoch_\${EPOCH}/ ./

# Example: Download from HTTP
# wget -r -np -nH --cut-dirs=3 "https://data.example.com/solana/epoch_\${EPOCH}/"

# After download, import to ClickHouse:
# clickhouse-client --query "INSERT INTO trades FORMAT Parquet" < trades.parquet
DOWNLOAD_SCRIPT

    chmod +x "${script_path}"

    # Mark as needing manual completion
    log_warn "Epoch ${epoch}: Download script created at ${script_path}"
    log_warn "Please customize and run the download script manually"
}

# Process each epoch
for epoch in "${EPOCHS[@]}"; do
    download_epoch "$epoch"
done

#===============================================================================
# Step 5: Create ClickHouse Schema
#===============================================================================
echo ""
log_info "Step 5: Creating ClickHouse database schema..."

# --multiquery is required for older clickhouse-client releases to execute a
# multi-statement script from stdin (recent releases default to it, where the
# flag is a harmless no-op). Without it the whole heredoc is treated as one
# query and the schema creation fails.
clickhouse-client --multiquery << 'EOF'
-- Create database
CREATE DATABASE IF NOT EXISTS apollo;

-- Trades table
CREATE TABLE IF NOT EXISTS apollo.trades (
    timestamp DateTime64(3),
    signature String,
    token_address String,
    maker String,
    trade_direction UInt8,
    token_amount Float64,
    sol_amount Float64,
    price_usd Float64,
    total_usd Float64,
    dex_platform String,
    pool_address String,
    success UInt8,
    priority_fee Float64,
    slippage Float64,
    is_bundle UInt8,
    mev_protection UInt8
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(timestamp)
ORDER BY (token_address, timestamp)
SETTINGS index_granularity = 8192;

-- Transfers table
CREATE TABLE IF NOT EXISTS apollo.transfers (
    timestamp DateTime64(3),
    signature String,
    token_address String,
    source String,
    destination String,
    amount Float64,
    decimals UInt8
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(timestamp)
ORDER BY (token_address, timestamp)
SETTINGS index_granularity = 8192;

-- Mints table (token creation events)
CREATE TABLE IF NOT EXISTS apollo.mints (
    timestamp DateTime64(3),
    mint_address String,
    creator_address String,
    name String,
    symbol String,
    decimals UInt8,
    total_supply Float64,
    token_uri String,
    protocol String
) ENGINE = MergeTree()
ORDER BY (timestamp, mint_address)
SETTINGS index_granularity = 8192;

-- Wallet profiles table
CREATE TABLE IF NOT EXISTS apollo.wallet_profiles (
    wallet_address String,
    updated_at DateTime64(3),
    age Float64,
    balance Float64,
    deployed_tokens_count UInt32,
    total_buys_count UInt32,
    total_sells_count UInt32,
    total_winrate Float32,
    stats_1d_realized_profit_sol Float64,
    stats_1d_buy_count UInt32,
    stats_1d_sell_count UInt32,
    stats_7d_realized_profit_sol Float64,
    stats_7d_buy_count UInt32,
    stats_7d_sell_count UInt32
) ENGINE = ReplacingMergeTree(updated_at)
ORDER BY wallet_address
SETTINGS index_granularity = 8192;

-- Wallet holdings table
CREATE TABLE IF NOT EXISTS apollo.wallet_holdings (
    wallet_address String,
    mint_address String,
    current_balance Float64,
    start_holding_at DateTime64(3),
    end_holding_at Nullable(DateTime64(3)),
    bought_amount_sol Float64,
    sold_amount_sol Float64
) ENGINE = MergeTree()
ORDER BY (wallet_address, mint_address)
SETTINGS index_granularity = 8192;

-- Pool creations table
CREATE TABLE IF NOT EXISTS apollo.pool_creations (
    timestamp DateTime64(3),
    pool_address String,
    token_address String,
    quote_token_address String,
    creator_address String,
    protocol String,
    base_amount Float64,
    quote_amount Float64
) ENGINE = MergeTree()
ORDER BY (token_address, timestamp)
SETTINGS index_granularity = 8192;

-- Token holders snapshot table
CREATE TABLE IF NOT EXISTS apollo.token_holders (
    token_address String,
    snapshot_time DateTime64(3),
    wallet_address String,
    current_balance Float64,
    rank UInt32
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(snapshot_time)
ORDER BY (token_address, snapshot_time, rank)
SETTINGS index_granularity = 8192;

EOF

log_success "ClickHouse schema created"

#===============================================================================
# Step 6: Setup Python Environment
#===============================================================================
echo ""
log_info "Step 6: Setting up Python environment..."

cd "${APOLLO_DIR}"

# Create the virtualenv only on first run; re-running the script reuses it.
if [ ! -d "venv" ]; then
    log_info "Creating Python virtual environment..."
    python3 -m venv venv
fi

# Activate the virtualenv so every pip below installs into it.
source venv/bin/activate

# Make sure pip itself is current before installing anything.
pip install --upgrade pip

# PyTorch comes from the CUDA 12.1 wheel index, not PyPI.
log_info "Installing PyTorch with CUDA..."
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

# Remaining training/runtime dependencies, installed in one pip transaction.
log_info "Installing Python dependencies..."
python_packages=(
    transformers
    accelerate
    clickhouse-driver
    neo4j
    requests
    Pillow
    tqdm
    numpy
    pandas
    tensorboard
    sentencepiece
    protobuf
)
pip install "${python_packages[@]}"

# If the repo ships a setup.py, install the project itself in editable mode.
if [ -f "setup.py" ]; then
    pip install -e .
fi

log_success "Python environment ready"

#===============================================================================
# Step 7: Create Environment File
#===============================================================================
echo ""
log_info "Step 7: Creating environment configuration..."

# Unquoted heredoc delimiter: the configuration values are expanded now and
# written into .env as literals.
cat << EOF > "${APOLLO_DIR}/.env"
# ClickHouse Configuration
CLICKHOUSE_HOST=${CLICKHOUSE_HOST}
CLICKHOUSE_PORT=${CLICKHOUSE_PORT}

# Neo4j Configuration
NEO4J_URI=bolt://${NEO4J_HOST}:${NEO4J_BOLT_PORT}
NEO4J_USER=neo4j
NEO4J_PASSWORD=${NEO4J_PASSWORD}

# Training Configuration
APOLLO_DATA_DIR=${DATA_DIR}
APOLLO_CACHE_DIR=${CACHE_DIR}

# CUDA Configuration
CUDA_VISIBLE_DEVICES=0
EOF

# The file contains the Neo4j password — restrict it to the owner
# (it was previously created with the default, world-readable umask).
chmod 600 "${APOLLO_DIR}/.env"

log_success "Environment file created at ${APOLLO_DIR}/.env"

#===============================================================================
# Step 8: Generate Cache (if data is available)
#===============================================================================
echo ""
log_info "Step 8: Cache generation setup..."

mkdir -p "${CACHE_DIR}"

# Check if we have data to cache. Treat any non-numeric result (e.g. a
# partial error message from clickhouse-client) as zero, so the numeric
# test below cannot abort the script under `set -e`.
MINT_COUNT=$(clickhouse-client --query "SELECT count() FROM apollo.mints" 2>/dev/null || echo "0")
case "$MINT_COUNT" in
    ''|*[!0-9]*) MINT_COUNT=0 ;;
esac

if [ "$MINT_COUNT" -gt 0 ]; then
    log_info "Found ${MINT_COUNT} mints in database. Ready for caching."
    log_info "To generate cache, run:"
    echo ""
    echo "  cd ${APOLLO_DIR}"
    echo "  source venv/bin/activate"
    echo "  python scripts/cache_parallel.py --output_dir ${CACHE_DIR} --num_workers ${CACHE_WORKERS}"
    echo ""
else
    log_warn "No mint data found in ClickHouse. Please import data first."
    log_info "After importing epoch data, run the caching script."
fi

#===============================================================================
# Step 9: Create Helper Scripts
#===============================================================================
echo ""
log_info "Step 9: Creating helper scripts..."

# Create start script (quoted delimiter: written verbatim, expanded at runtime).
cat << 'EOF' > "${APOLLO_DIR}/start_services.sh"
#!/bin/bash
# Start all services for Apollo training

echo "Starting ClickHouse..."
sudo systemctl start clickhouse-server

echo "Starting Neo4j..."
sudo systemctl start neo4j

echo "Waiting for services to be ready..."
sleep 5

# Verify services
echo "Checking ClickHouse..."
clickhouse-client --query "SELECT 1" && echo "ClickHouse OK" || echo "ClickHouse FAILED"

echo "Checking Neo4j..."
curl -s http://localhost:7474 > /dev/null && echo "Neo4j OK" || echo "Neo4j FAILED"

echo "Services started!"
EOF
chmod +x "${APOLLO_DIR}/start_services.sh"

# Create training launch script. Unquoted delimiter: ${APOLLO_DIR} is expanded
# now, while the escaped \$@ stays literal so the generated script forwards
# its own arguments. The cd path is quoted so the generated script still works
# if APOLLO_DIR contains spaces or shell metacharacters.
cat << EOF > "${APOLLO_DIR}/train_launch.sh"
#!/bin/bash
# Launch Apollo training with optimal settings

cd "${APOLLO_DIR}"
source venv/bin/activate
source .env

# Optimal settings for 48GB VRAM
accelerate launch train.py \\
    --epochs 7 \\
    --batch_size 16 \\
    --grad_accum_steps 4 \\
    --learning_rate 1e-4 \\
    --warmup_ratio 0.1 \\
    --max_grad_norm 1.0 \\
    --mixed_precision bf16 \\
    --max_seq_len 8192 \\
    --horizons_seconds 60 180 300 600 1800 3600 7200 \\
    --quantiles 0.1 0.5 0.9 \\
    --num_workers 16 \\
    --pin_memory \\
    --val_split 0.1 \\
    --val_every 5000 \\
    --save_every 5000 \\
    --log_every 100 \\
    "\$@"
EOF
chmod +x "${APOLLO_DIR}/train_launch.sh"

# Create status check script (quoted delimiter: written verbatim).
cat << 'EOF' > "${APOLLO_DIR}/check_status.sh"
#!/bin/bash
# Check status of all Apollo components

echo "============================================"
echo "Apollo Server Status"
echo "============================================"

# System resources
echo ""
echo "=== System Resources ==="
echo "CPU: $(nproc) cores"
echo "RAM: $(free -h | awk '/^Mem:/{print $2}') total, $(free -h | awk '/^Mem:/{print $3}') used"
echo "Disk: $(df -h /workspace 2>/dev/null | awk 'NR==2{print $4}' || df -h / | awk 'NR==2{print $4}') available"

# GPU status
if command -v nvidia-smi &> /dev/null; then
    echo ""
    echo "=== GPU Status ==="
    nvidia-smi --query-gpu=name,memory.used,memory.total,utilization.gpu --format=csv,noheader
fi

# ClickHouse status
echo ""
echo "=== ClickHouse ==="
if systemctl is-active --quiet clickhouse-server; then
    echo "Status: Running"
    MINT_COUNT=$(clickhouse-client --query "SELECT count() FROM apollo.mints" 2>/dev/null || echo "N/A")
    TRADE_COUNT=$(clickhouse-client --query "SELECT count() FROM apollo.trades" 2>/dev/null || echo "N/A")
    echo "Mints: ${MINT_COUNT}"
    echo "Trades: ${TRADE_COUNT}"
else
    echo "Status: STOPPED"
fi

# Neo4j status
echo ""
echo "=== Neo4j ==="
if systemctl is-active --quiet neo4j; then
    echo "Status: Running"
else
    echo "Status: STOPPED"
fi

# Cache status
echo ""
echo "=== Cache ==="
CACHE_DIR="${APOLLO_CACHE_DIR:-/workspace/apollo/data/cache}"
if [ -d "$CACHE_DIR" ]; then
    CACHE_COUNT=$(ls -1 "$CACHE_DIR"/sample_*.pt 2>/dev/null | wc -l)
    CACHE_SIZE=$(du -sh "$CACHE_DIR" 2>/dev/null | cut -f1)
    echo "Files: ${CACHE_COUNT}"
    echo "Size: ${CACHE_SIZE}"
else
    echo "Cache directory not found"
fi

echo ""
echo "============================================"
EOF
chmod +x "${APOLLO_DIR}/check_status.sh"

log_success "Helper scripts created"

#===============================================================================
# Final Summary
#===============================================================================
# Heredocs replace the long runs of echo statements; output is identical.
cat << EOF

============================================================
  Setup Complete!
============================================================

EOF
log_success "Apollo training server setup finished"
cat << EOF

Next steps:

1. Import epoch data (844-850):
   - Customize download scripts in ${DATA_DIR}/epochs/
   - Run: ./download_epoch_XXX.sh for each epoch
   - Import to ClickHouse using provided schema

2. Generate training cache:
   cd ${APOLLO_DIR}
   source venv/bin/activate
   python scripts/cache_parallel.py --output_dir ${CACHE_DIR}

3. Start training:
   ./train_launch.sh

Useful commands:
  ./start_services.sh  - Start ClickHouse and Neo4j
  ./check_status.sh    - Check system and service status
  ./train_launch.sh    - Launch training with optimal settings

Environment variables saved to: ${APOLLO_DIR}/.env

EOF