| #!/bin/bash |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
# Fail fast: abort on any command error and on failures inside pipelines.
# (set -u is deliberately omitted: sourced scripts such as venv/bin/activate
# are not always nounset-clean.)
set -eo pipefail

# ANSI color codes used by the log helpers below.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m'  # reset / no color

# Logging helpers. Informational output goes to stdout; warnings and errors
# go to stderr so they survive redirection of normal output.
log_info()    { echo -e "${BLUE}[INFO]${NC} $1"; }
log_success() { echo -e "${GREEN}[SUCCESS]${NC} $1"; }
log_warn()    { echo -e "${YELLOW}[WARN]${NC} $1" >&2; }
log_error()   { echo -e "${RED}[ERROR]${NC} $1" >&2; }
|
|
| |
| |
| |
# ---------------------------------------------------------------------------
# Configuration — every value can be overridden from the environment.
# (Previously only the path settings were overridable; the service settings
# were hardcoded. Defaults are unchanged, so existing usage is unaffected.)
# ---------------------------------------------------------------------------
APOLLO_DIR="${APOLLO_DIR:-/workspace/apollo}"
DATA_DIR="${DATA_DIR:-/workspace/apollo/data}"
CACHE_DIR="${CACHE_DIR:-/workspace/apollo/data/cache}"
CLICKHOUSE_DATA_DIR="${CLICKHOUSE_DATA_DIR:-/var/lib/clickhouse}"

# Blockchain epochs to stage for download/import.
EPOCHS=(844 845 846 847 848 849 850)

# ClickHouse connection settings.
CLICKHOUSE_HOST="${CLICKHOUSE_HOST:-localhost}"
CLICKHOUSE_PORT="${CLICKHOUSE_PORT:-9000}"
CLICKHOUSE_HTTP_PORT="${CLICKHOUSE_HTTP_PORT:-8123}"

# Neo4j connection settings.
NEO4J_HOST="${NEO4J_HOST:-localhost}"
NEO4J_BOLT_PORT="${NEO4J_BOLT_PORT:-7687}"
NEO4J_HTTP_PORT="${NEO4J_HTTP_PORT:-7474}"
# NOTE(review): the default password is checked into the script; override
# NEO4J_PASSWORD in the environment for anything beyond a throwaway box.
NEO4J_PASSWORD="${NEO4J_PASSWORD:-apollo_neo4j_2024}"

# Parallel workers for cache generation.
CACHE_WORKERS="${CACHE_WORKERS:-8}"
|
|
| |
| |
| |
echo ""
echo "============================================================"
echo " Apollo Training Server Setup"
echo "============================================================"
echo ""

log_info "Checking system requirements..."

# RAM check ('free -g' rounds down; 64GB is the floor, 300GB+ is ideal).
TOTAL_MEM_GB=$(free -g | awk '/^Mem:/{print $2}')
if [ "${TOTAL_MEM_GB:-0}" -lt 64 ]; then
    log_warn "System has ${TOTAL_MEM_GB}GB RAM. Recommended: 300GB+ for optimal performance."
fi

# Disk check: probe the parent of DATA_DIR, falling back to / when that
# directory does not exist yet (it is only created in Step 4; the original
# probe reported a misleading "0GB available" in that case).
DISK_PROBE_DIR="${DATA_DIR%/*}"
[ -d "$DISK_PROBE_DIR" ] || DISK_PROBE_DIR="/"
AVAILABLE_DISK_GB=$(df -BG "$DISK_PROBE_DIR" 2>/dev/null | awk 'NR==2 {print $4}' | tr -d 'G' || echo "0")
if [ "${AVAILABLE_DISK_GB:-0}" -lt 400 ]; then
    log_warn "Only ${AVAILABLE_DISK_GB:-0}GB disk space available. Recommended: 500GB+"
fi

# GPU check — training runs without a GPU but will be far slower.
if command -v nvidia-smi &> /dev/null; then
    GPU_INFO=$(nvidia-smi --query-gpu=name,memory.total --format=csv,noheader | head -1)
    log_info "GPU detected: $GPU_INFO"
else
    log_warn "No NVIDIA GPU detected. Training will be slow without GPU."
fi

log_success "System check complete"
|
|
| |
| |
| |
echo ""
log_info "Step 1: Installing system dependencies..."

# Refresh the package index first; under 'set -e' an unreachable mirror
# aborts the setup here rather than failing half-way through.
sudo apt-get update

# Non-interactive install so unattended provisioning never hangs on a
# debconf prompt (consistent with the ClickHouse install step).
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y \
    curl \
    wget \
    git \
    build-essential \
    python3 \
    python3-pip \
    python3-venv \
    htop \
    tmux \
    unzip \
    aria2 \
    pigz \
    pv

log_success "System dependencies installed"
|
|
| |
| |
| |
echo ""
log_info "Step 2: Setting up ClickHouse..."

# Install ClickHouse only when it is not already present.
if command -v clickhouse-server &> /dev/null; then
    log_info "ClickHouse already installed, checking version..."
    clickhouse-server --version
else
    log_info "Installing ClickHouse..."

    sudo apt-get install -y apt-transport-https ca-certificates dirmngr gnupg

    # apt-key is deprecated; fetch the vendor signing key into a dedicated
    # keyring and pin the repository to it with [signed-by=...].
    sudo mkdir -p /usr/share/keyrings
    sudo gpg --no-default-keyring --keyring /usr/share/keyrings/clickhouse-keyring.gpg \
        --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 8919F6BD2B48D754
    echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" \
        | sudo tee /etc/apt/sources.list.d/clickhouse.list

    sudo apt-get update
    sudo DEBIAN_FRONTEND=noninteractive apt-get install -y clickhouse-server clickhouse-client
fi

# Drop a config overlay into config.d/ rather than editing the stock config.
log_info "Configuring ClickHouse for optimal performance..."

sudo mkdir -p /etc/clickhouse-server/config.d/

cat << 'EOF' | sudo tee /etc/clickhouse-server/config.d/apollo.xml
<?xml version="1.0"?>
<clickhouse>
    <!-- Memory settings for large dataset -->
    <max_server_memory_usage_to_ram_ratio>0.8</max_server_memory_usage_to_ram_ratio>
    <max_memory_usage>200000000000</max_memory_usage>

    <!-- Performance settings -->
    <max_threads>32</max_threads>
    <max_concurrent_queries>100</max_concurrent_queries>

    <!-- Listen on all interfaces -->
    <listen_host>0.0.0.0</listen_host>

    <!-- Logging -->
    <logger>
        <level>warning</level>
        <log>/var/log/clickhouse-server/clickhouse-server.log</log>
        <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
        <size>100M</size>
        <count>3</count>
    </logger>
</clickhouse>
EOF

log_info "Starting ClickHouse server..."
sudo systemctl enable clickhouse-server
sudo systemctl restart clickhouse-server

# Poll for readiness and abort the whole setup if the server never answers,
# instead of silently falling through to the schema step (original behavior
# continued even after all 30 attempts failed).
log_info "Waiting for ClickHouse to be ready..."
CH_READY=0
for i in {1..30}; do
    if clickhouse-client --query "SELECT 1" &>/dev/null; then
        CH_READY=1
        log_success "ClickHouse is ready"
        break
    fi
    sleep 2
done
if [ "$CH_READY" -ne 1 ]; then
    log_error "ClickHouse did not become ready within 60 seconds"
    exit 1
fi
|
|
| |
| |
| |
echo ""
log_info "Step 3: Setting up Neo4j..."

# Install Neo4j only when it is not already present.
if command -v neo4j &> /dev/null; then
    log_info "Neo4j already installed"
else
    log_info "Installing Neo4j..."

    # apt-key is deprecated; store the vendor key in its own keyring and
    # reference it via [signed-by=...].
    wget -O - https://debian.neo4j.com/neotechnology.gpg.key \
        | sudo gpg --dearmor -o /usr/share/keyrings/neo4j-keyring.gpg
    echo 'deb [signed-by=/usr/share/keyrings/neo4j-keyring.gpg] https://debian.neo4j.com stable latest' \
        | sudo tee /etc/apt/sources.list.d/neo4j.list

    sudo apt-get update
    sudo DEBIAN_FRONTEND=noninteractive apt-get install -y neo4j
fi

log_info "Configuring Neo4j..."

# Overwrite the stock config; listen addresses come from the configured
# ports (defaults match the previous hardcoded :7687/:7474).
sudo tee /etc/neo4j/neo4j.conf > /dev/null << EOF
# Network
dbms.default_listen_address=0.0.0.0
dbms.connector.bolt.listen_address=:${NEO4J_BOLT_PORT}
dbms.connector.http.listen_address=:${NEO4J_HTTP_PORT}

# Memory (adjust based on available RAM)
dbms.memory.heap.initial_size=4g
dbms.memory.heap.max_size=16g
dbms.memory.pagecache.size=8g

# Security
dbms.security.auth_enabled=true
EOF

log_info "Starting Neo4j..."
sudo systemctl enable neo4j
sudo systemctl restart neo4j

# Poll the HTTP endpoint instead of a fixed 10s sleep — Neo4j cold starts
# routinely exceed that on a loaded machine.
log_info "Waiting for Neo4j to be ready..."
for i in {1..30}; do
    if curl -sf "http://${NEO4J_HOST}:${NEO4J_HTTP_PORT}" > /dev/null; then
        break
    fi
    sleep 2
done

# Change the default password (best effort: the legacy endpoint returns an
# error if the password was already changed on a previous run).
log_info "Setting Neo4j password..."
curl -s -X POST "http://${NEO4J_HOST}:${NEO4J_HTTP_PORT}/user/neo4j/password" \
    -H "Content-Type: application/json" \
    -d "{\"password\":\"${NEO4J_PASSWORD}\"}" \
    -u neo4j:neo4j 2>/dev/null || true

# Do not echo the secret itself into the setup log.
log_success "Neo4j configured (user: neo4j, password taken from NEO4J_PASSWORD)"
|
|
| |
| |
| |
echo ""
log_info "Step 4: Downloading blockchain data for epochs ${EPOCHS[*]}..."

mkdir -p "${DATA_DIR}/epochs"
cd "${DATA_DIR}/epochs"

#######################################
# Stage the download area for one epoch.
# Creates the epoch directory and drops a stub download script that the
# operator must customize (the data source is site-specific).
# Globals:   DATA_DIR (read)
# Arguments: $1 - epoch number
# Returns:   0 on success (including early exit on a .complete marker),
#            non-zero if the epoch directory cannot be entered.
#######################################
download_epoch() {
    local epoch=$1
    local epoch_dir="${DATA_DIR}/epochs/epoch_${epoch}"

    log_info "Processing epoch ${epoch}..."

    # A .complete marker means this epoch was already fetched.
    if [ -f "${epoch_dir}/.complete" ]; then
        log_info "Epoch ${epoch} already downloaded, skipping..."
        return 0
    fi

    mkdir -p "${epoch_dir}"
    cd "${epoch_dir}" || return 1

    log_info "Downloading epoch ${epoch} data..."

    # Quoted heredoc delimiter: nothing below expands now; ${EPOCH} is
    # resolved when the generated script runs. The shebang was missing in
    # the original, so the stub could not be executed directly.
    cat << 'DOWNLOAD_SCRIPT' > "download_epoch_${epoch}.sh"
#!/bin/bash
# Stub download script — customize with your data source.
EPOCH=$1
echo "Download script for epoch ${EPOCH}"
echo "Please customize this script with your data source"
DOWNLOAD_SCRIPT

    chmod +x "download_epoch_${epoch}.sh"

    log_warn "Epoch ${epoch}: Download script created at ${epoch_dir}/download_epoch_${epoch}.sh"
    log_warn "Please customize and run the download script manually"
}

# Stage every configured epoch (epoch_dir is absolute, so the cd inside the
# function does not affect later iterations).
for epoch in "${EPOCHS[@]}"; do
    download_epoch "$epoch"
done
|
|
| |
| |
| |
echo ""
log_info "Step 5: Creating ClickHouse database schema..."

# --multiquery lets older clickhouse-client versions execute the
# multi-statement script below (it is the default on recent releases).
# Quoted delimiter: the SQL is passed through verbatim, no shell expansion.
clickhouse-client --multiquery << 'EOF'
-- Create database
CREATE DATABASE IF NOT EXISTS apollo;

-- Trades table
CREATE TABLE IF NOT EXISTS apollo.trades (
    timestamp DateTime64(3),
    signature String,
    token_address String,
    maker String,
    trade_direction UInt8,
    token_amount Float64,
    sol_amount Float64,
    price_usd Float64,
    total_usd Float64,
    dex_platform String,
    pool_address String,
    success UInt8,
    priority_fee Float64,
    slippage Float64,
    is_bundle UInt8,
    mev_protection UInt8
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(timestamp)
ORDER BY (token_address, timestamp)
SETTINGS index_granularity = 8192;

-- Transfers table
CREATE TABLE IF NOT EXISTS apollo.transfers (
    timestamp DateTime64(3),
    signature String,
    token_address String,
    source String,
    destination String,
    amount Float64,
    decimals UInt8
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(timestamp)
ORDER BY (token_address, timestamp)
SETTINGS index_granularity = 8192;

-- Mints table (token creation events)
CREATE TABLE IF NOT EXISTS apollo.mints (
    timestamp DateTime64(3),
    mint_address String,
    creator_address String,
    name String,
    symbol String,
    decimals UInt8,
    total_supply Float64,
    token_uri String,
    protocol String
) ENGINE = MergeTree()
ORDER BY (timestamp, mint_address)
SETTINGS index_granularity = 8192;

-- Wallet profiles table
CREATE TABLE IF NOT EXISTS apollo.wallet_profiles (
    wallet_address String,
    updated_at DateTime64(3),
    age Float64,
    balance Float64,
    deployed_tokens_count UInt32,
    total_buys_count UInt32,
    total_sells_count UInt32,
    total_winrate Float32,
    stats_1d_realized_profit_sol Float64,
    stats_1d_buy_count UInt32,
    stats_1d_sell_count UInt32,
    stats_7d_realized_profit_sol Float64,
    stats_7d_buy_count UInt32,
    stats_7d_sell_count UInt32
) ENGINE = ReplacingMergeTree(updated_at)
ORDER BY wallet_address
SETTINGS index_granularity = 8192;

-- Wallet holdings table
CREATE TABLE IF NOT EXISTS apollo.wallet_holdings (
    wallet_address String,
    mint_address String,
    current_balance Float64,
    start_holding_at DateTime64(3),
    end_holding_at Nullable(DateTime64(3)),
    bought_amount_sol Float64,
    sold_amount_sol Float64
) ENGINE = MergeTree()
ORDER BY (wallet_address, mint_address)
SETTINGS index_granularity = 8192;

-- Pool creations table
CREATE TABLE IF NOT EXISTS apollo.pool_creations (
    timestamp DateTime64(3),
    pool_address String,
    token_address String,
    quote_token_address String,
    creator_address String,
    protocol String,
    base_amount Float64,
    quote_amount Float64
) ENGINE = MergeTree()
ORDER BY (token_address, timestamp)
SETTINGS index_granularity = 8192;

-- Token holders snapshot table
CREATE TABLE IF NOT EXISTS apollo.token_holders (
    token_address String,
    snapshot_time DateTime64(3),
    wallet_address String,
    current_balance Float64,
    rank UInt32
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(snapshot_time)
ORDER BY (token_address, snapshot_time, rank)
SETTINGS index_granularity = 8192;

EOF

log_success "ClickHouse schema created"
|
|
| |
| |
| |
echo ""
log_info "Step 6: Setting up Python environment..."

cd "${APOLLO_DIR}"

# Create the virtualenv once; subsequent runs reuse it.
if [ ! -d "venv" ]; then
    log_info "Creating Python virtual environment..."
    python3 -m venv venv
fi

source venv/bin/activate

pip install --upgrade pip

# PyTorch comes from the CUDA 12.1 wheel index.
log_info "Installing PyTorch with CUDA..."
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

# Remaining Python dependencies, installed in a single pip invocation.
log_info "Installing Python dependencies..."
python_deps=(
    transformers
    accelerate
    clickhouse-driver
    neo4j
    requests
    Pillow
    tqdm
    numpy
    pandas
    tensorboard
    sentencepiece
    protobuf
)
pip install "${python_deps[@]}"

# Editable install of the project itself, when a setup.py is present.
if [ -f "setup.py" ]; then
    pip install -e .
fi

log_success "Python environment ready"
|
|
| |
| |
| |
echo ""
log_info "Step 7: Creating environment configuration..."

# Unquoted delimiter: the ${...} values are expanded into the file now.
cat << EOF > "${APOLLO_DIR}/.env"
# ClickHouse Configuration
CLICKHOUSE_HOST=${CLICKHOUSE_HOST}
CLICKHOUSE_PORT=${CLICKHOUSE_PORT}

# Neo4j Configuration
NEO4J_URI=bolt://${NEO4J_HOST}:${NEO4J_BOLT_PORT}
NEO4J_USER=neo4j
NEO4J_PASSWORD=${NEO4J_PASSWORD}

# Training Configuration
APOLLO_DATA_DIR=${DATA_DIR}
APOLLO_CACHE_DIR=${CACHE_DIR}

# CUDA Configuration
CUDA_VISIBLE_DEVICES=0
EOF

# The file holds the Neo4j password — keep it owner-readable only.
chmod 600 "${APOLLO_DIR}/.env"

log_success "Environment file created at ${APOLLO_DIR}/.env"
|
|
| |
| |
| |
echo ""
log_info "Step 8: Cache generation setup..."

mkdir -p "${CACHE_DIR}"

# Count mints already imported. An unreachable server yields "0"; the extra
# default and the suppressed test error keep the integer comparison quiet
# even if the client ever prints an empty or non-numeric value.
MINT_COUNT=$(clickhouse-client --query "SELECT count() FROM apollo.mints" 2>/dev/null || echo "0")
MINT_COUNT=${MINT_COUNT:-0}

if [ "$MINT_COUNT" -gt 0 ] 2>/dev/null; then
    log_info "Found ${MINT_COUNT} mints in database. Ready for caching."
    log_info "To generate cache, run:"
    echo ""
    echo "  cd ${APOLLO_DIR}"
    echo "  source venv/bin/activate"
    echo "  python scripts/cache_parallel.py --output_dir ${CACHE_DIR} --num_workers ${CACHE_WORKERS}"
    echo ""
else
    log_warn "No mint data found in ClickHouse. Please import data first."
    log_info "After importing epoch data, run the caching script."
fi
|
|
| |
| |
| |
echo ""
log_info "Step 9: Creating helper scripts..."

# Helper: start and sanity-check both backing services. The generated
# script previously had no shebang, so it could not be executed directly
# with a guaranteed interpreter.
cat << 'EOF' > "${APOLLO_DIR}/start_services.sh"
#!/bin/bash
# Start ClickHouse and Neo4j, then verify that both respond.

echo "Starting ClickHouse..."
sudo systemctl start clickhouse-server

echo "Starting Neo4j..."
sudo systemctl start neo4j

echo "Waiting for services to be ready..."
sleep 5

echo "Checking ClickHouse..."
clickhouse-client --query "SELECT 1" && echo "ClickHouse OK" || echo "ClickHouse FAILED"

echo "Checking Neo4j..."
curl -s http://localhost:7474 > /dev/null && echo "Neo4j OK" || echo "Neo4j FAILED"

echo "Services started!"
EOF
chmod +x "${APOLLO_DIR}/start_services.sh"
|
|
| |
# Helper: launch training with settings tuned for a 48GB-VRAM GPU.
# Unquoted delimiter bakes ${APOLLO_DIR} into the generated script; '\\'
# keeps its line continuations and '\$@' defers to its runtime arguments.
# The baked-in cd path is now quoted so paths with spaces survive.
cat << EOF > "${APOLLO_DIR}/train_launch.sh"
#!/bin/bash
# Launch Apollo training with optimal settings

cd "${APOLLO_DIR}"
source venv/bin/activate
source .env

# Optimal settings for 48GB VRAM
accelerate launch train.py \\
    --epochs 7 \\
    --batch_size 16 \\
    --grad_accum_steps 4 \\
    --learning_rate 1e-4 \\
    --warmup_ratio 0.1 \\
    --max_grad_norm 1.0 \\
    --mixed_precision bf16 \\
    --max_seq_len 8192 \\
    --horizons_seconds 60 180 300 600 1800 3600 7200 \\
    --quantiles 0.1 0.5 0.9 \\
    --num_workers 16 \\
    --pin_memory \\
    --val_split 0.1 \\
    --val_every 5000 \\
    --save_every 5000 \\
    --log_every 100 \\
    "\$@"
EOF
chmod +x "${APOLLO_DIR}/train_launch.sh"
|
|
| |
# Helper: one-shot status report for system, services, and cache. Quoted
# delimiter: everything expands when check_status.sh runs, not now. The
# generated script previously had no shebang.
cat << 'EOF' > "${APOLLO_DIR}/check_status.sh"
#!/bin/bash
# Report system resources, service health, and training-cache state.

echo "============================================"
echo "Apollo Server Status"
echo "============================================"

echo ""
echo "=== System Resources ==="
echo "CPU: $(nproc) cores"
echo "RAM: $(free -h | awk '/^Mem:/{print $2}') total, $(free -h | awk '/^Mem:/{print $3}') used"
echo "Disk: $(df -h /workspace 2>/dev/null | awk 'NR==2{print $4}' || df -h / | awk 'NR==2{print $4}') available"

if command -v nvidia-smi &> /dev/null; then
    echo ""
    echo "=== GPU Status ==="
    nvidia-smi --query-gpu=name,memory.used,memory.total,utilization.gpu --format=csv,noheader
fi

echo ""
echo "=== ClickHouse ==="
if systemctl is-active --quiet clickhouse-server; then
    echo "Status: Running"
    MINT_COUNT=$(clickhouse-client --query "SELECT count() FROM apollo.mints" 2>/dev/null || echo "N/A")
    TRADE_COUNT=$(clickhouse-client --query "SELECT count() FROM apollo.trades" 2>/dev/null || echo "N/A")
    echo "Mints: ${MINT_COUNT}"
    echo "Trades: ${TRADE_COUNT}"
else
    echo "Status: STOPPED"
fi

echo ""
echo "=== Neo4j ==="
if systemctl is-active --quiet neo4j; then
    echo "Status: Running"
else
    echo "Status: STOPPED"
fi

echo ""
echo "=== Cache ==="
CACHE_DIR="${APOLLO_CACHE_DIR:-/workspace/apollo/data/cache}"
if [ -d "$CACHE_DIR" ]; then
    # find instead of 'ls | wc -l': robust when nothing matches the pattern.
    CACHE_COUNT=$(find "$CACHE_DIR" -maxdepth 1 -name 'sample_*.pt' 2>/dev/null | wc -l)
    CACHE_SIZE=$(du -sh "$CACHE_DIR" 2>/dev/null | cut -f1)
    echo "Files: ${CACHE_COUNT}"
    echo "Size: ${CACHE_SIZE}"
else
    echo "Cache directory not found"
fi

echo ""
echo "============================================"
EOF
chmod +x "${APOLLO_DIR}/check_status.sh"

log_success "Helper scripts created"
|
|
| |
| |
| |
# Final summary: next steps and handy commands for the operator. Heredocs
# replace the long run of echo statements; output is unchanged.
cat << EOF

============================================================
 Setup Complete!
============================================================

EOF
log_success "Apollo training server setup finished"
cat << EOF

Next steps:

1. Import epoch data (844-850):
   - Customize download scripts in ${DATA_DIR}/epochs/
   - Run: ./download_epoch_XXX.sh for each epoch
   - Import to ClickHouse using provided schema

2. Generate training cache:
   cd ${APOLLO_DIR}
   source venv/bin/activate
   python scripts/cache_parallel.py --output_dir ${CACHE_DIR}

3. Start training:
   ./train_launch.sh

Useful commands:
  ./start_services.sh - Start ClickHouse and Neo4j
  ./check_status.sh - Check system and service status
  ./train_launch.sh - Launch training with optimal settings

Environment variables saved to: ${APOLLO_DIR}/.env

EOF
|