| #!/bin/bash |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
# Fail fast: abort on the first failing command, and make a pipeline fail
# if any stage fails (plain `set -e` misses mid-pipeline failures).
# -u is deliberately NOT set: the script probes optional vars like $HF_TOKEN.
set -eo pipefail

# ANSI color codes for log output; NC resets the terminal color.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

# Logging helpers: each prints a colored severity tag followed by the message.
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
log_success() { echo -e "${GREEN}[✓]${NC} $1"; }   # tag was mojibake "[β]" — restored to a check mark
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
log_error() { echo -e "${RED}[ERROR]${NC} $1"; }
# log_step prints a boxed banner announcing a pipeline stage.
log_step() { echo -e "\n${CYAN}========================================${NC}"; echo -e "${CYAN} $1${NC}"; echo -e "${CYAN}========================================${NC}\n"; }
|
|
| |
| |
| |
# --- Paths -------------------------------------------------------------------
# Resolve the Apollo repo root from this script's location (scripts/..),
# unless the caller pre-set APOLLO_DIR. All derived dirs honor env overrides.
APOLLO_DIR="${APOLLO_DIR:-$(cd "$(dirname "$0")/.." && pwd)}"
DATA_DIR="${DATA_DIR:-${APOLLO_DIR}/data}"
CACHE_DIR="${CACHE_DIR:-${DATA_DIR}/cache}"
PUMP_FUN_DIR="${DATA_DIR}/pump_fun"

# --- Epochs ------------------------------------------------------------------
# Default epoch list to download/ingest; overridable via --epochs X,Y,Z.
EPOCHS=(844 845 846 847 848 849 850)

# --- ClickHouse connection (exported for the Python ingest/cache scripts) ----
export CLICKHOUSE_HOST="${CLICKHOUSE_HOST:-localhost}"
export CLICKHOUSE_PORT="${CLICKHOUSE_PORT:-9000}"
export CLICKHOUSE_HTTP_PORT="${CLICKHOUSE_HTTP_PORT:-8123}"
export CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
export CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
export CLICKHOUSE_DATABASE="${CLICKHOUSE_DATABASE:-default}"

# --- Neo4j connection (exported for the Python ingest scripts) ---------------
# NOTE(review): default password is hardcoded here and also pushed to the
# server in install_dependencies — override NEO4J_PASSWORD in production.
export NEO4J_URI="${NEO4J_URI:-bolt://localhost:7687}"
export NEO4J_USER="${NEO4J_USER:-neo4j}"
export NEO4J_PASSWORD="${NEO4J_PASSWORD:-apollo2024}"

# --- Cache generation --------------------------------------------------------
CACHE_WORKERS="${CACHE_WORKERS:-8}"
# Empty means "no limit"; when set it is forwarded as --max_samples.
MAX_CACHE_SAMPLES="${MAX_CACHE_SAMPLES:-}"

# --- Training ----------------------------------------------------------------
BATCH_SIZE="${BATCH_SIZE:-16}"
NUM_EPOCHS="${NUM_EPOCHS:-7}"
|
|
| |
| |
| |
# ---------------------------------------------------------------------------
# Command-line parsing. STEP selects a single pipeline stage (empty = all);
# SKIP_CONFIRM suppresses interactive prompts; --epochs replaces EPOCHS.
# ---------------------------------------------------------------------------
STEP=""
SKIP_CONFIRM=false

while (( $# > 0 )); do
    case "$1" in
        --help|-h)
            echo "Usage: $0 [OPTIONS]"
            echo ""
            echo "Options:"
            echo " --step STEP Run only specific step:"
            echo " install-deps, download-epochs, ingest-epochs,"
            echo " generate-cache, train, all (default)"
            echo " --epochs X,Y,Z Comma-separated list of epochs (default: 844-850)"
            echo " --yes, -y Skip confirmation prompts"
            echo " --help, -h Show this help message"
            exit 0
            ;;
        --yes|-y)
            SKIP_CONFIRM=true
            shift
            ;;
        --step)
            STEP="$2"
            shift 2
            ;;
        --epochs)
            # Split the comma-separated list into the EPOCHS array.
            IFS=',' read -ra EPOCHS <<< "$2"
            shift 2
            ;;
        *)
            log_error "Unknown option: $1"
            exit 1
            ;;
    esac
done
|
|
| |
| |
| |
#######################################
# Step 1: install and configure system-level services (ClickHouse, Neo4j)
# plus basic build/CLI tooling via apt. Requires sudo.
# Globals: NEO4J_PASSWORD (read)
# Outputs: progress logs; writes ClickHouse/Neo4j config files.
# Exits non-zero if the OS cannot be detected or ClickHouse fails to start.
#######################################
install_dependencies() {
    log_step "Step 1: Installing System Dependencies"

    # Detect the distro from os-release; abort if we cannot tell.
    if [ -f /etc/os-release ]; then
        . /etc/os-release
        OS=$ID
    else
        log_error "Cannot detect OS. Please install dependencies manually."
        exit 1
    fi

    log_info "Detected OS: $OS"

    # NOTE(review): apt-get is used unconditionally below — $OS is detected
    # but never branched on; confirm non-Debian distros are out of scope.
    log_info "Updating package list..."
    sudo apt-get update -qq

    log_info "Installing basic dependencies..."
    sudo apt-get install -y -qq \
        curl wget git build-essential \
        python3 python3-pip python3-venv \
        htop tmux unzip pigz pv \
        apt-transport-https ca-certificates gnupg

    # Install ClickHouse from the vendor apt repo, skipping if already present.
    if ! command -v clickhouse-server &> /dev/null; then
        log_info "Installing ClickHouse..."
        # apt-key is deprecated; failure is tolerated so the install can proceed
        # on systems where the key is already trusted.
        sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 2>/dev/null || true
        echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee /etc/apt/sources.list.d/clickhouse.list
        sudo apt-get update -qq
        sudo DEBIAN_FRONTEND=noninteractive apt-get install -y clickhouse-server clickhouse-client
    else
        log_info "ClickHouse already installed"
    fi

    # Drop a config override: memory caps, thread count, listen on all interfaces.
    # Quoted 'EOF' keeps the XML literal (no shell expansion).
    log_info "Configuring ClickHouse..."
    sudo mkdir -p /etc/clickhouse-server/config.d/
    cat << 'EOF' | sudo tee /etc/clickhouse-server/config.d/apollo.xml > /dev/null
<?xml version="1.0"?>
<clickhouse>
    <max_server_memory_usage_to_ram_ratio>0.8</max_server_memory_usage_to_ram_ratio>
    <max_memory_usage>200000000000</max_memory_usage>
    <max_threads>32</max_threads>
    <listen_host>0.0.0.0</listen_host>
</clickhouse>
EOF

    sudo systemctl enable clickhouse-server
    # restart covers the case where the service was already running.
    sudo systemctl start clickhouse-server || sudo systemctl restart clickhouse-server
    sleep 3

    # Smoke-test the server before continuing.
    if clickhouse-client --query "SELECT 1" &>/dev/null; then
        log_success "ClickHouse is running"
    else
        log_error "ClickHouse failed to start"
        exit 1
    fi

    # Install Neo4j from the vendor apt repo, skipping if already present.
    if ! command -v neo4j &> /dev/null; then
        log_info "Installing Neo4j..."
        wget -O - https://debian.neo4j.com/neotechnology.gpg.key 2>/dev/null | sudo apt-key add - 2>/dev/null || true
        echo 'deb https://debian.neo4j.com stable latest' | sudo tee /etc/apt/sources.list.d/neo4j.list
        sudo apt-get update -qq
        sudo apt-get install -y neo4j
    else
        log_info "Neo4j already installed"
    fi

    # Overwrite neo4j.conf wholesale: listen everywhere, fixed heap/page-cache
    # sizes. Unquoted EOF here (no $vars in the payload anyway).
    log_info "Configuring Neo4j..."
    sudo tee /etc/neo4j/neo4j.conf > /dev/null << EOF
dbms.default_listen_address=0.0.0.0
dbms.connector.bolt.listen_address=:7687
dbms.connector.http.listen_address=:7474
dbms.memory.heap.initial_size=4g
dbms.memory.heap.max_size=16g
dbms.memory.pagecache.size=8g
dbms.security.auth_enabled=true
EOF

    sudo systemctl enable neo4j
    sudo systemctl start neo4j || sudo systemctl restart neo4j
    sleep 5

    # Change the default neo4j/neo4j password via the HTTP API. Best-effort:
    # the call fails harmlessly if the password was already changed.
    log_info "Setting Neo4j password..."
    curl -s -X POST "http://localhost:7474/user/neo4j/password" \
        -H "Content-Type: application/json" \
        -d "{\"password\":\"${NEO4J_PASSWORD}\"}" \
        -u neo4j:neo4j 2>/dev/null || true

    log_success "System dependencies installed"
}
|
|
| |
| |
| |
#######################################
# Step 2: set up the Python environment inside $APOLLO_DIR.
# Creates ./venv if missing, activates it, then installs PyTorch (CUDA 12.1
# wheels), the project's requirements.txt, and a couple of extras.
#######################################
install_python_deps() {
    log_step "Step 2: Installing Python Dependencies"

    cd "$APOLLO_DIR"

    # Create the venv only once; later runs reuse it.
    if [[ ! -d venv ]]; then
        log_info "Creating Python virtual environment..."
        python3 -m venv venv
    fi

    # shellcheck disable=SC1091 — created just above
    . venv/bin/activate

    log_info "Upgrading pip..."
    pip install --upgrade pip -q

    log_info "Installing PyTorch with CUDA support..."
    pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 -q

    log_info "Installing project requirements..."
    pip install -r requirements.txt -q

    # Extras not pinned in requirements.txt.
    pip install Pillow requests -q

    log_success "Python dependencies installed"
}
|
|
| |
| |
| |
#######################################
# Step 3: download epoch artifacts from Hugging Face.
# Globals: EPOCHS, PUMP_FUN_DIR, APOLLO_DIR (read); HF_TOKEN (read, optional)
# Skips epochs that already have >10 parquet files; individual download
# failures are logged but do not abort the run.
#######################################
download_epochs() {
    log_step "Step 3: Downloading Epochs ${EPOCHS[*]}"

    cd "$APOLLO_DIR"
    source venv/bin/activate

    # Downloads may require authentication; ${HF_TOKEN:-} stays safe if the
    # variable is entirely unset (e.g. under `set -u`).
    if [ -z "${HF_TOKEN:-}" ]; then
        log_warn "HF_TOKEN not set. Some downloads may fail."
        log_info "Set it with: export HF_TOKEN=your_token"
    fi

    for epoch in "${EPOCHS[@]}"; do
        log_info "Downloading epoch ${epoch}..."

        EPOCH_DIR="${PUMP_FUN_DIR}/epoch_${epoch}"
        # Count parquet files with find instead of parsing ls output (SC2012):
        # handles arbitrary filenames, and a missing directory yields 0.
        PARQUET_COUNT=$(find "$EPOCH_DIR" -maxdepth 1 -name '*.parquet' 2>/dev/null | wc -l)
        if [ "$PARQUET_COUNT" -gt 10 ]; then
            log_info "Epoch ${epoch} already downloaded (${PARQUET_COUNT} files), skipping..."
            continue
        fi

        # Best-effort: forward --token only when HF_TOKEN is non-empty.
        python scripts/download_epoch_artifacts.py --epoch "$epoch" ${HF_TOKEN:+--token "$HF_TOKEN"} || {
            log_warn "Failed to download epoch ${epoch}, continuing..."
        }
    done

    log_success "Epoch downloads complete"
}
|
|
| |
| |
| |
#######################################
# Step 4: load each downloaded epoch into ClickHouse and merge into Neo4j,
# then report resulting row counts.
# Globals: EPOCHS, PUMP_FUN_DIR, APOLLO_DIR (read)
#######################################
ingest_epochs() {
    log_step "Step 4: Ingesting Epochs into ClickHouse and Neo4j"

    cd "$APOLLO_DIR"
    source venv/bin/activate

    for epoch in "${EPOCHS[@]}"; do
        EPOCH_DIR="${PUMP_FUN_DIR}/epoch_${epoch}"
        log_info "Ingesting epoch ${epoch}..."

        # Nothing to do for epochs that were never downloaded.
        if [[ ! -d "$EPOCH_DIR" ]]; then
            log_warn "Epoch ${epoch} not found at ${EPOCH_DIR}, skipping..."
            continue
        fi

        # A single failed epoch is logged, not fatal.
        python scripts/ingest_epoch.py --epoch "$epoch" --merge-neo4j || {
            log_warn "Failed to ingest epoch ${epoch}, continuing..."
        }
    done

    # Sanity-check row counts; the "0" fallback keeps the report going even
    # when ClickHouse is unreachable.
    log_info "Verifying data ingestion..."
    MINT_COUNT=$(clickhouse-client --query "SELECT count() FROM mints" 2>/dev/null || echo "0")
    TRADE_COUNT=$(clickhouse-client --query "SELECT count() FROM trades" 2>/dev/null || echo "0")
    log_info " Mints: ${MINT_COUNT}"
    log_info " Trades: ${TRADE_COUNT}"

    log_success "Epoch ingestion complete"
}
|
|
| |
| |
| |
#######################################
# Step 5: build the training cache of sample_*.pt files in $CACHE_DIR.
# Globals: CACHE_DIR, CACHE_WORKERS, MAX_CACHE_SAMPLES, SKIP_CONFIRM,
#          APOLLO_DIR (read)
# Prompts before appending to a large existing cache (unless --yes).
# Exits non-zero if cache generation itself fails.
#######################################
generate_cache() {
    log_step "Step 5: Generating Training Cache"

    cd "$APOLLO_DIR"
    source venv/bin/activate

    mkdir -p "$CACHE_DIR"

    # Count existing cache files with find rather than parsing ls (SC2012).
    EXISTING_CACHE=$(find "$CACHE_DIR" -maxdepth 1 -name 'sample_*.pt' 2>/dev/null | wc -l)
    if [ "$EXISTING_CACHE" -gt 1000 ]; then
        log_warn "Found ${EXISTING_CACHE} existing cache files"
        if [ "$SKIP_CONFIRM" = false ]; then
            read -p "Continue caching (will add to existing)? [y/N] " -n 1 -r
            echo
            if [[ ! $REPLY =~ ^[Yy]$ ]]; then
                log_info "Skipping cache generation"
                return 0
            fi
        fi
    fi

    log_info "Generating cache with ${CACHE_WORKERS} workers..."
    log_info "This may take several hours for 230k+ samples..."

    # Build argv as an array so paths containing spaces survive intact — the
    # previous CACHE_ARGS string relied on unquoted word-splitting.
    local -a cache_args=(--output_dir "$CACHE_DIR" --num_workers "$CACHE_WORKERS")
    if [ -n "$MAX_CACHE_SAMPLES" ]; then
        cache_args+=(--max_samples "$MAX_CACHE_SAMPLES")
    fi

    python scripts/cache_parallel.py "${cache_args[@]}" || {
        log_error "Cache generation failed"
        exit 1
    }

    # Spot-check a sample of the cache; validation problems are non-fatal.
    log_info "Validating cache..."
    python scripts/validate_cache_v2.py --cache_dir "$CACHE_DIR" --sample_size 100 || true

    FINAL_CACHE=$(find "$CACHE_DIR" -maxdepth 1 -name 'sample_*.pt' 2>/dev/null | wc -l)
    log_success "Cache generation complete: ${FINAL_CACHE} samples"
}
|
|
| |
| |
| |
#######################################
# Step 6: launch training via `accelerate` using the cached samples.
# Globals: CACHE_DIR, BATCH_SIZE, NUM_EPOCHS, APOLLO_DIR (read)
# Exits non-zero if fewer than 100 cache files exist.
#######################################
launch_training() {
    log_step "Step 6: Launching Training"

    cd "$APOLLO_DIR"
    source venv/bin/activate

    # Count cache files with find instead of parsing ls output (SC2012);
    # a missing CACHE_DIR safely yields 0.
    CACHE_COUNT=$(find "$CACHE_DIR" -maxdepth 1 -name 'sample_*.pt' 2>/dev/null | wc -l)
    if [ "$CACHE_COUNT" -lt 100 ]; then
        log_error "Not enough cache files (${CACHE_COUNT}). Run cache generation first."
        exit 1
    fi

    log_info "Starting training with ${CACHE_COUNT} cached samples..."
    log_info " Batch size: ${BATCH_SIZE}"
    log_info " Epochs: ${NUM_EPOCHS}"
    log_info " GPU: $(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null || echo 'N/A')"

    # Only epochs/batch size are configurable via env; the remaining
    # hyperparameters are pinned here on purpose.
    accelerate launch train.py \
        --epochs "$NUM_EPOCHS" \
        --batch_size "$BATCH_SIZE" \
        --grad_accum_steps 4 \
        --learning_rate 1e-4 \
        --warmup_ratio 0.1 \
        --max_grad_norm 1.0 \
        --mixed_precision bf16 \
        --max_seq_len 8192 \
        --horizons_seconds 60 180 300 600 1800 3600 7200 \
        --quantiles 0.1 0.5 0.9 \
        --num_workers 16 \
        --pin_memory \
        --val_split 0.1 \
        --val_every 5000 \
        --save_every 5000 \
        --log_every 100
}
|
|
| |
| |
| |
#######################################
# Write ${APOLLO_DIR}/.env capturing the current connection settings and
# paths so the Python tooling picks them up on later runs.
# Globals: all CLICKHOUSE_*/NEO4J_* vars, DATA_DIR, CACHE_DIR, APOLLO_DIR,
#          HF_TOKEN (read). Overwrites any existing .env.
#######################################
create_env_file() {
    log_info "Creating .env file..."

    # Unquoted EOF so the current variable values are expanded into the file.
    cat > "${APOLLO_DIR}/.env" << EOF
# ClickHouse
CLICKHOUSE_HOST=${CLICKHOUSE_HOST}
CLICKHOUSE_PORT=${CLICKHOUSE_PORT}
CLICKHOUSE_HTTP_PORT=${CLICKHOUSE_HTTP_PORT}
CLICKHOUSE_USER=${CLICKHOUSE_USER}
CLICKHOUSE_PASSWORD=${CLICKHOUSE_PASSWORD}
CLICKHOUSE_DATABASE=${CLICKHOUSE_DATABASE}

# Neo4j
NEO4J_URI=${NEO4J_URI}
NEO4J_USER=${NEO4J_USER}
NEO4J_PASSWORD=${NEO4J_PASSWORD}

# Paths
APOLLO_DATA_DIR=${DATA_DIR}
APOLLO_CACHE_DIR=${CACHE_DIR}

# Hugging Face (set your token here)
HF_TOKEN=${HF_TOKEN:-}
EOF

    log_success "Environment file created at ${APOLLO_DIR}/.env"
}
|
|
| |
| |
| |
#######################################
# Entry point: print a summary banner, then dispatch on $STEP. An empty or
# "all" step runs the full pipeline after an interactive confirmation
# (suppressed by --yes).
# Globals: STEP, SKIP_CONFIRM, EPOCHS, APOLLO_DIR, DATA_DIR, CACHE_DIR,
#          HF_TOKEN (read)
#######################################
main() {
    echo ""
    # Banner — the previous box-drawing characters were mojibake-corrupted
    # ("βββ…"); restored to clean UTF-8 box drawing.
    echo "╔═══════════════════════════════════════════════════════════════╗"
    echo "║           Apollo Training Server - Complete Setup             ║"
    echo "╚═══════════════════════════════════════════════════════════════╝"
    echo ""
    echo " Apollo Directory: ${APOLLO_DIR}"
    echo " Data Directory: ${DATA_DIR}"
    echo " Cache Directory: ${CACHE_DIR}"
    echo " Epochs: ${EPOCHS[*]}"
    echo ""

    # Warn early about a missing token; ${HF_TOKEN:-} is safe if unset.
    if [ -z "${HF_TOKEN:-}" ]; then
        log_warn "HF_TOKEN not set. Downloads may fail."
        log_info "Set it with: export HF_TOKEN=your_huggingface_token"
        echo ""
    fi

    case "$STEP" in
        install-deps)
            install_dependencies
            install_python_deps
            create_env_file
            ;;
        download-epochs)
            download_epochs
            ;;
        ingest-epochs)
            ingest_epochs
            ;;
        generate-cache)
            generate_cache
            ;;
        train)
            launch_training
            ;;
        ""|all)
            # Full pipeline needs an explicit go-ahead unless --yes was given.
            if [ "$SKIP_CONFIRM" = false ]; then
                echo "This will run the complete setup pipeline:"
                echo " 1. Install system dependencies (ClickHouse, Neo4j)"
                echo " 2. Install Python dependencies"
                echo " 3. Download epochs ${EPOCHS[*]}"
                echo " 4. Ingest data into databases"
                echo " 5. Generate training cache"
                echo " 6. Launch training"
                echo ""
                read -p "Continue? [y/N] " -n 1 -r
                echo
                if [[ ! $REPLY =~ ^[Yy]$ ]]; then
                    log_info "Aborted."
                    exit 0
                fi
            fi

            install_dependencies
            install_python_deps
            create_env_file
            download_epochs
            ingest_epochs
            generate_cache
            launch_training
            ;;
        *)
            log_error "Unknown step: $STEP"
            echo "Valid steps: install-deps, download-epochs, ingest-epochs, generate-cache, train, all"
            exit 1
            ;;
    esac

    echo ""
    log_success "Setup complete!"
    echo ""
    echo "Useful commands:"
    echo " source venv/bin/activate # Activate Python environment"
    echo " ./scripts/check_status.sh # Check system status"
    echo " accelerate launch train.py # Start training"
    echo ""
}
|
|
| |
# Entry point: forward all CLI arguments to main.
main "$@"
|
|