oracle / install.sh
zirobtc's picture
Fix: Neo4j permissions + stop-on-failure (install.sh)
5e564f3
#!/bin/bash
#===============================================================================
# Apollo Training Server - Complete Automated Setup
#===============================================================================
# This script sets up a fresh server for Apollo training:
# 1. Installs ClickHouse, Neo4j, Python dependencies
# 2. Downloads epochs 844-846 from Hugging Face
# 3. Ingests all data into databases
# 4. Generates training cache (fully offline)
# 5. Ready to train!
#
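# Usage:
# huggingface-cli login # or export HF_TOKEN=your_token
# hf download --repo-type model zirobtc/oracle --local-dir ./apollo
# cd apollo && chmod +x install.sh && source install.sh
#
# Note: when the script is sourced, `set -e` and the `exit 1` inside error()
# act on the calling shell, so a failed step also closes that shell session.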
#===============================================================================
# Stop at the first command that fails without being handled.
set -e

# Terminal colour codes. They are stored as literal backslash sequences and
# only turned into escape characters by the printing helpers.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'  # reset all attributes
# Print a progress message prefixed with the current wall-clock time.
# Arguments: $1 - message text (backslash escapes in it are interpreted)
# Outputs:   "[HH:MM:SS] <message>" on stdout, timestamp coloured with BLUE
log() {
  printf '%b\n' "${BLUE}[$(date +%H:%M:%S)]${NC} $1"
}
# Print a success line tagged with a green check mark.
# Arguments: $1 - message text (backslash escapes in it are interpreted)
# Outputs:   "[✓] <message>" on stdout
success() {
  # The marker had been stored as mis-encoded bytes; it is the U+2713 check mark.
  printf '%b\n' "${GREEN}[✓]${NC} $1"
}
# Print a warning line tagged with a yellow "[!]" marker.
# Arguments: $1 - message text (backslash escapes in it are interpreted)
# Outputs:   "[!] <message>" on stdout
warn() {
  printf '%b\n' "${YELLOW}[!]${NC} $1"
}
# Report a fatal problem and terminate with status 1.
# Arguments: $1 - message text (backslash escapes in it are interpreted)
# Outputs:   "[✗] <message>" on stderr
# Returns:   never returns; calls `exit 1`, which ends the current shell
#            (the caller's shell when the script is sourced)
error() {
  # Fatal diagnostics go to stderr so they stay visible when stdout is
  # redirected. The marker is the U+2717 ballot X that had been mis-encoded.
  printf '%b\n' "${RED}[✗]${NC} $1" >&2
  exit 1
}
# Print a section heading framed by two horizontal rules.
# Arguments: $1 - heading text (backslash escapes in it are interpreted)
# Outputs:   blank line, rule, " <heading>", rule, blank line on stdout,
#            all in bold cyan
header() {
  local rule
  # The rule had been stored as a run of mis-encoded bytes; build it from the
  # U+2550 box-drawing character instead of keeping a long literal.
  printf -v rule '═%.0s' {1..58}
  printf '\n%b\n' "${CYAN}${BOLD}${rule}${NC}"
  printf '%b\n' "${CYAN}${BOLD} $1${NC}"
  printf '%b\n\n' "${CYAN}${BOLD}${rule}${NC}"
}
#===============================================================================
# Configuration
#===============================================================================
# Working locations, all derived from the directory the script is started in.
APOLLO_DIR="$(pwd)"
DATA_DIR="${APOLLO_DIR}/data"
CACHE_DIR="${DATA_DIR}/cache"
PUMP_FUN_DIR="${DATA_DIR}/pump_fun"

# Epochs to download and ingest, processed in this order.
EPOCHS=(844 845 846)

# Number of cache-generation workers; a value already present in the
# environment takes precedence over the default of 8.
CACHE_WORKERS="${CACHE_WORKERS:-8}"

# ClickHouse connection settings, exported so child processes inherit them.
export CLICKHOUSE_HOST="localhost"
export CLICKHOUSE_PORT="9000"
export CLICKHOUSE_HTTP_PORT="8123"
export CLICKHOUSE_USER="default"
export CLICKHOUSE_PASSWORD=""
export CLICKHOUSE_DATABASE="default"

# Neo4j connection settings. The password can be supplied through the
# environment so it does not have to be edited into this file; the previous
# hard-coded value remains the default.
export NEO4J_URI="bolt://localhost:7687"
export NEO4J_USER="neo4j"
export NEO4J_PASSWORD="${NEO4J_PASSWORD:-neo4j123}"
# Startup banner followed by a summary of the effective configuration.
# The box-drawing characters and emoji had been stored as mis-encoded bytes;
# they are restored here, with the frame generated rather than typed out.
box_rule="$(printf '═%.0s' {1..66})"
echo ""
echo "╔${box_rule}╗"
# Padding assumes each emoji occupies two terminal columns.
printf '║%16s%s%16s║\n' '' '🚀 Apollo Training Server Setup 🚀' ''
echo "╚${box_rule}╝"
echo ""
echo " 📁 Directory: ${APOLLO_DIR}"
echo " 📅 Epochs: ${EPOCHS[*]}"
echo " 🔧 Workers: ${CACHE_WORKERS}"
echo ""
#===============================================================================
# Step 1: System Dependencies
#===============================================================================
header "Step 1/7: Installing System Dependencies"

# apt-get is used rather than apt: apt's command-line interface is aimed at
# interactive use and is not guaranteed stable for scripts.
log "Updating package list..."
sudo apt-get update -qq

# DEBIAN_FRONTEND=noninteractive keeps package configuration from prompting
# and stalling an unattended install.
log "Installing base packages..."
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y -qq \
  curl wget gnupg apt-transport-https ca-certificates dirmngr \
  pkg-config libudev-dev build-essential \
  python3 python3-pip python3-venv \
  htop tmux unzip pigz pv \
  openjdk-11-jre-headless

# Rust (needed for some deps)
if ! command -v cargo &> /dev/null; then
  log "Installing Rust..."
  curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
  # Quoted so a home directory containing spaces does not split the path.
  source "$HOME/.cargo/env"
fi

success "Base dependencies installed"
#===============================================================================
# Step 2: Install ClickHouse
#===============================================================================
header "Step 2/7: Installing ClickHouse"

if ! command -v clickhouse-server &> /dev/null; then
  log "Adding ClickHouse repository..."
  # The repository key is stored in its own keyring and referenced through
  # signed-by. This replaces the deprecated apt-key call, whose failure was
  # silently ignored and only surfaced later as an untrusted repository.
  curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' \
    | sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg
  echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg arch=$(dpkg --print-architecture)] https://packages.clickhouse.com/deb stable main" \
    | sudo tee /etc/apt/sources.list.d/clickhouse.list
  sudo apt-get update -qq
  log "Installing ClickHouse..."
  sudo DEBIAN_FRONTEND=noninteractive apt-get install -y clickhouse-server clickhouse-client
else
  log "ClickHouse already installed"
fi

# Configure for high performance
# NOTE(review): listen_host 0.0.0.0 accepts connections on every interface
# while the default user has an empty password; confirm the host is firewalled.
# NOTE(review): max_threads is usually a per-profile setting; confirm that the
# server accepts it in a config.d override.
log "Configuring ClickHouse..."
sudo mkdir -p /etc/clickhouse-server/config.d/
cat << 'CHXML' | sudo tee /etc/clickhouse-server/config.d/apollo.xml > /dev/null
<?xml version="1.0"?>
<clickhouse>
<max_server_memory_usage_to_ram_ratio>0.8</max_server_memory_usage_to_ram_ratio>
<max_threads>32</max_threads>
<listen_host>0.0.0.0</listen_host>
</clickhouse>
CHXML

log "Starting ClickHouse..."
sudo systemctl enable clickhouse-server 2>/dev/null || true
# Fall back to launching the daemon directly when systemd is unavailable.
sudo systemctl start clickhouse-server 2>/dev/null || sudo clickhouse-server --daemon

# Poll for up to 30 seconds instead of sleeping a fixed time and probing once,
# so a slow start is not misreported and a fast start is not delayed.
clickhouse_ready=0
for _ in {1..30}; do
  if clickhouse-client --query "SELECT 1" &>/dev/null; then
    clickhouse_ready=1
    break
  fi
  sleep 1
done

if [[ "$clickhouse_ready" -eq 1 ]]; then
  success "ClickHouse is running"
else
  warn "ClickHouse may need manual start: sudo clickhouse-server --daemon"
fi
#===============================================================================
# Step 3: Install Neo4j
#===============================================================================
header "Step 3/7: Installing Neo4j"

if ! command -v neo4j &> /dev/null; then
  log "Adding Neo4j repository..."
  # --batch --yes lets gpg overwrite a keyring left behind by an earlier run,
  # so a gpg failure no longer has to be ignored.
  wget -qO - https://debian.neo4j.com/neotechnology.gpg.key \
    | sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/neo4j.gpg
  echo "deb [signed-by=/usr/share/keyrings/neo4j.gpg] https://debian.neo4j.com stable latest" \
    | sudo tee /etc/apt/sources.list.d/neo4j.list
  sudo apt-get update -qq
  log "Installing Neo4j..."
  sudo DEBIAN_FRONTEND=noninteractive apt-get install -y neo4j
else
  log "Neo4j already installed"
fi

# Configure
# This replaces the whole neo4j.conf rather than amending it.
# NOTE(review): the heap (4g-16g) and page cache (8g) sizes are fixed; confirm
# the target host has enough RAM for them.
log "Configuring Neo4j..."
sudo tee /etc/neo4j/neo4j.conf > /dev/null << 'NEOCONF'
server.default_listen_address=0.0.0.0
server.bolt.listen_address=:7687
server.http.listen_address=:7474
server.memory.heap.initial_size=4g
server.memory.heap.max_size=16g
server.memory.pagecache.size=8g
dbms.security.auth_enabled=true
NEOCONF

log "Setting Neo4j password..."
# neo4j-admin runs as the neo4j service account so the files it writes are
# owned by the user the server runs as, not by root. The step stays
# best-effort (it fails once a password already exists) but is no longer
# silent about it.
sudo -u neo4j neo4j-admin dbms set-initial-password "${NEO4J_PASSWORD}" 2>/dev/null \
  || warn "Could not set the initial Neo4j password (it may already be set)"

log "Starting Neo4j..."
sudo systemctl enable neo4j 2>/dev/null || true
# Without systemd, start the server directly, again as the neo4j account.
sudo systemctl start neo4j 2>/dev/null || sudo -u neo4j neo4j start
sleep 5
success "Neo4j configured (password: ${NEO4J_PASSWORD})"
#===============================================================================
# Step 4: Python Environment
#===============================================================================
header "Step 4/7: Setting up Python Environment"
cd "$APOLLO_DIR"

# Reuse an existing virtual environment; build one only when it is missing.
if [[ ! -d venv ]]; then
  log "Creating virtual environment..."
  python3 -m venv venv
fi

log "Activating environment..."
source venv/bin/activate

log "Upgrading pip..."
pip install -q --upgrade pip

# PyTorch is pulled from the cu121 wheel index instead of the default index.
log "Installing PyTorch with CUDA..."
pip install -q torch torchvision torchaudio \
  --index-url https://download.pytorch.org/whl/cu121

# Project requirements first, then the extra packages named explicitly.
log "Installing requirements..."
pip install -q -r requirements.txt
pip install -q Pillow requests huggingface_hub

success "Python environment ready"
#===============================================================================
# Step 5+6: Download, Ingest, Delete (one epoch at a time to save disk)
#===============================================================================
header "Step 5-6/7: Processing Epochs (Download → Ingest → Delete)"
cd "$APOLLO_DIR"
source venv/bin/activate

# Print the row count of ClickHouse table $1, or 0 when the query fails.
ch_row_count() {
  clickhouse-client --query "SELECT count() FROM $1" 2>/dev/null || echo "0"
}

# Separator printed before each epoch (U+2501 heavy horizontal line).
epoch_rule="$(printf '━%.0s' {1..40})"

log "Processing epochs one at a time to minimize disk usage..."
log "Each epoch: ~20GB download → ingest → delete"
echo ""

for epoch in "${EPOCHS[@]}"; do
  # :? aborts rather than building a path directly under / should
  # PUMP_FUN_DIR ever be empty, since this directory is removed below.
  EPOCH_DIR="${PUMP_FUN_DIR:?}/epoch_${epoch}"
  log "$epoch_rule"
  log "Processing epoch ${epoch}..."

  # Step 1: Download
  log " [1/3] Downloading epoch ${epoch}..."
  python scripts/download_epoch_artifacts.py --epoch "$epoch" \
    || error "Failed to download epoch ${epoch}. Cannot continue."

  # Step 2: Ingest (always pass --merge-neo4j; auto-detect handles empty DB)
  log " [2/3] Ingesting epoch ${epoch} into databases..."
  python scripts/ingest_epoch.py --epoch "$epoch" --merge-neo4j \
    || error "Ingestion failed for epoch ${epoch}. Cannot continue."

  # Step 3: Delete parquet files to free disk space
  log " [3/3] Cleaning up epoch ${epoch} parquet files..."
  rm -rf -- "$EPOCH_DIR"

  # Show progress
  CURRENT_MINTS="$(ch_row_count mints)"
  CURRENT_TRADES="$(ch_row_count trades)"
  log " Progress: ${CURRENT_MINTS} mints, ${CURRENT_TRADES} trades"
  log " Disk free: $(df -h . | awk 'NR==2{print $4}')"
done

# Final verification
log ""
log "Verifying final data..."
MINTS="$(ch_row_count mints)"
TRADES="$(ch_row_count trades)"
log " 📊 Mints: ${MINTS}"
log " 📊 Trades: ${TRADES}"
# A count of 0 is also what a failed query reports, so call it out instead of
# letting the run look healthy.
if [[ "$MINTS" == "0" || "$TRADES" == "0" ]]; then
  warn "ClickHouse reports an empty mints or trades table - check ingestion"
fi
success "All epochs processed and ingested"
#===============================================================================
# Step 7: Generate Training Cache
#===============================================================================
header "Step 7/7: Generating Training Cache (Offline Mode)"
cd "$APOLLO_DIR"
source venv/bin/activate
mkdir -p "$CACHE_DIR"

log "Generating balanced cache with ${CACHE_WORKERS} workers (context mode)..."
log "Target: ~15,000 balanced cache files across all classes"
log "⏳ This may take 1-3 hours depending on data size..."
echo ""

# Cache generation is best-effort: a failure is reported but does not stop the
# rest of the setup.
python scripts/cache_dataset.py \
  --output_dir "$CACHE_DIR" \
  --num_workers "$CACHE_WORKERS" \
  --cache_mode context \
  --context_length 4096 \
  --horizons_seconds 30 60 120 240 420 \
  --quantiles 0.1 0.5 0.9 \
  --min_trades 10 || {
  warn "Cache generation had errors - check logs"
}

# Count with find rather than `ls <glob>`: expanding thousands of paths onto
# one command line can exceed the argument-length limit, in which case ls
# fails and the count would silently read 0.
CACHE_COUNT=$(find "$CACHE_DIR" -maxdepth 1 -name 'sample_*.pt' 2>/dev/null | wc -l)
success "Cache complete: ${CACHE_COUNT} samples"
#===============================================================================
# Create Helper Files
#===============================================================================
log "Creating configuration files..."

# .env file
# Snapshot of the connection settings in KEY=value form. The delimiter is
# unquoted, so the values are expanded now, while this script runs.
cat << ENVFILE > "${APOLLO_DIR}/.env"
CLICKHOUSE_HOST=${CLICKHOUSE_HOST}
CLICKHOUSE_PORT=${CLICKHOUSE_PORT}
CLICKHOUSE_HTTP_PORT=${CLICKHOUSE_HTTP_PORT}
CLICKHOUSE_USER=${CLICKHOUSE_USER}
CLICKHOUSE_PASSWORD=${CLICKHOUSE_PASSWORD}
CLICKHOUSE_DATABASE=${CLICKHOUSE_DATABASE}
NEO4J_URI=${NEO4J_URI}
NEO4J_USER=${NEO4J_USER}
NEO4J_PASSWORD=${NEO4J_PASSWORD}
APOLLO_CACHE_DIR=${CACHE_DIR}
ENVFILE
# The file holds database credentials; restrict it to the owning user.
chmod 600 "${APOLLO_DIR}/.env"
# Training launch script
# The quoted delimiter keeps the body literal: every $(...) and "$@" below is
# evaluated when train_launch.sh runs, not while this installer runs.
cat << 'TRAINSH' > "${APOLLO_DIR}/train_launch.sh"
#!/bin/bash
# Launch Apollo training from the directory that contains this script.
# Extra command-line arguments are appended to the train.py options.
cd "$(dirname "$0")" || exit 1
source venv/bin/activate
# .env holds plain KEY=value lines; `set -a` exports each one so the training
# process inherits them instead of leaving them as unexported shell variables.
set -a
source .env 2>/dev/null || true
set +a
echo "🚀 Starting Apollo training..."
echo " GPU: $(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null || echo 'N/A')"
echo ""
accelerate launch train.py \
  --epochs 10 \
  --batch_size 8 \
  --grad_accum_steps 2 \
  --learning_rate 1e-4 \
  --warmup_ratio 0.1 \
  --max_grad_norm 1.0 \
  --mixed_precision bf16 \
  --max_seq_len 4096 \
  --horizons_seconds 30 60 120 240 420 \
  --quantiles 0.1 0.5 0.9 \
  --ohlc_stats_path ./data/ohlc_stats.npz \
  --num_workers 4 \
  --pin_memory \
  --val_split 0.1 \
  --val_every 2000 \
  --save_every 2000 \
  --log_every 50 \
  "$@"
TRAINSH
chmod +x "${APOLLO_DIR}/train_launch.sh"
# Status check script
# The quoted delimiter keeps the body literal; it is evaluated only when
# check_status.sh itself runs.
cat << 'STATUSSH' > "${APOLLO_DIR}/check_status.sh"
#!/bin/bash
# Print a snapshot of host resources, GPU, ClickHouse row counts and the cache.
# The relative paths below are resolved against the directory holding this
# script, so the report does not depend on where it is invoked from.
cd "$(dirname "$0")" || exit 1

rule="$(printf '═%.0s' {1..39})"
echo "$rule"
echo " Apollo Server Status"
echo "$rule"
echo ""
echo "📊 System:"
echo " CPU: $(nproc) cores"
echo " RAM: $(free -h | awk '/^Mem:/{print $2}') total"
echo " Disk: $(df -h . | awk 'NR==2{print $4}') free"
echo ""
if command -v nvidia-smi &> /dev/null; then
  echo "🎮 GPU:"
  nvidia-smi --query-gpu=name,memory.used,memory.total --format=csv,noheader
  echo ""
fi
echo "💾 ClickHouse:"
clickhouse-client --query "SELECT 'Mints: ' || toString(count()) FROM mints" 2>/dev/null || echo " Not running"
clickhouse-client --query "SELECT 'Trades: ' || toString(count()) FROM trades" 2>/dev/null || echo ""
echo ""
echo "📦 Cache:"
if [ -d ./data/cache ]; then
  # find avoids expanding thousands of file names onto one command line.
  echo " Files: $(find ./data/cache -maxdepth 1 -name 'sample_*.pt' | wc -l)"
  echo " Size: $(du -sh ./data/cache 2>/dev/null | cut -f1)"
else
  # Checked explicitly: a fallback placed after `du | cut` never runs, because
  # the pipeline's status is that of cut, which succeeds on empty input.
  echo " Files: 0"
  echo " Size: N/A"
fi
echo ""
STATUSSH
chmod +x "${APOLLO_DIR}/check_status.sh"
#===============================================================================
# Done!
#===============================================================================
# Closing banner and next-step hints.
# The box-drawing characters and emoji had been stored as mis-encoded bytes;
# they are restored here, with the frame generated rather than typed out.
done_rule="$(printf '═%.0s' {1..66})"
echo ""
echo "╔${done_rule}╗"
# Padding assumes each emoji occupies two terminal columns.
printf '║%22s%s%23s║\n' '' '🎉 Setup Complete! 🎉' ''
echo "╚${done_rule}╝"
echo ""
echo " 📦 Cache: ${CACHE_COUNT} samples ready"
echo " 📍 Location: ${CACHE_DIR}"
echo ""
echo " 🚀 To start training:"
echo ""
echo " ./train_launch.sh"
echo ""
echo " 📊 To check status:"
echo ""
echo " ./check_status.sh"
echo ""
success "Apollo is ready to train!"