File size: 20,742 Bytes
e605733
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
#!/bin/bash
#===============================================================================
# Apollo Training Server Setup Script
#===============================================================================
# This script sets up a fresh server for Apollo training:
# 1. Installs system dependencies
# 2. Sets up ClickHouse and imports blockchain data (epochs 844-850)
# 3. Sets up Neo4j for graph data
# 4. Installs Python dependencies
# 5. Generates/migrates cache for offline training
#
# Usage:
#   chmod +x scripts/setup_server.sh
#   ./scripts/setup_server.sh
#
# Requirements:
#   - Ubuntu 20.04/22.04 or similar Linux
#   - At least 300GB RAM, 48GB+ VRAM
#   - ~500GB free disk space for data
#   - Root/sudo access
#===============================================================================

set -e  # Exit on error (NOTE: does not catch failures inside pipelines)

# ANSI color codes used by the logging helpers below.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Logging helpers.
#   - printf '%b' expands the escape sequences in the color variables
#     (equivalent to `echo -e`, but portable and safe for messages that
#     begin with '-').
#   - "$*" joins all arguments, so multi-word messages work whether or not
#     the caller quoted them (the old "$1" silently dropped extra words).
#   - log_error writes to stderr so diagnostics survive stdout redirection.
log_info()    { printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "$*"; }
log_success() { printf '%b[SUCCESS]%b %s\n' "${GREEN}" "${NC}" "$*"; }
log_warn()    { printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$*"; }
log_error()   { printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$*" >&2; }

#===============================================================================
# Configuration
#===============================================================================
# All paths below can be overridden from the environment before invoking this
# script, e.g. `DATA_DIR=/mnt/data ./setup_server.sh`.
APOLLO_DIR="${APOLLO_DIR:-/workspace/apollo}"
DATA_DIR="${DATA_DIR:-/workspace/apollo/data}"
CACHE_DIR="${CACHE_DIR:-/workspace/apollo/data/cache}"
CLICKHOUSE_DATA_DIR="${CLICKHOUSE_DATA_DIR:-/var/lib/clickhouse}"

# Epochs to download (Solana epochs with memecoin data)
EPOCHS=(844 845 846 847 848 849 850)

# ClickHouse settings (native protocol on 9000, HTTP interface on 8123)
CLICKHOUSE_HOST="localhost"
CLICKHOUSE_PORT=9000
CLICKHOUSE_HTTP_PORT=8123

# Neo4j settings
NEO4J_HOST="localhost"
NEO4J_BOLT_PORT=7687
NEO4J_HTTP_PORT=7474
# NOTE(review): hard-coded default credential; it is written into .env later.
# Consider overriding via the environment on shared machines.
NEO4J_PASSWORD="apollo_neo4j_2024"

# Number of parallel workers for caching
CACHE_WORKERS=8

#===============================================================================
# Step 0: System Check
#===============================================================================
echo ""
echo "============================================================"
echo "  Apollo Training Server Setup"
echo "============================================================"
echo ""

log_info "Checking system requirements..."

# Check available memory. Guard against empty/non-numeric awk output: an
# unparseable value would make the numeric test below fail and abort the
# whole script under `set -e`.
TOTAL_MEM_GB=$(free -g | awk '/^Mem:/{print $2}')
case "$TOTAL_MEM_GB" in
    ''|*[!0-9]*) TOTAL_MEM_GB=0 ;;
esac
if [ "$TOTAL_MEM_GB" -lt 64 ]; then
    log_warn "System has ${TOTAL_MEM_GB}GB RAM. Recommended: 300GB+ for optimal performance."
fi

# Check available disk space on the filesystem that will hold DATA_DIR.
# The directory may not exist yet, so df failures fall back to "0" instead
# of aborting; the same non-numeric guard applies.
AVAILABLE_DISK_GB=$(df -BG "${DATA_DIR%/*}" 2>/dev/null | awk 'NR==2 {print $4}' | tr -d 'G' || echo "0")
case "$AVAILABLE_DISK_GB" in
    ''|*[!0-9]*) AVAILABLE_DISK_GB=0 ;;
esac
if [ "$AVAILABLE_DISK_GB" -lt 400 ]; then
    log_warn "Only ${AVAILABLE_DISK_GB}GB disk space available. Recommended: 500GB+"
fi

# Check for NVIDIA GPU (setup continues either way; training is just slow on CPU)
if command -v nvidia-smi &> /dev/null; then
    GPU_INFO=$(nvidia-smi --query-gpu=name,memory.total --format=csv,noheader | head -1)
    log_info "GPU detected: $GPU_INFO"
else
    log_warn "No NVIDIA GPU detected. Training will be slow without GPU."
fi

log_success "System check complete"

#===============================================================================
# Step 1: Install System Dependencies
#===============================================================================
echo ""
log_info "Step 1: Installing system dependencies..."

# Refresh the package index before installing anything.
sudo apt-get update

# Essential build tooling, Python, and download/compression utilities,
# installed in a single apt transaction.
system_packages=(
    curl
    wget
    git
    build-essential
    python3
    python3-pip
    python3-venv
    htop
    tmux
    unzip
    aria2
    pigz
    pv
)
sudo apt-get install -y "${system_packages[@]}"

log_success "System dependencies installed"

#===============================================================================
# Step 2: Install and Configure ClickHouse
#===============================================================================
echo ""
log_info "Step 2: Setting up ClickHouse..."

# Check if ClickHouse is already installed
if command -v clickhouse-server &> /dev/null; then
    log_info "ClickHouse already installed, checking version..."
    clickhouse-server --version
else
    log_info "Installing ClickHouse..."

    # Add ClickHouse repository.
    # NOTE(review): apt-key is deprecated on newer Ubuntu releases; if this
    # step fails, switch to the keyring/signed-by method from the ClickHouse
    # install docs.
    sudo apt-get install -y apt-transport-https ca-certificates dirmngr
    sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754
    echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee /etc/apt/sources.list.d/clickhouse.list

    # Install ClickHouse
    sudo apt-get update
    sudo DEBIAN_FRONTEND=noninteractive apt-get install -y clickhouse-server clickhouse-client
fi

# Configure ClickHouse for high performance
log_info "Configuring ClickHouse for optimal performance..."

sudo mkdir -p /etc/clickhouse-server/config.d/

# Drop-in override: memory caps, thread counts, network binding, log rotation.
cat << 'EOF' | sudo tee /etc/clickhouse-server/config.d/apollo.xml
<?xml version="1.0"?>
<clickhouse>
    <!-- Memory settings for large dataset -->
    <max_server_memory_usage_to_ram_ratio>0.8</max_server_memory_usage_to_ram_ratio>
    <max_memory_usage>200000000000</max_memory_usage>

    <!-- Performance settings -->
    <max_threads>32</max_threads>
    <max_concurrent_queries>100</max_concurrent_queries>

    <!-- Listen on all interfaces -->
    <listen_host>0.0.0.0</listen_host>

    <!-- Logging -->
    <logger>
        <level>warning</level>
        <log>/var/log/clickhouse-server/clickhouse-server.log</log>
        <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
        <size>100M</size>
        <count>3</count>
    </logger>
</clickhouse>
EOF

# Start ClickHouse
log_info "Starting ClickHouse server..."
sudo systemctl enable clickhouse-server
sudo systemctl restart clickhouse-server

# Wait for ClickHouse to accept queries. Previously the script continued
# silently if the server never came up, and the schema step later failed
# confusingly; now we abort here with a clear message.
log_info "Waiting for ClickHouse to be ready..."
CLICKHOUSE_READY=0
for i in {1..30}; do
    if clickhouse-client --query "SELECT 1" &>/dev/null; then
        CLICKHOUSE_READY=1
        log_success "ClickHouse is ready"
        break
    fi
    sleep 2
done
if [ "$CLICKHOUSE_READY" -ne 1 ]; then
    log_error "ClickHouse did not become ready after 60s. Check: journalctl -u clickhouse-server"
    exit 1
fi

#===============================================================================
# Step 3: Install and Configure Neo4j
#===============================================================================
echo ""
log_info "Step 3: Setting up Neo4j..."

# Check if Neo4j is already installed
if command -v neo4j &> /dev/null; then
    log_info "Neo4j already installed"
else
    log_info "Installing Neo4j..."

    # Add Neo4j repository.
    # NOTE(review): apt-key is deprecated on newer Ubuntu releases; use a
    # keyring file with signed-by if this step fails.
    wget -O - https://debian.neo4j.com/neotechnology.gpg.key | sudo apt-key add -
    echo 'deb https://debian.neo4j.com stable latest' | sudo tee /etc/apt/sources.list.d/neo4j.list

    # Install Neo4j
    sudo apt-get update
    sudo apt-get install -y neo4j
fi

# Configure Neo4j.
# WARNING: this overwrites the entire /etc/neo4j/neo4j.conf with our settings.
log_info "Configuring Neo4j..."

sudo tee /etc/neo4j/neo4j.conf > /dev/null << EOF
# Network
dbms.default_listen_address=0.0.0.0
dbms.connector.bolt.listen_address=:7687
dbms.connector.http.listen_address=:7474

# Memory (adjust based on available RAM)
dbms.memory.heap.initial_size=4g
dbms.memory.heap.max_size=16g
dbms.memory.pagecache.size=8g

# Security
dbms.security.auth_enabled=true
EOF

# Start Neo4j
log_info "Starting Neo4j..."
sudo systemctl enable neo4j
sudo systemctl restart neo4j

# Wait for the HTTP endpoint instead of a fixed 10-second sleep — the fixed
# sleep raced Neo4j startup on slow machines and the password call below
# then silently did nothing.
log_info "Waiting for Neo4j to be ready..."
for i in {1..30}; do
    if curl -s "http://${NEO4J_HOST}:${NEO4J_HTTP_PORT}" > /dev/null; then
        break
    fi
    sleep 2
done

# Set initial password. This only succeeds on a fresh install where the
# default neo4j/neo4j credentials are still active; otherwise it is a no-op
# (hence the deliberate `|| true`).
log_info "Setting Neo4j password..."
curl -s -X POST "http://localhost:7474/user/neo4j/password" \
    -H "Content-Type: application/json" \
    -d "{\"password\":\"${NEO4J_PASSWORD}\"}" \
    -u neo4j:neo4j 2>/dev/null || true

log_success "Neo4j configured with password: ${NEO4J_PASSWORD}"

#===============================================================================
# Step 4: Download Blockchain Data (Epochs 844-850)
#===============================================================================
echo ""
log_info "Step 4: Downloading blockchain data for epochs ${EPOCHS[*]}..."

mkdir -p "${DATA_DIR}/epochs"

#######################################
# Create the per-epoch download stub script.
# Globals:   DATA_DIR (read)
# Arguments: $1 - epoch number
# Outputs:   writes ${DATA_DIR}/epochs/epoch_<N>/download_epoch_<N>.sh
# Returns:   0 (download/import itself is a documented manual step)
# Note: uses absolute paths throughout so the caller's cwd is never changed
# (the previous version cd'd into each epoch dir and never returned).
#######################################
download_epoch() {
    local epoch=$1
    local epoch_dir="${DATA_DIR}/epochs/epoch_${epoch}"
    local script_path="${epoch_dir}/download_epoch_${epoch}.sh"

    log_info "Processing epoch ${epoch}..."

    # Skip epochs already marked done (the manual import step is expected
    # to create the .complete marker).
    if [ -f "${epoch_dir}/.complete" ]; then
        log_info "Epoch ${epoch} already downloaded, skipping..."
        return 0
    fi

    mkdir -p "${epoch_dir}"

    # Download epoch data (adjust URL based on your data source).
    # This is a placeholder - replace with actual data source.
    log_info "Downloading epoch ${epoch} data..."

    # Example: Download from S3 or other source
    # aria2c -x 16 -s 16 "https://your-data-source/solana/epoch_${epoch}.tar.gz" -o "epoch_${epoch}.tar.gz"

    # Generate a customizable download stub. The heredoc delimiter is
    # unquoted so the current epoch is baked in as the default argument
    # (the old stub required $1, but the final instructions tell the user
    # to run it with no arguments). Everything that must stay literal in
    # the generated script is backslash-escaped.
    cat << DOWNLOAD_SCRIPT > "${script_path}"
#!/bin/bash
# Customize this script with your actual data source
# Example sources:
# 1. Google BigQuery export
# 2. S3 bucket with parquet files
# 3. Direct RPC node dump
# 4. Third-party data provider (Helius, QuickNode, etc.)

EPOCH=\${1:-${epoch}}
echo "Download script for epoch \${EPOCH}"
echo "Please customize this script with your data source"

# Example: Download from S3
# aws s3 sync s3://your-bucket/solana/epoch_\${EPOCH}/ ./

# Example: Download from HTTP
# wget -r -np -nH --cut-dirs=3 "https://data.example.com/solana/epoch_\${EPOCH}/"

# After download, import to ClickHouse:
# clickhouse-client --query "INSERT INTO trades FORMAT Parquet" < trades.parquet
DOWNLOAD_SCRIPT

    chmod +x "${script_path}"

    # Mark as needing manual completion
    log_warn "Epoch ${epoch}: Download script created at ${script_path}"
    log_warn "Please customize and run the download script manually"
}

# Process each epoch
for epoch in "${EPOCHS[@]}"; do
    download_epoch "$epoch"
done

#===============================================================================
# Step 5: Create ClickHouse Schema
#===============================================================================
echo ""
log_info "Step 5: Creating ClickHouse database schema..."

# --multiquery is required for older clickhouse-client releases to execute a
# multi-statement script from stdin (recent releases default to it, where the
# flag is a harmless no-op). Without it the whole heredoc is treated as one
# query and the schema creation fails.
clickhouse-client --multiquery << 'EOF'
-- Create database
CREATE DATABASE IF NOT EXISTS apollo;

-- Trades table
CREATE TABLE IF NOT EXISTS apollo.trades (
    timestamp DateTime64(3),
    signature String,
    token_address String,
    maker String,
    trade_direction UInt8,
    token_amount Float64,
    sol_amount Float64,
    price_usd Float64,
    total_usd Float64,
    dex_platform String,
    pool_address String,
    success UInt8,
    priority_fee Float64,
    slippage Float64,
    is_bundle UInt8,
    mev_protection UInt8
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(timestamp)
ORDER BY (token_address, timestamp)
SETTINGS index_granularity = 8192;

-- Transfers table
CREATE TABLE IF NOT EXISTS apollo.transfers (
    timestamp DateTime64(3),
    signature String,
    token_address String,
    source String,
    destination String,
    amount Float64,
    decimals UInt8
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(timestamp)
ORDER BY (token_address, timestamp)
SETTINGS index_granularity = 8192;

-- Mints table (token creation events)
CREATE TABLE IF NOT EXISTS apollo.mints (
    timestamp DateTime64(3),
    mint_address String,
    creator_address String,
    name String,
    symbol String,
    decimals UInt8,
    total_supply Float64,
    token_uri String,
    protocol String
) ENGINE = MergeTree()
ORDER BY (timestamp, mint_address)
SETTINGS index_granularity = 8192;

-- Wallet profiles table
CREATE TABLE IF NOT EXISTS apollo.wallet_profiles (
    wallet_address String,
    updated_at DateTime64(3),
    age Float64,
    balance Float64,
    deployed_tokens_count UInt32,
    total_buys_count UInt32,
    total_sells_count UInt32,
    total_winrate Float32,
    stats_1d_realized_profit_sol Float64,
    stats_1d_buy_count UInt32,
    stats_1d_sell_count UInt32,
    stats_7d_realized_profit_sol Float64,
    stats_7d_buy_count UInt32,
    stats_7d_sell_count UInt32
) ENGINE = ReplacingMergeTree(updated_at)
ORDER BY wallet_address
SETTINGS index_granularity = 8192;

-- Wallet holdings table
CREATE TABLE IF NOT EXISTS apollo.wallet_holdings (
    wallet_address String,
    mint_address String,
    current_balance Float64,
    start_holding_at DateTime64(3),
    end_holding_at Nullable(DateTime64(3)),
    bought_amount_sol Float64,
    sold_amount_sol Float64
) ENGINE = MergeTree()
ORDER BY (wallet_address, mint_address)
SETTINGS index_granularity = 8192;

-- Pool creations table
CREATE TABLE IF NOT EXISTS apollo.pool_creations (
    timestamp DateTime64(3),
    pool_address String,
    token_address String,
    quote_token_address String,
    creator_address String,
    protocol String,
    base_amount Float64,
    quote_amount Float64
) ENGINE = MergeTree()
ORDER BY (token_address, timestamp)
SETTINGS index_granularity = 8192;

-- Token holders snapshot table
CREATE TABLE IF NOT EXISTS apollo.token_holders (
    token_address String,
    snapshot_time DateTime64(3),
    wallet_address String,
    current_balance Float64,
    rank UInt32
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(snapshot_time)
ORDER BY (token_address, snapshot_time, rank)
SETTINGS index_granularity = 8192;

EOF

log_success "ClickHouse schema created"

#===============================================================================
# Step 6: Setup Python Environment
#===============================================================================
echo ""
log_info "Step 6: Setting up Python environment..."

cd "${APOLLO_DIR}"

# Create the virtualenv only on first run; re-running the script reuses it.
if [ ! -d "venv" ]; then
    log_info "Creating Python virtual environment..."
    python3 -m venv venv
fi

# Activate the virtualenv so every pip below installs into it.
source venv/bin/activate

# Make sure pip itself is current before installing anything.
pip install --upgrade pip

# PyTorch comes from the CUDA 12.1 wheel index, not PyPI.
log_info "Installing PyTorch with CUDA..."
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

# Remaining training/runtime dependencies, installed in one pip transaction.
log_info "Installing Python dependencies..."
python_packages=(
    transformers
    accelerate
    clickhouse-driver
    neo4j
    requests
    Pillow
    tqdm
    numpy
    pandas
    tensorboard
    sentencepiece
    protobuf
)
pip install "${python_packages[@]}"

# If the repo ships a setup.py, install the project itself in editable mode.
if [ -f "setup.py" ]; then
    pip install -e .
fi

log_success "Python environment ready"

#===============================================================================
# Step 7: Create Environment File
#===============================================================================
echo ""
log_info "Step 7: Creating environment configuration..."

# Unquoted heredoc delimiter: the configuration values are expanded now and
# written into .env as literals.
cat << EOF > "${APOLLO_DIR}/.env"
# ClickHouse Configuration
CLICKHOUSE_HOST=${CLICKHOUSE_HOST}
CLICKHOUSE_PORT=${CLICKHOUSE_PORT}

# Neo4j Configuration
NEO4J_URI=bolt://${NEO4J_HOST}:${NEO4J_BOLT_PORT}
NEO4J_USER=neo4j
NEO4J_PASSWORD=${NEO4J_PASSWORD}

# Training Configuration
APOLLO_DATA_DIR=${DATA_DIR}
APOLLO_CACHE_DIR=${CACHE_DIR}

# CUDA Configuration
CUDA_VISIBLE_DEVICES=0
EOF

# The file contains the Neo4j password — restrict it to the owner
# (it was previously created with the default, world-readable umask).
chmod 600 "${APOLLO_DIR}/.env"

log_success "Environment file created at ${APOLLO_DIR}/.env"

#===============================================================================
# Step 8: Generate Cache (if data is available)
#===============================================================================
echo ""
log_info "Step 8: Cache generation setup..."

mkdir -p "${CACHE_DIR}"

# Check if we have data to cache. Treat any non-numeric result (e.g. a
# partial error message from clickhouse-client) as zero, so the numeric
# test below cannot abort the script under `set -e`.
MINT_COUNT=$(clickhouse-client --query "SELECT count() FROM apollo.mints" 2>/dev/null || echo "0")
case "$MINT_COUNT" in
    ''|*[!0-9]*) MINT_COUNT=0 ;;
esac

if [ "$MINT_COUNT" -gt 0 ]; then
    log_info "Found ${MINT_COUNT} mints in database. Ready for caching."
    log_info "To generate cache, run:"
    echo ""
    echo "  cd ${APOLLO_DIR}"
    echo "  source venv/bin/activate"
    echo "  python scripts/cache_parallel.py --output_dir ${CACHE_DIR} --num_workers ${CACHE_WORKERS}"
    echo ""
else
    log_warn "No mint data found in ClickHouse. Please import data first."
    log_info "After importing epoch data, run the caching script."
fi

#===============================================================================
# Step 9: Create Helper Scripts
#===============================================================================
echo ""
log_info "Step 9: Creating helper scripts..."

# Create start script (quoted delimiter: written verbatim, expanded at runtime).
cat << 'EOF' > "${APOLLO_DIR}/start_services.sh"
#!/bin/bash
# Start all services for Apollo training

echo "Starting ClickHouse..."
sudo systemctl start clickhouse-server

echo "Starting Neo4j..."
sudo systemctl start neo4j

echo "Waiting for services to be ready..."
sleep 5

# Verify services
echo "Checking ClickHouse..."
clickhouse-client --query "SELECT 1" && echo "ClickHouse OK" || echo "ClickHouse FAILED"

echo "Checking Neo4j..."
curl -s http://localhost:7474 > /dev/null && echo "Neo4j OK" || echo "Neo4j FAILED"

echo "Services started!"
EOF
chmod +x "${APOLLO_DIR}/start_services.sh"

# Create training launch script. Unquoted delimiter: ${APOLLO_DIR} is expanded
# now, while the escaped \$@ stays literal so the generated script forwards
# its own arguments. The cd path is quoted so the generated script still works
# if APOLLO_DIR contains spaces or shell metacharacters.
cat << EOF > "${APOLLO_DIR}/train_launch.sh"
#!/bin/bash
# Launch Apollo training with optimal settings

cd "${APOLLO_DIR}"
source venv/bin/activate
source .env

# Optimal settings for 48GB VRAM
accelerate launch train.py \\
    --epochs 7 \\
    --batch_size 16 \\
    --grad_accum_steps 4 \\
    --learning_rate 1e-4 \\
    --warmup_ratio 0.1 \\
    --max_grad_norm 1.0 \\
    --mixed_precision bf16 \\
    --max_seq_len 8192 \\
    --horizons_seconds 60 180 300 600 1800 3600 7200 \\
    --quantiles 0.1 0.5 0.9 \\
    --num_workers 16 \\
    --pin_memory \\
    --val_split 0.1 \\
    --val_every 5000 \\
    --save_every 5000 \\
    --log_every 100 \\
    "\$@"
EOF
chmod +x "${APOLLO_DIR}/train_launch.sh"

# Create status check script (quoted delimiter: written verbatim).
cat << 'EOF' > "${APOLLO_DIR}/check_status.sh"
#!/bin/bash
# Check status of all Apollo components

echo "============================================"
echo "Apollo Server Status"
echo "============================================"

# System resources
echo ""
echo "=== System Resources ==="
echo "CPU: $(nproc) cores"
echo "RAM: $(free -h | awk '/^Mem:/{print $2}') total, $(free -h | awk '/^Mem:/{print $3}') used"
echo "Disk: $(df -h /workspace 2>/dev/null | awk 'NR==2{print $4}' || df -h / | awk 'NR==2{print $4}') available"

# GPU status
if command -v nvidia-smi &> /dev/null; then
    echo ""
    echo "=== GPU Status ==="
    nvidia-smi --query-gpu=name,memory.used,memory.total,utilization.gpu --format=csv,noheader
fi

# ClickHouse status
echo ""
echo "=== ClickHouse ==="
if systemctl is-active --quiet clickhouse-server; then
    echo "Status: Running"
    MINT_COUNT=$(clickhouse-client --query "SELECT count() FROM apollo.mints" 2>/dev/null || echo "N/A")
    TRADE_COUNT=$(clickhouse-client --query "SELECT count() FROM apollo.trades" 2>/dev/null || echo "N/A")
    echo "Mints: ${MINT_COUNT}"
    echo "Trades: ${TRADE_COUNT}"
else
    echo "Status: STOPPED"
fi

# Neo4j status
echo ""
echo "=== Neo4j ==="
if systemctl is-active --quiet neo4j; then
    echo "Status: Running"
else
    echo "Status: STOPPED"
fi

# Cache status
echo ""
echo "=== Cache ==="
CACHE_DIR="${APOLLO_CACHE_DIR:-/workspace/apollo/data/cache}"
if [ -d "$CACHE_DIR" ]; then
    CACHE_COUNT=$(ls -1 "$CACHE_DIR"/sample_*.pt 2>/dev/null | wc -l)
    CACHE_SIZE=$(du -sh "$CACHE_DIR" 2>/dev/null | cut -f1)
    echo "Files: ${CACHE_COUNT}"
    echo "Size: ${CACHE_SIZE}"
else
    echo "Cache directory not found"
fi

echo ""
echo "============================================"
EOF
chmod +x "${APOLLO_DIR}/check_status.sh"

log_success "Helper scripts created"

#===============================================================================
# Final Summary
#===============================================================================
# Heredocs replace the long runs of echo statements; output is identical.
cat << EOF

============================================================
  Setup Complete!
============================================================

EOF
log_success "Apollo training server setup finished"
cat << EOF

Next steps:

1. Import epoch data (844-850):
   - Customize download scripts in ${DATA_DIR}/epochs/
   - Run: ./download_epoch_XXX.sh for each epoch
   - Import to ClickHouse using provided schema

2. Generate training cache:
   cd ${APOLLO_DIR}
   source venv/bin/activate
   python scripts/cache_parallel.py --output_dir ${CACHE_DIR}

3. Start training:
   ./train_launch.sh

Useful commands:
  ./start_services.sh  - Start ClickHouse and Neo4j
  ./check_status.sh    - Check system and service status
  ./train_launch.sh    - Launch training with optimal settings

Environment variables saved to: ${APOLLO_DIR}/.env

EOF