# oracle / ingest.sh
# zirobtc's picture
# Upload folder using huggingface_hub
# 2c39730 verified
#!/bin/bash
# Abort on the first command that fails without being explicitly handled.
set -e

# Pause before the shell exits so the user can read the final output
# (presumably for runs in a terminal window that closes on exit).
# The prompt is shown only when stdin is a terminal: unattended runs
# (cron, CI, piped input) must not block or swallow piped data, and the
# guarded read cannot fail inside the EXIT trap while `set -e` is active.
pause_on_exit() {
  if [[ -t 0 ]]; then
    read -r -p "Press Enter to exit..." || true
  fi
}
trap pause_on_exit EXIT

# Colors: ANSI escape sequences stored as literal backslash text; they are
# turned into real escape characters at print time by the helper functions.
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'
# Helper functions
# Print a section banner in cyan: blank line, rule, the title ($1) indented by
# one space, rule, blank line. Backslash escapes in the text are interpreted.
header() {
  local bar="${CYAN}========================================${NC}"
  printf '%b\n' "\n${bar}\n${CYAN} $1${NC}\n${bar}\n"
}
# Print an informational message ($1) prefixed with a blue [INFO] tag.
# Backslash escapes in the message are interpreted.
log() {
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
}
# Print a completion message ($1) prefixed with a green [SUCCESS] tag.
# Backslash escapes in the message are interpreted.
success() {
  printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}
# Report a fatal problem and terminate the script.
# Arguments: $1 - message to display after the red [ERROR] tag
# Outputs:   the message on stderr, so it is not mixed into captured or
#            redirected stdout
# Returns:   never returns; exits the shell with status 1
error() {
  echo -e "${RED}[ERROR]${NC} $1" >&2
  exit 1
}
#===============================================================================
# Step 5+6: Download, Ingest, Delete (one epoch at a time to save disk)
#===============================================================================
# For every epoch in EPOCHS: run the download script, run the ingest script
# with --skip-neo4j, then remove EPOCH_DIR before moving on to the next epoch.
# A failing download or ingest aborts the whole script through error().

# Print the row count of ClickHouse table $1.
# Best effort: client errors are hidden and "0" is printed when the query
# fails, so a reporting problem never aborts the run.
count_rows() {
  clickhouse-client --query "SELECT count() FROM $1" 2>/dev/null || echo "0"
}

header "Step 5-6/7: Processing Epochs (Download → Ingest → Delete)"
EPOCHS=(852 853)
log "Processing epochs one at a time to minimize disk usage..."
log "Each epoch: ~20GB download → ingest → delete"
echo ""
for epoch in "${EPOCHS[@]}"; do
  EPOCH_DIR="./data/pump_fun/epoch_${epoch}"
  log "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  log "Processing epoch ${epoch}..."

  # Step 1: Download
  log " [1/3] Downloading epoch ${epoch}..."
  python scripts/download_epoch_artifacts.py --epoch "$epoch" || {
    error "Failed to download epoch ${epoch}. Cannot continue."
  }

  # Step 2: Ingest (ClickHouse only)
  log " [2/3] Ingesting epoch ${epoch} into ClickHouse database..."
  python scripts/ingest_epoch.py --epoch "$epoch" --skip-neo4j || {
    error "Ingestion failed for epoch ${epoch}. Cannot continue."
  }

  # Step 3: Delete parquet files to free disk space.
  # `--` stops option parsing and `:?` aborts if the path is ever empty, so
  # the recursive delete can never be pointed at an unintended location.
  log " [3/3] Cleaning up epoch ${epoch} parquet files..."
  rm -rf -- "${EPOCH_DIR:?}"

  # Show progress
  CURRENT_MINTS=$(count_rows mints)
  CURRENT_TRADES=$(count_rows trades)
  log " Progress: ${CURRENT_MINTS} mints, ${CURRENT_TRADES} trades"
  # -P keeps each filesystem on a single output line, so the "available"
  # column is always field 4 of line 2 even when the device name is long.
  log " Disk free: $(df -Ph . | awk 'NR==2{print $4}')"
done

# Final verification
log ""
log "Verifying final data..."
MINTS=$(count_rows mints)
TRADES=$(count_rows trades)
log " 📊 Mints: ${MINTS}"
log " 📊 Trades: ${TRADES}"
success "All epochs processed and ingested"