#!/usr/bin/env bash
#
# Pre-cache the Apollo dataset (context mode) with hardcoded settings.
# Any extra arguments are forwarded verbatim to scripts/cache_dataset.py.
#
# Fail fast: without strict mode the original script printed "Done!" and
# exited 0 even when the Python caching step failed.
set -euo pipefail

# =========================
# Hardcoded cache settings
# =========================
readonly CONTEXT_LENGTH=4096
readonly MIN_TRADES=10
readonly SAMPLES_PER_TOKEN=1
readonly TARGET_CONTEXTS_PER_CLASS=10
readonly NUM_WORKERS=1
readonly OUTPUT_DIR="data/cache"

# Label horizons in seconds, relative to each sampled T_cutoff.
# Tuned for memecoin timing distribution (less micro-noise, more actionable windows):
# [300, 900, 1800, 3600, 7200] = [5m, 15m, 30m, 60m, 120m]
readonly HORIZONS_SECONDS=(300 900 1800 3600 7200)
readonly QUANTILES=(0.1 0.5 0.9)

echo "========================================"
echo "Apollo Dataset Pre-Caching (Context Mode)"
echo "========================================"
echo "Context Length (H/B/H threshold): $CONTEXT_LENGTH"
echo "Min Trades (T_cutoff threshold): $MIN_TRADES"
echo "Samples per Token: $SAMPLES_PER_TOKEN"
echo "Target Contexts per Class: $TARGET_CONTEXTS_PER_CLASS"
echo "Num Workers: $NUM_WORKERS"
echo "Horizons (sec): ${HORIZONS_SECONDS[*]}"
echo "Quantiles: ${QUANTILES[*]}"
echo "Output Directory: $OUTPUT_DIR"
echo "========================================"
echo "Starting dataset caching..."

mkdir -p "$OUTPUT_DIR"

# Explicit failure path: report and propagate a non-zero status instead of
# falling through to the success message.
if ! python3 scripts/cache_dataset.py \
  --output_dir "$OUTPUT_DIR" \
  --context_length "$CONTEXT_LENGTH" \
  --min_trades "$MIN_TRADES" \
  --samples_per_token "$SAMPLES_PER_TOKEN" \
  --target_contexts_per_class "$TARGET_CONTEXTS_PER_CLASS" \
  --num_workers "$NUM_WORKERS" \
  --horizons_seconds "${HORIZONS_SECONDS[@]}" \
  --quantiles "${QUANTILES[@]}" \
  "$@"; then
  echo "ERROR: dataset caching failed" >&2
  exit 1
fi

echo "Done!"
echo "Cache saved to: $OUTPUT_DIR"