File size: 1,533 Bytes
4ec4791
 
 
 
 
 
d195287
4ec4791
3780496
4ec4791
85e02a7
4ec4791
 
 
 
 
85e02a7
 
64ff574
85e02a7
64ff574
 
 
d195287
4ec4791
 
 
85e02a7
3780496
85e02a7
bf92148
 
85e02a7
4ec4791
 
bf92148
85e02a7
 
 
 
d195287
4ec4791
 
 
85e02a7
 
bf92148
85e02a7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# -----------------------------------------------------------
# Fixed settings for the cache run (edit in place to change).
# -----------------------------------------------------------
OUTPUT_DIR='data/cache'

CONTEXT_LENGTH=4096
MIN_TRADES=10
SAMPLES_PER_TOKEN=1
TARGET_CONTEXTS_PER_CLASS=10
NUM_WORKERS=1

# Label horizons, expressed in seconds after each sampled T_cutoff.
# Chosen for the memecoin timing distribution: less micro-noise and
# more actionable windows.
HORIZONS_SECONDS=(
    300    # 5m
    900    # 15m
    1800   # 30m
    3600   # 60m
    7200   # 120m
)

# Quantile levels forwarded to the caching script via --quantiles.
QUANTILES=(
    0.1
    0.5
    0.9
)

# Print the effective configuration as a banner before any work starts.
# A single printf emits one line per argument; arrays are joined with
# spaces through the [*] expansion.
printf '%s\n' \
    "========================================" \
    "Apollo Dataset Pre-Caching (Context Mode)" \
    "========================================" \
    "Context Length (H/B/H threshold): $CONTEXT_LENGTH" \
    "Min Trades (T_cutoff threshold): $MIN_TRADES" \
    "Samples per Token: $SAMPLES_PER_TOKEN" \
    "Target Contexts per Class: $TARGET_CONTEXTS_PER_CLASS" \
    "Num Workers: $NUM_WORKERS" \
    "Horizons (sec): ${HORIZONS_SECONDS[*]}" \
    "Quantiles: ${QUANTILES[*]}" \
    "Output Directory: $OUTPUT_DIR" \
    "========================================" \
    "Starting dataset caching..."

# Make sure the cache directory exists before launching the caching script.
# Stop here on failure instead of continuing with a missing output path.
if ! mkdir -p "$OUTPUT_DIR"; then
    echo "Error: could not create output directory: $OUTPUT_DIR" >&2
    exit 1
fi

# Run the caching script with the settings defined above.
# - Array values are expanded with [@], so every horizon and quantile is
#   passed as its own argument after the corresponding flag.
# - "$@" forwards any extra command-line arguments given to this wrapper,
#   placed last on the command line.
# If the script exits non-zero, report it on stderr and propagate the same
# exit code, so a failed run is never followed by the success messages.
python3 scripts/cache_dataset.py \
    --output_dir "$OUTPUT_DIR" \
    --context_length "$CONTEXT_LENGTH" \
    --min_trades "$MIN_TRADES" \
    --samples_per_token "$SAMPLES_PER_TOKEN" \
    --target_contexts_per_class "$TARGET_CONTEXTS_PER_CLASS" \
    --num_workers "$NUM_WORKERS" \
    --horizons_seconds "${HORIZONS_SECONDS[@]}" \
    --quantiles "${QUANTILES[@]}" \
    "$@" || {
    rc=$?
    echo "Error: dataset caching failed (exit code $rc)" >&2
    exit "$rc"
}

echo "Done!"
echo "Cache saved to: $OUTPUT_DIR"