|
|
#!/bin/bash
#
# HAT benchmark reproducibility suite.
#
# Builds the project, runs the Rust benchmark test targets, the Python
# binding tests and the end-to-end demo, and records the output in a
# timestamped file under "<directory of this script>/results".
#
# Usage: $0 [--quick]
#   --quick   Flag the run as "quick mode"; the flag is recorded in the
#             results header.
#             NOTE(review): in the portion of the script reviewed, nothing
#             else reads QUICK_MODE -- confirm whether benchmarks are
#             actually meant to be abbreviated.

# Abort on the first unhandled command failure.
set -e

# Absolute directory holding this script; the project root is its parent.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# Every run writes to its own timestamped results file.
RESULTS_DIR="$SCRIPT_DIR/results"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
RESULTS_FILE="$RESULTS_DIR/benchmark_results_$TIMESTAMP.txt"

# ANSI colour escapes, stored unexpanded (interpreted later by `echo -e`).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

QUICK_MODE=false

#######################################
# Parses the command-line arguments.
# Globals:   QUICK_MODE (written), YELLOW, NC (read)
# Arguments: the script's arguments ("$@")
# Outputs:   quick-mode notice on stdout; a warning on stderr for each
#            unrecognized argument (previously these were silently ignored)
# Returns:   0
#######################################
parse_args() {
  local arg

  QUICK_MODE=false
  for arg in "$@"; do
    case "$arg" in
      --quick)
        QUICK_MODE=true
        ;;
      *)
        printf 'Warning: ignoring unrecognized argument: %s\n' "$arg" >&2
        ;;
    esac
  done

  if [[ "$QUICK_MODE" == true ]]; then
    echo -e "${YELLOW}Running in quick mode (abbreviated benchmarks)${NC}"
  fi
}

parse_args "$@"
|
|
|
|
|
|
|
|
#######################################
# Writes the results-file header (date, host, Rust toolchain, mode).
# Globals:   QUICK_MODE (read)
# Arguments: $1 - path of the results file; it is truncated
# Outputs:   nothing on stdout
# Returns:   status of the final write
#######################################
write_results_header() {
  local out_file="$1"
  local host_name rust_version

  # Fall back to placeholders so a missing tool neither leaves a blank
  # field nor prints "command not found" in the middle of the banner.
  host_name=$(hostname 2>/dev/null) \
    || host_name=$(uname -n 2>/dev/null) \
    || host_name=""
  rust_version=$(rustc --version 2>/dev/null) || rust_version=""

  cat > "$out_file" << EOF
HAT Benchmark Results
=====================
Date: $(date)
Host: ${host_name:-unknown}
Rust: ${rust_version:-unavailable}
Quick mode: $QUICK_MODE

EOF
}

mkdir -p "$RESULTS_DIR"

echo "========================================================================"
echo " HAT Benchmark Reproducibility Suite"
echo " $(date)"
echo "========================================================================"
echo ""
echo "Project directory: $PROJECT_DIR"
echo "Results will be saved to: $RESULTS_FILE"
echo ""

write_results_header "$RESULTS_FILE"

# The cargo/maturin commands that follow run from the project root.
cd "$PROJECT_DIR"
|
|
|
|
|
|
|
|
#######################################
# Runs one cargo integration-test target and records its output.
# Globals:   RESULTS_FILE (appended to); BLUE, GREEN, RED, NC (read)
# Arguments: $1 - human-readable benchmark label
#            $2 - cargo test target name (passed to `cargo test --test`)
# Outputs:   progress lines and the cargo output on stdout; the cargo
#            output is also appended to RESULTS_FILE, followed by a
#            "FAILED" line when the run fails
# Returns:   always 0, so that under `set -e` one failing benchmark does
#            not stop the remaining ones
#######################################
run_benchmark() {
  local name="$1"
  local test_name="$2"

  printf '%b[%s]%b Running...\n' "$BLUE" "$name" "$NC"
  {
    echo ""
    echo "=== $name ==="
    echo ""
  } >> "$RESULTS_FILE"

  # A plain `cargo ... | tee` pipeline reports tee's exit status, which made
  # every benchmark look PASSED. pipefail, scoped to a subshell so the
  # caller's shell options are untouched, surfaces cargo's own status.
  if (
    set -o pipefail
    cargo test --test "$test_name" -- --nocapture 2>&1 | tee -a "$RESULTS_FILE"
  ); then
    printf '%b[%s]%b PASSED\n' "$GREEN" "$name" "$NC"
  else
    printf '%b[%s]%b FAILED\n' "$RED" "$name" "$NC"
    echo "FAILED" >> "$RESULTS_FILE"
  fi
  echo ""
}
|
|
|
|
|
echo "========================================================================"
echo " Phase 1: Building Project"
echo "========================================================================"

# Runs `cargo build` with the given arguments, showing only the last five
# lines of output. Piping into tail would normally hide cargo's exit status;
# pipefail (scoped to the function's subshell body) lets it through.
run_cargo_build() (
  set -o pipefail
  cargo build "$@" 2>&1 | tail -5
)

echo "Building release version..."
if ! run_cargo_build --release; then
  echo "Error: release build failed; aborting before running benchmarks." >&2
  exit 1
fi

echo "Building test suite..."
if ! run_cargo_build --tests; then
  echo "Error: test-suite build failed; aborting before running benchmarks." >&2
  exit 1
fi
|
|
|
|
|
# Phase 2 banner.
printf '%s\n' \
  "" \
  "========================================================================" \
  " Phase 2: Running Core Benchmarks" \
  "========================================================================"

# One entry per benchmark, run in this order:
#   "<section heading>|<display label>|<cargo test target>"
core_benchmarks=(
  "Phase 3.1: HAT vs HNSW Comparative Benchmark|HAT vs HNSW|phase31_hat_vs_hnsw"
  "Phase 3.2: Real Embedding Dimensions|Real Embeddings|phase32_real_embeddings"
  "Phase 3.3: Persistence Layer|Persistence|phase33_persistence"
  "Phase 4.2: Attention State Format|Attention State|phase42_attention_state"
)

for bench_entry in "${core_benchmarks[@]}"; do
  IFS='|' read -r bench_heading bench_label bench_target <<< "$bench_entry"
  printf '\n--- %s ---\n' "$bench_heading"
  run_benchmark "$bench_label" "$bench_target"
done
|
|
|
|
|
echo ""
echo "========================================================================"
echo " Phase 3: Python Integration Tests"
echo "========================================================================"

# Virtual environment reused between runs.
# NOTE(review): this is a fixed, predictable path under /tmp; consider a
# per-user cache directory instead.
VENV_DIR="/tmp/arms-hat-bench-venv"

# The activate script is sourced below, so never trust a directory that
# another user could have planted at this well-known location.
if [[ -e "$VENV_DIR" && ! -O "$VENV_DIR" ]]; then
  echo "Error: $VENV_DIR exists but is not owned by the current user; refusing to use it." >&2
  exit 1
fi

# Check for the activate script rather than the directory, so a venv whose
# creation was interrupted gets rebuilt instead of breaking `source`.
if [[ ! -f "$VENV_DIR/bin/activate" ]]; then
  echo "Creating Python virtual environment..."
  python3 -m venv "$VENV_DIR"
fi

source "$VENV_DIR/bin/activate"

# Best-effort install (e.g. offline with packages already present), but say
# so instead of failing silently.
echo "Installing Python dependencies..."
if ! pip install -q maturin pytest 2>/dev/null; then
  echo "Warning: could not install maturin/pytest; continuing with what is already installed." >&2
fi

# pipefail in a subshell exposes maturin's status, which `| tail` would hide.
echo "Building Python extension..."
if ! (
  set -o pipefail
  maturin develop --features python 2>&1 | tail -3
); then
  echo "Warning: building the Python extension failed; the Python tests below will likely fail." >&2
fi

echo ""
echo "--- Python Binding Tests ---"
{
  echo ""
  echo "=== Python Binding Tests ==="
  echo ""
} >> "$RESULTS_FILE"

# Same pipefail technique: without it the verdict reflects tee, not pytest.
if (
  set -o pipefail
  python -m pytest "$PROJECT_DIR/python/tests/" -v 2>&1 | tee -a "$RESULTS_FILE"
); then
  echo -e "${GREEN}[Python Tests]${NC} PASSED"
else
  echo -e "${RED}[Python Tests]${NC} FAILED"
  # Mirror run_benchmark, which also records failures in the results file.
  echo "FAILED" >> "$RESULTS_FILE"
fi
|
|
|
|
|
echo ""
echo "========================================================================"
echo " Phase 4: End-to-End Demo"
echo "========================================================================"

{
  echo ""
  echo "=== End-to-End Demo ==="
  echo ""
} >> "$RESULTS_FILE"

# Decide which notice to print. Every path ends up running the same demo
# script; presumably the script itself falls back to pseudo-embeddings when
# sentence-transformers is missing -- verify against demo_hat_memory.py.
if pip show sentence-transformers >/dev/null 2>&1; then
  echo "Running end-to-end demo with real embeddings..."
else
  echo "Installing sentence-transformers for full demo..."
  # Deliberately best-effort: the demo still runs without the package.
  pip install -q sentence-transformers 2>/dev/null || true

  if ! pip show sentence-transformers >/dev/null 2>&1; then
    echo "Running demo with pseudo-embeddings (sentence-transformers not available)..."
  fi
fi

# pipefail in a subshell exposes the demo's exit status, which a bare
# `| tee` would discard; the failure is reported but does not abort the run.
if (
  set -o pipefail
  python "$PROJECT_DIR/examples/demo_hat_memory.py" 2>&1 | tee -a "$RESULTS_FILE"
); then
  echo -e "${GREEN}[End-to-End Demo]${NC} PASSED"
else
  echo -e "${RED}[End-to-End Demo]${NC} FAILED"
  echo "FAILED" >> "$RESULTS_FILE"
fi

# Leave the virtual environment activated for the Python phase.
deactivate
|
|
|
|
|
#######################################
# Counts the lines of a file that match a basic regular expression.
# Arguments: $1 - grep pattern; $2 - file to search
# Outputs:   exactly one integer on stdout (0 when nothing matches or the
#            file cannot be read)
# Returns:   0
#######################################
count_matching_lines() {
  local pattern="$1"
  local file="$2"
  local count

  # `grep -c` already prints 0 when nothing matches, but exits 1; appending
  # `|| echo 0` therefore produced the two-line value "0\n0".
  count=$(grep -c -- "$pattern" "$file" 2>/dev/null) || true
  printf '%s\n' "${count:-0}"
}

#######################################
# Prints the end-of-run summary derived from the results file.
# Globals:   RESULTS_FILE (read, appended to); GREEN, NC (read);
#            RUST_PASSED, PYTHON_PASSED, LATENCY (written)
# Arguments: none
# Outputs:   summary on stdout; a "=== Summary ===" heading is appended to
#            RESULTS_FILE
# Returns:   0
#######################################
print_summary() {
  echo ""
  echo "========================================================================"
  echo " Summary"
  echo "========================================================================"

  {
    echo ""
    echo "=== Summary ==="
    echo ""
  } >> "$RESULTS_FILE"

  # Approximate counts scraped from the log. The Rust pattern requires the
  # "... ok" of a per-test line so "test result: ok." lines are not counted.
  RUST_PASSED=$(count_matching_lines '^test .* \.\.\. ok' "$RESULTS_FILE")
  PYTHON_PASSED=$(count_matching_lines 'PASSED' "$RESULTS_FILE")

  echo "Results saved to: $RESULTS_FILE"
  echo ""
  echo "Key Results:"
  echo " - Rust tests passed: ~$RUST_PASSED"
  echo " - Python tests passed: ~$PYTHON_PASSED"
  echo ""

  if grep -q "HAT enables 100% recall" "$RESULTS_FILE"; then
    echo -e "${GREEN}Core claim validated: 100% recall achieved${NC}"
  fi

  # Value from the last latency line, if any. `|| true` keeps a line whose
  # number does not match the expected "<digits>.<digits>ms" form from
  # aborting the whole script under `set -e`.
  LATENCY=$(grep "Average retrieval latency" "$RESULTS_FILE" \
    | tail -1 \
    | grep -oE '[0-9]+\.[0-9]+ms') || true
  if [[ -n "$LATENCY" ]]; then
    echo " - Retrieval latency: $LATENCY"
  fi

  echo ""
  echo "========================================================================"
  echo " Benchmark Complete"
  echo "========================================================================"
  echo ""
  echo "Full results: $RESULTS_FILE"
  echo ""
}

print_summary
|
|
|