HAT / benchmarks /run_all_benchmarks.sh
Andrew Young
Upload folder using huggingface_hub
8ef2d83 verified
#!/bin/bash
#
# HAT Benchmark Reproducibility Suite
# ===================================
#
# This script runs all benchmarks from the HAT paper and generates
# a comprehensive results report.
#
# Usage:
# ./run_all_benchmarks.sh [--quick]
#
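# Options:
# --quick Run abbreviated benchmarks (faster, less thorough)
#         NOTE: this script currently only announces the flag and records
#         it in the results file; QUICK_MODE is not exported or passed to
#         the cargo/Python commands, so the same benchmarks are run.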
#
# Requirements:
# - Rust toolchain (cargo)
# - Python 3.8+ with venv
# - ~2GB free disk space
# - ~10 minutes for full suite, ~2 minutes for quick
set -e

# Locate this script on disk. The project root is its parent directory, and
# every run writes to its own timestamped file under results/.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "${SCRIPT_DIR}")"
RESULTS_DIR="${SCRIPT_DIR}/results"
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
RESULTS_FILE="${RESULTS_DIR}/benchmark_results_${TIMESTAMP}.txt"

# ANSI escape sequences for coloured console output (rendered by `echo -e`).
NC='\033[0m' # No Color
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'

# Only the first positional argument is inspected; anything other than
# --quick leaves the default in place.
QUICK_MODE=false
case "$1" in
  --quick)
    QUICK_MODE=true
    echo -e "${YELLOW}Running in quick mode (abbreviated benchmarks)${NC}"
    ;;
esac

# The results directory must exist before the report file is created.
mkdir -p "${RESULTS_DIR}"

# Console banner.
cat << EOF
========================================================================
 HAT Benchmark Reproducibility Suite
 $(date)
========================================================================

Project directory: ${PROJECT_DIR}
Results will be saved to: ${RESULTS_FILE}

EOF

# Start the report with a short description of the run environment.
{
  echo "HAT Benchmark Results"
  echo "====================="
  echo "Date: $(date)"
  echo "Host: $(hostname)"
  echo "Rust: $(rustc --version)"
  echo "Quick mode: ${QUICK_MODE}"
} > "${RESULTS_FILE}"

# All cargo/maturin commands below run from the project root.
cd "${PROJECT_DIR}"
#######################################
# Run one cargo integration-test target and record its output.
# Globals:
#   RESULTS_FILE (appended to); BLUE, GREEN, RED, NC (read)
# Arguments:
#   $1 - human-readable benchmark name used in console and report headings
#   $2 - cargo test target name, passed to `cargo test --test`
# Outputs:
#   Streams cargo's combined stdout/stderr to the console and appends it to
#   RESULTS_FILE, then prints a PASSED or FAILED status line to the console.
#   On failure a "FAILED" marker line is also appended to RESULTS_FILE.
# Returns:
#   0 always (the last command is an echo), so one failing benchmark does
#   not abort the rest of the suite under `set -e`.
#######################################
run_benchmark() {
  local name="$1"
  local test_name="$2"

  echo -e "${BLUE}[$name]${NC} Running..."
  {
    echo ""
    echo "=== $name ==="
    echo ""
  } >> "$RESULTS_FILE"

  # Without pipefail the pipeline's status is tee's, which is practically
  # always 0, so a failing test run would be reported as PASSED. Enabling
  # pipefail inside a subshell makes cargo's status decide the branch while
  # leaving the caller's shell options untouched.
  if (
    set -o pipefail
    cargo test --test "$test_name" -- --nocapture 2>&1 | tee -a "$RESULTS_FILE"
  ); then
    echo -e "${GREEN}[$name]${NC} PASSED"
  else
    echo -e "${RED}[$name]${NC} FAILED"
    echo "FAILED" >> "$RESULTS_FILE"
  fi
  echo ""
}
echo "========================================================================"
echo " Phase 1: Building Project"
echo "========================================================================"

# `cargo build ... | tail -5` on its own reports tail's exit status, so a
# broken build would slip past `set -e` and only surface later as a wall of
# failing benchmarks. Each pipeline therefore runs in a subshell with
# pipefail enabled, and the script stops with a clear message when cargo
# fails. The last lines of cargo's output are still shown either way.
echo "Building release version..."
if ! ( set -o pipefail; cargo build --release 2>&1 | tail -5 ); then
  echo -e "${RED}Release build failed; aborting benchmark run.${NC}" >&2
  exit 1
fi

echo "Building test suite..."
if ! ( set -o pipefail; cargo build --tests 2>&1 | tail -5 ); then
  echo -e "${RED}Test suite build failed; aborting benchmark run.${NC}" >&2
  exit 1
fi
echo ""
echo "========================================================================"
echo " Phase 2: Running Core Benchmarks"
echo "========================================================================"

# One entry per core benchmark, in execution order:
#   "<section heading>|<display name>|<cargo test target>"
core_benchmarks=(
  "Phase 3.1: HAT vs HNSW Comparative Benchmark|HAT vs HNSW|phase31_hat_vs_hnsw"
  "Phase 3.2: Real Embedding Dimensions|Real Embeddings|phase32_real_embeddings"
  "Phase 3.3: Persistence Layer|Persistence|phase33_persistence"
  "Phase 4.2: Attention State Format|Attention State|phase42_attention_state"
)

for entry in "${core_benchmarks[@]}"; do
  # Split the entry on '|' into its three fields.
  IFS='|' read -r heading label target <<< "$entry"
  echo ""
  echo "--- ${heading} ---"
  run_benchmark "${label}" "${target}"
done
echo ""
echo "========================================================================"
echo " Phase 3: Python Integration Tests"
echo "========================================================================"

# The virtual environment is kept between runs so dependencies are only
# installed once. NOTE: the default lives at a fixed path under /tmp, which
# is writable by other users on shared machines; set HAT_BENCH_VENV_DIR to
# point at a private location when that matters.
VENV_DIR="${HAT_BENCH_VENV_DIR:-/tmp/arms-hat-bench-venv}"

# Test for the activate script rather than the directory so that a
# half-created environment from an interrupted run gets repaired instead of
# making `source` fail.
if [[ ! -f "$VENV_DIR/bin/activate" ]]; then
  echo "Creating Python virtual environment..."
  python3 -m venv "$VENV_DIR"
fi
source "$VENV_DIR/bin/activate"

# Install dependencies. Best effort: the packages may already be present
# (e.g. when offline), so a failure is reported but does not stop the run.
echo "Installing Python dependencies..."
if ! pip install -q maturin pytest 2>/dev/null; then
  echo "Warning: could not install maturin/pytest; using whatever is already installed." >&2
fi

# Build Python extension. pipefail (scoped to the subshell) makes maturin's
# status visible through the `| tail -3` filter. The run continues so the
# Rust results still get summarised, but the failure is called out.
echo "Building Python extension..."
if ! ( set -o pipefail; maturin develop --features python 2>&1 | tail -3 ); then
  echo -e "${RED}[Python Build]${NC} maturin develop failed; Python tests are likely to fail." >&2
fi

# Run Python tests
echo ""
echo "--- Python Binding Tests ---"
{
  echo ""
  echo "=== Python Binding Tests ==="
  echo ""
} >> "$RESULTS_FILE"

# As with the cargo benchmarks, the verdict must come from pytest and not
# from tee, otherwise failures are reported as PASSED.
if (
  set -o pipefail
  python -m pytest "$PROJECT_DIR/python/tests/" -v 2>&1 | tee -a "$RESULTS_FILE"
); then
  echo -e "${GREEN}[Python Tests]${NC} PASSED"
else
  echo -e "${RED}[Python Tests]${NC} FAILED"
  # Same failure marker that run_benchmark writes for the Rust benchmarks.
  echo "FAILED" >> "$RESULTS_FILE"
fi
echo ""
echo "========================================================================"
echo " Phase 4: End-to-End Demo"
echo "========================================================================"

# Section heading in the report, with a blank line on either side.
printf '\n%s\n\n' "=== End-to-End Demo ===" >> "$RESULTS_FILE"

# The demo script is launched the same way in every case; only the message
# shown beforehand depends on whether sentence-transformers is (or can be
# made) available in the active environment.
if pip show sentence-transformers >/dev/null 2>&1; then
  echo "Running end-to-end demo with real embeddings..."
else
  echo "Installing sentence-transformers for full demo..."
  pip install -q sentence-transformers 2>/dev/null || true
  if ! pip show sentence-transformers >/dev/null 2>&1; then
    echo "Running demo with pseudo-embeddings (sentence-transformers not available)..."
  fi
fi
python "$PROJECT_DIR/examples/demo_hat_memory.py" 2>&1 | tee -a "$RESULTS_FILE"

# Leave the virtual environment again.
deactivate
echo ""
echo "========================================================================"
echo " Summary"
echo "========================================================================"

#######################################
# Count the lines of a file that match a grep basic regular expression.
# Arguments:
#   $1 - grep pattern
#   $2 - file to search
# Outputs:
#   The number of matching lines on stdout; "0" when nothing matches or the
#   file cannot be read.
# Returns:
#   0 always.
#######################################
count_matches() {
  local pattern="$1"
  local file="$2"
  local count
  # `grep -c` already prints "0" when nothing matches but exits 1; appending
  # `|| echo 0` would therefore yield two lines ("0" twice). Ignore the
  # status and only fall back to 0 when grep printed nothing at all.
  count=$(grep -c -- "$pattern" "$file" 2>/dev/null) || true
  echo "${count:-0}"
}

# Collect every metric before any summary text is appended to the report,
# so the summary lines themselves can never influence the numbers.
# The counts are approximate (hence the "~" below): they are plain line
# matches over the whole report.
RUST_PASSED=$(count_matches "test .* ok" "$RESULTS_FILE")
PYTHON_PASSED=$(count_matches "PASSED" "$RESULTS_FILE")

RECALL_VALIDATED=false
if grep -q "HAT enables 100% recall" "$RESULTS_FILE"; then
  RECALL_VALIDATED=true
fi

LATENCY=""
if grep -q "Average retrieval latency" "$RESULTS_FILE"; then
  # `|| true`: if the last matching line carries no "<digits>.<digits>ms"
  # value, the final grep exits 1, which would otherwise abort the script
  # under `set -e` right before the closing banner.
  LATENCY=$(grep "Average retrieval latency" "$RESULTS_FILE" \
    | tail -1 \
    | grep -oE '[0-9]+\.[0-9]+ms' || true)
fi

# Summary section of the report.
{
  echo ""
  echo "=== Summary ==="
  echo ""
} >> "$RESULTS_FILE"

echo "Results saved to: $RESULTS_FILE"
echo ""
# The key results go to the console and into the report, so the summary
# section of the file is no longer empty.
{
  echo "Key Results:"
  echo " - Rust tests passed: ~$RUST_PASSED"
  echo " - Python tests passed: ~$PYTHON_PASSED"
} | tee -a "$RESULTS_FILE"
echo ""

if [[ "$RECALL_VALIDATED" == true ]]; then
  echo -e "${GREEN}Core claim validated: 100% recall achieved${NC}"
  # Plain text (no colour codes) for the report file.
  echo "Core claim validated: 100% recall achieved" >> "$RESULTS_FILE"
fi
if [[ -n "$LATENCY" ]]; then
  echo " - Retrieval latency: $LATENCY" | tee -a "$RESULTS_FILE"
fi

echo ""
echo "========================================================================"
echo " Benchmark Complete"
echo "========================================================================"
echo ""
echo "Full results: $RESULTS_FILE"
echo ""