#!/usr/bin/env bash
# Sweep of LLM-Pruner runs at pruning ratios chosen to match dropping whole layers.
set -euo pipefail

repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
runner="$repo_root/script/run_llmpruner_llama.sh"

base_model="${BASE_MODEL:-meta-llama/Llama-3.1-8B}"
device="${DEVICE:-cpu}"
eval_device="${EVAL_DEVICE:-cuda}"
num_examples="${NUM_EXAMPLES:-10}"

# Lowercase the model name and map anything outside [a-z0-9] to "_".
# "\n" is kept in the tr set so the trailing newline is not turned into an extra "_".
model_tag="$(basename "$base_model" | tr '[:upper:]' '[:lower:]' | tr -c 'a-z0-9\n' '_')"

# run_case LABEL RATIO [extra runner args...]
# Any arguments after the ratio are forwarded unchanged to run_llmpruner_llama.sh.
run_case() {
  local label="$1"
  local ratio="$2"
  shift 2
  echo "[LLM-Pruner] ${label}: PRUNING_RATIO=${ratio}"
  BASE_MODEL="$base_model" \
  PRUNE_CKPT_PATH="${model_tag}_${label}" \
  PRUNING_RATIO="$ratio" \
  DEVICE="$device" \
  EVAL_DEVICE="$eval_device" \
    bash "$runner" --num_examples "$num_examples" "$@"
}

# Equivalent block-only pruning ratios for dropping 6/11/16 layers from the
# 32-layer Llama 3.1 8B (the default base model). Llama-2 7B equivalents are
# listed in the comments below.
run_case "drop6eq" "0.23"
run_case "drop11eq" "0.45"
run_case "drop16eq" "0.70"

# Ratio = 54.3965%

# For Llama-2 7B:
# run_case "drop6eq" "0.23"
# run_case "drop11eq" "0.42"
# run_case "drop16eq" "0.62"

# For Llama-3 8B:
# run_case "drop6eq" "0.23"
# run_case "drop11eq" "0.45"
# run_case "drop16eq" "0.70"

# "0.23"  86.1997%
# "0.48"  68.1928%
# "0.51"  68.1928%
# "0.70"  56.6762%

# Llama 7B depth:
# 18.02%
# 33.04%
# 48.05%

# Llama 8B depth:
# 16.30%, 86.1997%
# 29.88%, 72.2934%
# 43.46%, 56.6762%
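
# Usage sketch. The wrapper's filename is assumed here as
# script/run_llmpruner_sweep.sh; substitute the actual name of this file.
# All defaults above can be overridden through the environment, e.g.:
#
#   BASE_MODEL=meta-llama/Llama-2-7b-hf DEVICE=cuda EVAL_DEVICE=cuda \
#   NUM_EXAMPLES=100 bash script/run_llmpruner_sweep.sh
#
# Any positional arguments given to run_case after the ratio (none in the sweep
# above) are passed straight through to run_llmpruner_llama.sh.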