#!/usr/bin/env bash
#
# Launcher for compare_model/LLM-Streamline/mseloss_entry.py.
# Every setting below has a default that can be overridden through an
# environment variable; any command-line arguments given to this script are
# forwarded to the Python entry point.

# Strict mode: stop on errors, on unset variables and on failed pipeline stages.
set -o errexit -o nounset -o pipefail

# Use GPU 0 unless the caller already selected devices (unset or empty -> "0").
: "${CUDA_VISIBLE_DEVICES:=0}"
export CUDA_VISIBLE_DEVICES

# Candidate values for MODEL_NAME:
#   meta-llama/Llama-3.2-3B
#   meta-llama/Llama-2-7b-hf

# --- Paths -------------------------------------------------------------------
# The repository root is the parent of the directory containing this script;
# the LLM-Streamline sources live in a fixed sub-directory of it.
script_dir=$(dirname "${BASH_SOURCE[0]}")
repo_root=$(cd "$script_dir/.." && pwd)
workdir=$repo_root/compare_model/LLM-Streamline

# --- Settings (ENV_VAR -> fallback) -------------------------------------------
# Each value is taken from the named environment variable when it is set and
# non-empty, otherwise from the literal default. Quotes are omitted on purpose:
# the right-hand side of an assignment is never word-split or globbed.

# LAYER_INTERVALS falls back to COMMON_PRUNED_BLOCKS, which falls back to 14.
pruned_blocks=${COMMON_PRUNED_BLOCKS:-14}
layer_intervals=${LAYER_INTERVALS:-$pruned_blocks}

model_name=${MODEL_NAME:-meta-llama/Llama-3.2-3B}
dtype=${DTYPE:-bfloat16}

# NOTE(review): the default directory is labelled "llama2_7b" although the
# default model is Llama-3.2-3B, and the label does not follow MODEL_NAME.
# Set OUTDIR explicitly when switching models; confirm which default is meant.
output_dir=${OUTDIR:-$repo_root/results/llama2_7b_streamline_mse_common_${layer_intervals}}

# Data sizes and schedule length.
cosine_num_data=${COSINE_NUM_DATA:-300}
train_num_data=${TRAIN_NUM_DATA:-5000}
epochs=${EPOCHS:-15}

# TRAIN_BATCH_SIZE follows BATCH_SIZE unless given separately.
batch_size=${BATCH_SIZE:-8}
train_batch_size=${TRAIN_BATCH_SIZE:-$batch_size}
grad_accum=${GRAD_ACCUM:-16}

# Optimiser settings.
# NOTE(review): the default MIN_LR (5e-5) is larger than the default LR (1e-5).
# If min_lr is meant to be the floor of a decaying schedule this looks
# inverted -- confirm against mseloss_entry.py.
lr=${LR:-1e-5}
min_lr=${MIN_LR:-5e-5}
wd=${WD:-1e-3}

# Argument vector for mseloss_entry.py. It is kept in an array so that values
# containing whitespace stay single arguments. The flag order is preserved
# because the same array is also written to run_args.txt.
declare -a python_args=()
python_args+=(--model_name "$model_name" --output_dir "$output_dir")
python_args+=(--layer_intervals "$layer_intervals")
python_args+=(--cosine_num_data "$cosine_num_data" --train_num_data "$train_num_data")
# NOTE(review): the flag is spelled "--epoches" here; presumably this matches
# the option name defined by mseloss_entry.py -- confirm before "correcting" it.
python_args+=(--epoches "$epochs")
python_args+=(--batch_size "$batch_size" --train_batch_size "$train_batch_size")
python_args+=(--dtype "$dtype" --gradient_accumulation_step "$grad_accum")
python_args+=(--lr "$lr" --min_lr "$min_lr" --wd "$wd")

# Arguments given to this script are appended after the defaults, unchanged.
python_args+=("$@")

# Create the output directory and record provenance (commit + exact command
# line) next to the results, before the run starts.
mkdir -p "$output_dir"

# Resolve HEAD in a single guarded step. `rev-parse HEAD` exits non-zero in a
# work tree that has no commits yet (and git may be missing altogether); as a
# bare command substitution that failure would abort the whole run under
# `set -e` over a purely informational field, so fall back to "unknown".
git_commit="unknown"
if head_sha=$(git -C "$repo_root" rev-parse --verify HEAD 2>/dev/null); then
  git_commit=$head_sha
fi

{
  printf 'git_commit=%s\n' "$git_commit"
  printf 'command:\n'
  # %q shell-quotes every word so the logged line can be pasted back verbatim.
  printf '%q ' python "$workdir/mseloss_entry.py" "${python_args[@]}"
  printf '\n'
} > "$output_dir/run_args.txt"

# Launch the entry point from inside its own directory. The LLM-Streamline
# directory and the repository root go first on the import path; an inherited,
# non-empty PYTHONPATH is kept after them.
cd "$workdir"

import_path="$workdir:$repo_root"
if [[ -n "${PYTHONPATH:-}" ]]; then
  import_path+=":$PYTHONPATH"
fi

PYTHONPATH="$import_path" python mseloss_entry.py "${python_args[@]}"