#!/usr/bin/env bash
set -euo pipefail

# Quantization recipes. Each entry is a block of shell assignments that is
# eval'd below; MIX names the recipe and must match the requested quant type.
recipes=(
  "MIX=Q5_K
   TYPE_FFN_GATE_UP_EXPS=IQ3_S
   TYPE_FFN_DOWN_EXPS=Q5_K
   TYPE_DEFAULT=Q8_0"
  "MIX=Q4_K
   TYPE_FFN_GATE_UP_EXPS=IQ3_S
   TYPE_FFN_DOWN_EXPS=Q4_K
   TYPE_DEFAULT=Q8_0"
)

# Validate that 2 or 3 arguments are provided
if [ $# -lt 2 ] || [ $# -gt 3 ]; then
  echo "Error: Exactly 2 arguments required (plus optional --dry-run)."
  echo "Usage: $0 <llama_cpp_dir> <quant_type> [--dry-run]"
  echo "Example: $0 ~/code/llama.cpp IQ4_XS"
  echo "Example: $0 ~/code/llama.cpp IQ4_XS --dry-run"
  exit 1
fi

# Assign arguments to variables for clarity
LLAMA_CPP_DIR="$1"
QUANT_TYPE="$2"

# Handle optional --dry-run argument
DRY_RUN=false
if [ $# -eq 3 ]; then
  if [ "$3" != "--dry-run" ]; then
    echo "Error: Unexpected third argument: $3"
    echo "Usage: $0 <llama_cpp_dir> <quant_type> [--dry-run]"
    exit 1
  fi
  DRY_RUN=true
fi

# Validate that the llama.cpp directory exists
if [ ! -d "$LLAMA_CPP_DIR" ]; then
  echo "Error: llama.cpp directory not found: $LLAMA_CPP_DIR"
  exit 1
fi

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"

# Validate that the BF16 directory exists
BF16_DIR="$PROJECT_DIR/BF16"
if [ ! -d "$BF16_DIR" ]; then
  echo "Error: BF16 directory not found: $BF16_DIR"
  exit 1
fi

# Discover the input GGUF file using glob
# We look for files matching *BF16*.gguf in the BF16 directory.
# If split, we select the first part (00001) via sorting.
INPUT_GGUF=$(find "$BF16_DIR" -maxdepth 1 -name "*BF16*.gguf" -type f | sort | head -n 1)
if [ -z "$INPUT_GGUF" ]; then
  echo "Error: No BF16 GGUF files found in $BF16_DIR"
  echo "Expected pattern: *BF16*.gguf"
  exit 1
fi
echo "Found input file: $INPUT_GGUF"

# Extract model name from filename for output naming (strips -BF16... suffix)
# Example: Qwen3.5-122B-A10B-BF16-00001-of-00003.gguf -> Qwen3.5-122B-A10B
MODEL_NAME=$(basename "$INPUT_GGUF" | sed 's/-BF16.*\.gguf//')

# Validate that the imatrix file exists (required for this quantization strategy)
IMATRIX_PATH="$PROJECT_DIR/imatrix.gguf"
if [ ! -e "$IMATRIX_PATH" ]; then
  echo "Error: imatrix file not found: $IMATRIX_PATH"
  echo "Please generate imatrix.gguf before running quantization."
  exit 1
fi

# Validate that the required binaries exist
QUANTIZE_BIN="$LLAMA_CPP_DIR/build/bin/llama-quantize"
SPLIT_BIN="$LLAMA_CPP_DIR/build/bin/llama-gguf-split"
if [ ! -x "$QUANTIZE_BIN" ]; then
  echo "Error: llama-quantize binary not found: $QUANTIZE_BIN"
  exit 1
fi
if [ ! -x "$SPLIT_BIN" ]; then
  echo "Error: llama-gguf-split binary not found: $SPLIT_BIN"
  exit 1
fi

# Derive output filenames
INTERMEDIATE_OUTPUT="$PROJECT_DIR/${MODEL_NAME}-${QUANT_TYPE}.gguf"

# Check if intermediate output already exists to prevent accidental overwrite
# Skip this check if dry-run is enabled
if [ "$DRY_RUN" = false ] && [ -e "$INTERMEDIATE_OUTPUT" ]; then
  echo "Error: Intermediate output already exists: $INTERMEDIATE_OUTPUT"
  exit 1
fi

echo "Starting quantization..."

# Determine dry-run argument (intentionally left unquoted below so it
# expands to nothing when empty)
DRY_RUN_ARG=""
if [ "$DRY_RUN" = true ]; then
  DRY_RUN_ARG="--dry-run"
fi

# Run quantization
for recipe in "${recipes[@]}"; do
  # Reset per-recipe variables, then load this recipe's assignments
  MIX=
  TYPE_FFN_GATE_UP_EXPS=
  TYPE_FFN_DOWN_EXPS=
  TYPE_TOKEN_EMBEDDING=
  TYPE_OUTPUT=
  TYPE_DEFAULT=
  eval "$recipe"

  # Skip recipes that don't match the requested quant type
  if [ "$MIX" != "$QUANT_TYPE" ]; then
    continue
  fi

  if [ -z "${TYPE_DEFAULT}" ]; then
    echo "TYPE_DEFAULT not defined for recipe $MIX!" >&2
    exit 1
  fi

  # Build per-tensor type overrides for llama-quantize
  TYPE_ARGS=()
  if [ -n "${TYPE_FFN_GATE_UP_EXPS:-}" ]; then
    TYPE_ARGS+=(
      "--tensor-type" "ffn_gate_up_exps=${TYPE_FFN_GATE_UP_EXPS}"
      "--tensor-type" "ffn_gate_exps=${TYPE_FFN_GATE_UP_EXPS}"
      "--tensor-type" "ffn_up_exps=${TYPE_FFN_GATE_UP_EXPS}"
    )
  fi
  if [ -n "${TYPE_FFN_DOWN_EXPS:-}" ]; then
    TYPE_ARGS+=("--tensor-type" "ffn_down_exps=${TYPE_FFN_DOWN_EXPS}")
  fi
  if [ -n "${TYPE_OUTPUT:-}" ]; then
    TYPE_ARGS+=("--output-tensor-type" "${TYPE_OUTPUT}")
  fi
  if [ -n "${TYPE_TOKEN_EMBEDDING:-}" ]; then
    TYPE_ARGS+=("--token-embedding-type" "${TYPE_TOKEN_EMBEDDING}")
  fi

  "$QUANTIZE_BIN" \
    $DRY_RUN_ARG \
    "${TYPE_ARGS[@]}" \
    --imatrix "$IMATRIX_PATH" \
    "$INPUT_GGUF" \
    "$INTERMEDIATE_OUTPUT" \
    "$TYPE_DEFAULT"

  if [ "$DRY_RUN" = false ]; then
    echo "Starting split..."
    OUTPUT_DIR="${PROJECT_DIR}/${QUANT_TYPE}"
    mkdir -p "$OUTPUT_DIR"
    OUTPUT_PREFIX="${OUTPUT_DIR}/${MODEL_NAME}-${QUANT_TYPE}"

    # Run split
    "$SPLIT_BIN" \
      --split-max-size 42G \
      --no-tensor-first-split \
      "$INTERMEDIATE_OUTPUT" \
      "$OUTPUT_PREFIX"

    # Cleanup intermediate file
    rm -f "$INTERMEDIATE_OUTPUT"

    echo "Quantization complete. Output saved to: $OUTPUT_DIR"
  fi

  exit 0
done

echo "Quantization recipe $QUANT_TYPE not found!" >&2
exit 1