File size: 4,827 Bytes
c364566
 
 
 
 
 
ce4aeca
c364566
 
 
 
 
 
ce4aeca
c364566
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#!/usr/bin/env bash
#
# Quantize a BF16 GGUF model with llama-quantize using one of the
# per-tensor-type "recipes" below, then split the result into
# size-capped shards with llama-gguf-split.
#
# Usage: $0 <llama_cpp_dir> <quant_type> [--dry-run]

set -euo pipefail

# Quantization recipes. Each entry is a newline-separated list of
# KEY=VALUE shell assignments that the main loop below eval's:
#   MIX                    - recipe name, matched against <quant_type>
#   TYPE_FFN_GATE_UP_EXPS  - type for ffn_gate/ffn_up expert tensors
#   TYPE_FFN_DOWN_EXPS     - type for ffn_down expert tensors
#   TYPE_DEFAULT           - fallback type passed as the positional
#                            quantization argument (required)
# The loop also recognizes TYPE_TOKEN_EMBEDDING and TYPE_OUTPUT.
# Keep these strings as plain assignments only: they are eval'd.
recipes=(
  "
  MIX=Q5_K
  TYPE_FFN_GATE_UP_EXPS=IQ3_S
  TYPE_FFN_DOWN_EXPS=Q5_K
  TYPE_DEFAULT=Q8_0
  "

  "
  MIX=Q4_K
  TYPE_FFN_GATE_UP_EXPS=IQ3_S
  TYPE_FFN_DOWN_EXPS=Q4_K
  TYPE_DEFAULT=Q8_0
  "
  )

# Validate that 2 or 3 arguments are provided.
# Fix: the old message claimed "Exactly 2 arguments required" even though a
# third (--dry-run) is accepted; diagnostics now also go to stderr.
if [ $# -lt 2 ] || [ $# -gt 3 ]; then
  echo "Error: 2 arguments required (plus optional --dry-run)." >&2
  echo "Usage: $0 <llama_cpp_dir> <quant_type> [--dry-run]" >&2
  echo "Example: $0 ~/code/llama.cpp IQ4_XS" >&2
  echo "Example: $0 ~/code/llama.cpp IQ4_XS --dry-run" >&2
  exit 1
fi

# Assign arguments to variables for clarity.
LLAMA_CPP_DIR="$1"   # path to a built llama.cpp checkout
QUANT_TYPE="$2"      # recipe name (MIX) to apply, e.g. IQ4_XS

# Handle optional --dry-run argument.
DRY_RUN=false
if [ $# -eq 3 ]; then
  if [ "$3" != "--dry-run" ]; then
    echo "Error: Unexpected third argument: $3" >&2
    echo "Usage: $0 <llama_cpp_dir> <quant_type> [--dry-run]" >&2
    exit 1
  fi
  DRY_RUN=true
fi

# Validate that the llama.cpp directory exists.
if [ ! -d "$LLAMA_CPP_DIR" ]; then
  echo "Error: llama.cpp directory not found: $LLAMA_CPP_DIR" >&2
  exit 1
fi

# Resolve this script's directory, then the project root one level up.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Fix: quote the path (SC2086) — an unquoted $SCRIPT_DIR breaks on
# directories containing spaces or glob characters.
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"

# Validate that the BF16 directory exists.
BF16_DIR="$PROJECT_DIR/BF16"
if [ ! -d "$BF16_DIR" ]; then
  echo "Error: BF16 directory not found: $BF16_DIR" >&2
  exit 1
fi

# Discover the input GGUF file using glob.
# We look for files matching *BF16*.gguf in the BF16 directory.
# If split, we select the first part (00001) via sorting.
INPUT_GGUF=$(find "$BF16_DIR" -maxdepth 1 -name "*BF16*.gguf" -type f | sort | head -n 1)

if [ -z "$INPUT_GGUF" ]; then
  echo "Error: No BF16 GGUF files found in $BF16_DIR" >&2
  echo "Expected pattern: *BF16*.gguf" >&2
  exit 1
fi

echo "Found input file: $INPUT_GGUF"

# Extract model name from filename for output naming (strips everything
# from the first "-BF16" onward, including the .gguf extension).
# Example: Qwen3.5-122B-A10B-BF16-00001-of-00003.gguf -> Qwen3.5-122B-A10B
# Parameter expansion replaces the former sed pipeline: no extra forks,
# no regex-escaping hazards, same result for *BF16*.gguf filenames.
MODEL_NAME="$(basename "$INPUT_GGUF")"
MODEL_NAME="${MODEL_NAME%%-BF16*}"

# Validate that the imatrix file exists (required for this quantization strategy).
IMATRIX_PATH="$PROJECT_DIR/imatrix.gguf"
if [ ! -e "$IMATRIX_PATH" ]; then
  echo "Error: imatrix file not found: $IMATRIX_PATH" >&2
  echo "Please generate imatrix.gguf before running quantization." >&2
  exit 1
fi

# Validate that the required llama.cpp binaries exist and are executable.
QUANTIZE_BIN="$LLAMA_CPP_DIR/build/bin/llama-quantize"
SPLIT_BIN="$LLAMA_CPP_DIR/build/bin/llama-gguf-split"

if [ ! -x "$QUANTIZE_BIN" ]; then
  echo "Error: llama-quantize binary not found: $QUANTIZE_BIN" >&2
  exit 1
fi

if [ ! -x "$SPLIT_BIN" ]; then
  echo "Error: llama-gguf-split binary not found: $SPLIT_BIN" >&2
  exit 1
fi

# Derive output filenames. The intermediate single-file output lives in the
# project root; it is split into shards (and deleted) after quantization.
INTERMEDIATE_OUTPUT="$PROJECT_DIR/${MODEL_NAME}-${QUANT_TYPE}.gguf"

# Check if intermediate output already exists to prevent accidental overwrite.
# Skip this check if dry-run is enabled (nothing is written in that mode).
if [ "$DRY_RUN" = false ] && [ -e "$INTERMEDIATE_OUTPUT" ]; then
  echo "Error: Intermediate output already exists: $INTERMEDIATE_OUTPUT" >&2
  exit 1
fi

echo "Starting quantization..."

# Optional flags for llama-quantize. An array (instead of an unquoted
# scalar) avoids word-splitting surprises and never injects an empty
# argument when dry-run is off.
DRY_RUN_ARGS=()
if [ "$DRY_RUN" = true ]; then
  DRY_RUN_ARGS=(--dry-run)
fi

# Find the recipe whose MIX matches QUANT_TYPE, quantize, then split.
for recipe in "${recipes[@]}"; do
  # Reset all recipe variables so values never leak between iterations
  # (and so set -u is satisfied before the recipe is applied).
  MIX=
  TYPE_FFN_GATE_UP_EXPS=
  TYPE_FFN_DOWN_EXPS=
  TYPE_TOKEN_EMBEDDING=
  TYPE_OUTPUT=
  TYPE_DEFAULT=

  # Recipes are trusted, hard-coded strings defined at the top of this
  # script; eval must never be fed external input here.
  eval "$recipe"

  if [ "$MIX" != "$QUANT_TYPE" ]; then
    continue
  fi

  if [ -z "${TYPE_DEFAULT}" ]; then
    echo "TYPE_DEFAULT not defined for recipe $MIX!" >&2
    exit 1
  fi

  # Per-tensor type overrides, built up as (--flag value) pairs.
  TYPE_ARGS=()

  if [ -n "${TYPE_FFN_GATE_UP_EXPS:-}" ]; then
    TYPE_ARGS+=(
      "--tensor-type" "ffn_gate_up_exps=${TYPE_FFN_GATE_UP_EXPS}"
      "--tensor-type" "ffn_gate_exps=${TYPE_FFN_GATE_UP_EXPS}"
      "--tensor-type" "ffn_up_exps=${TYPE_FFN_GATE_UP_EXPS}"
    )
  fi

  if [ -n "${TYPE_FFN_DOWN_EXPS:-}" ]; then
    TYPE_ARGS+=("--tensor-type" "ffn_down_exps=${TYPE_FFN_DOWN_EXPS}")
  fi

  if [ -n "${TYPE_OUTPUT:-}" ]; then
    TYPE_ARGS+=("--output-tensor-type" "${TYPE_OUTPUT}")
  fi

  if [ -n "${TYPE_TOKEN_EMBEDDING:-}" ]; then
    TYPE_ARGS+=("--token-embedding-type" "${TYPE_TOKEN_EMBEDDING}")
  fi

  # The ${arr[@]+...} guard expands to nothing when the array is empty,
  # which keeps set -u happy on bash < 4.4 (where an empty "${arr[@]}"
  # raises "unbound variable"). TYPE_DEFAULT is quoted: it is a single
  # positional argument, not a list.
  "$QUANTIZE_BIN" \
    ${DRY_RUN_ARGS[@]+"${DRY_RUN_ARGS[@]}"} \
    ${TYPE_ARGS[@]+"${TYPE_ARGS[@]}"} \
    --imatrix "$IMATRIX_PATH" \
    "$INPUT_GGUF" \
    "$INTERMEDIATE_OUTPUT" \
    "$TYPE_DEFAULT"

  if [ "$DRY_RUN" = false ]; then
    echo "Starting split..."
    OUTPUT_DIR="${PROJECT_DIR}/$QUANT_TYPE"
    mkdir -p "$OUTPUT_DIR"   # quoted (SC2086): path may contain spaces

    OUTPUT_PREFIX="${OUTPUT_DIR}/${MODEL_NAME}-${QUANT_TYPE}"

    # Split the single-file output into <=42G shards.
    "$SPLIT_BIN" \
      --split-max-size 42G \
      --no-tensor-first-split \
      "$INTERMEDIATE_OUTPUT" \
      "$OUTPUT_PREFIX"

    # Cleanup intermediate file; shards in OUTPUT_DIR are the final result.
    rm -f "$INTERMEDIATE_OUTPUT"

    echo "Quantization complete. Output saved to: $OUTPUT_DIR"
  fi

  exit 0
done

echo "Quantization recipe $QUANT_TYPE not found!" >&2
exit 1