davezaxh committed on
Commit
d4e1b97
·
verified ·
1 Parent(s): d1cc352

Upload scripts/quantizer.sh with huggingface_hub

Browse files
Files changed (1) hide show
  1. scripts/quantizer.sh +40 -0
scripts/quantizer.sh ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
set -euo pipefail

# Simple quantization script for llama.cpp models.
#
# Usage: ./quantizer.sh <imatrix_file> <input_model> [quant_type]
#
# Arguments:
#   imatrix_file  file handed to llama-quantize through --imatrix (required)
#   input_model   model file to quantize (required)
#   quant_type    quantization type passed to llama-quantize (default: Q4_K_M)
#
# The quantized model is written to the current working directory as
#   <basename of input_model without .gguf>-<quant_type>.gguf
#
# Exit status: non-zero when an argument is missing, an input file does not
# exist, llama-quantize cannot be located, or llama-quantize itself fails.

# Print each argument on its own line to stderr, then abort with status 1.
die() {
  printf '%s\n' "$@" >&2
  exit 1
}

IMATRIX="${1:?Error: imatrix file required}"
INPUT_MODEL="${2:?Error: input model required}"
QUANT_TYPE="${3:-Q4_K_M}"

# Fail early with a clear message instead of letting the quantizer start up
# and trip over a mistyped path.
[[ -f "$IMATRIX" ]] || die "Error: imatrix file not found: $IMATRIX"
[[ -f "$INPUT_MODEL" ]] || die "Error: input model not found: $INPUT_MODEL"

# Extract base name for output; `--` keeps a path that starts with '-' from
# being parsed as an option by basename.
BASE_NAME=$(basename -- "$INPUT_MODEL" .gguf)
OUTPUT_MODEL="${BASE_NAME}-${QUANT_TYPE}.gguf"

# Find llama-quantize binary: prefer whatever is on PATH, then fall back to
# the build directory under $HOME/llama.cpp.
FALLBACK_BIN="$HOME/llama.cpp/build/bin/llama-quantize"
if command -v llama-quantize &>/dev/null; then
  QUANTIZE_BIN="llama-quantize"
elif [[ -f "$FALLBACK_BIN" && -x "$FALLBACK_BIN" ]]; then
  # Must be executable, not merely present, or the invocation below would
  # fail with a confusing "Permission denied".
  QUANTIZE_BIN="$FALLBACK_BIN"
else
  die "Error: llama-quantize not found" \
    "Install llama.cpp or add it to PATH"
fi

echo "Quantizing model..."
printf '%s\n' " Input: $INPUT_MODEL"
printf '%s\n' " Output: $OUTPUT_MODEL"
printf '%s\n' " Type: $QUANT_TYPE"
printf '%s\n' " Imatrix: $IMATRIX"
echo ""

# Under `set -e`, a non-zero exit from llama-quantize aborts the script here,
# so the "Done!" line below is only printed on success.
"$QUANTIZE_BIN" \
  --imatrix "$IMATRIX" \
  "$INPUT_MODEL" \
  "$OUTPUT_MODEL" \
  "$QUANT_TYPE"

echo ""
printf '%s\n' "Done! Output: $OUTPUT_MODEL"