# --- Provenance (HuggingFace page header, kept as comments so the script runs) ---
# SimpleTool / llama-cpp-quantize.sh
# Cialtion's picture
# Upload llama-cpp-quantize.sh with huggingface_hub
# 55271ca verified
# raw / history blame
# 4.55 kB
#!/bin/bash
# ============================================================
# llama.cpp GGUF batch quantization script
# Purpose: quantize F16 GGUF models into the mainstream
#          Ollama quantization formats.
# ============================================================

# ---------------- Configuration ----------------
# Root of the llama.cpp checkout (must contain build/bin/llama-quantize).
LLAMA_CPP_DIR="/inspire/hdd/global_user/shixiaoxin-253107030017/sxx/llama.cpp"
QUANTIZE_BIN="${LLAMA_CPP_DIR}/build/bin/llama-quantize"
INPUT_DIR="./gguf_models"       # directory holding the F16 *.gguf inputs
OUTPUT_DIR="./gguf_quantized"   # quantized files are written here

# Make the llama.cpp shared libraries visible to the quantize binary.
# ${VAR:+:...} only appends the separator when LD_LIBRARY_PATH already has
# a value, avoiding a trailing ':' (an empty path element means "current
# directory" to the dynamic loader) when it was previously unset.
export LD_LIBRARY_PATH="${LLAMA_CPP_DIR}/build/bin${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# Mainstream Ollama quantization formats (ordered by recommendation):
#   Q4_K_M: most popular, balances quality and size (recommended)
#   Q5_K_M: better quality, slightly larger files
#   Q8_0:   high quality, larger files
#   Q4_0:   basic 4-bit, best compatibility
#   Q3_K_M: smallest files, larger quality loss
QUANT_TYPES=("Q4_K_M" "Q5_K_M" "Q8_0")
# ============================================================
# Environment checks
# ============================================================
echo "🔧 检查环境..."

# The quantize binary must exist; otherwise print build instructions.
# Diagnostics go to stderr so stdout stays clean for progress output.
if [ ! -f "$QUANTIZE_BIN" ]; then
    echo "❌ 错误:找不到量化工具 $QUANTIZE_BIN" >&2
    echo "请先编译llama.cpp:" >&2
    echo " cd $LLAMA_CPP_DIR" >&2
    echo " mkdir -p build && cd build" >&2
    echo " cmake .. -DGGML_CUDA=ON" >&2
    echo " make -j$(nproc)" >&2
    exit 1
fi

# The input directory must exist (it is populated by convert_hf_to_gguf.py).
if [ ! -d "$INPUT_DIR" ]; then
    echo "❌ 错误:输入目录不存在 $INPUT_DIR" >&2
    echo "请先运行convert_hf_to_gguf.py转换模型" >&2
    exit 1
fi

# Create the output directory if it does not exist yet.
mkdir -p "$OUTPUT_DIR"
echo "✅ 环境检查通过"
echo ""
# ============================================================
# Collect the models to quantize
# ============================================================
echo "📂 扫描待量化模型..."

# Gather the top-level *.gguf files NUL-delimited so paths containing
# spaces or other IFS characters survive intact (the previous
# `MODELS=($(find ...))` form word-split and glob-expanded the results).
mapfile -d '' -t MODELS < <(find "$INPUT_DIR" -maxdepth 1 -name "*.gguf" -type f -print0)

if [ ${#MODELS[@]} -eq 0 ]; then
    echo "❌ 未找到任何.gguf文件在 $INPUT_DIR" >&2
    exit 1
fi

echo "✅ 找到 ${#MODELS[@]} 个模型待量化"
for m in "${MODELS[@]}"; do
    echo " - $(basename "$m")"
done
echo ""
# ============================================================
# Batch quantization
# ============================================================
echo "🚀 开始批量量化..."
echo "量化格式: ${QUANT_TYPES[*]}"
echo ""

# Progress counters: TOTAL = models x formats.
TOTAL=$((${#MODELS[@]} * ${#QUANT_TYPES[@]}))
CURRENT=0
SUCCESS=0
FAILED=0

for MODEL_PATH in "${MODELS[@]}"; do
    MODEL_NAME=$(basename "$MODEL_PATH" .gguf)
    for QTYPE in "${QUANT_TYPES[@]}"; do
        CURRENT=$((CURRENT + 1))
        OUTPUT_FILE="${OUTPUT_DIR}/${MODEL_NAME}-${QTYPE}.gguf"
        echo "=============================================="
        echo "[$CURRENT/$TOTAL] 量化: ${MODEL_NAME} -> ${QTYPE}"
        echo "输入: $MODEL_PATH"
        echo "输出: $OUTPUT_FILE"
        echo "=============================================="

        # Skip work already completed by a previous run (resume support).
        if [ -f "$OUTPUT_FILE" ]; then
            echo "⏭️ 文件已存在,跳过"
            SUCCESS=$((SUCCESS + 1))
            continue
        fi

        # Test the command directly instead of the fragile `$?` pattern
        # (which silently breaks if a command is inserted between the
        # call and the check).
        if "$QUANTIZE_BIN" "$MODEL_PATH" "$OUTPUT_FILE" "$QTYPE"; then
            SIZE=$(du -h "$OUTPUT_FILE" | cut -f1)
            echo "✅ 成功!文件大小: $SIZE"
            SUCCESS=$((SUCCESS + 1))
        else
            # Remove any partially-written output so a rerun does not
            # mistake it for a finished file and skip it.
            rm -f -- "$OUTPUT_FILE"
            echo "❌ 量化失败" >&2
            FAILED=$((FAILED + 1))
        fi
        echo ""
    done
done
# ============================================================
# Summary report
# ============================================================
echo ""
echo "=============================================="
echo "📊 批量量化完成"
echo "=============================================="
echo "✅ 成功: $SUCCESS"
echo "❌ 失败: $FAILED"
echo ""
echo "输出目录: $OUTPUT_DIR"
echo ""

# List every generated file with its human-readable size. Iterating a
# glob instead of parsing `ls -lh` output keeps filenames containing
# spaces intact ($9 in awk is wrong for such names).
echo "生成的模型文件:"
for f in "$OUTPUT_DIR"/*.gguf; do
    [ -e "$f" ] || continue   # glob matched nothing
    echo " $f ($(du -h "$f" | cut -f1))"
done
echo ""

echo "=============================================="
echo "📦 Ollama发布指南"
echo "=============================================="
echo "1. 创建Modelfile:"
echo ' FROM ./your_model-Q4_K_M.gguf'
echo ' TEMPLATE """{{ .Prompt }}"""'
echo ' PARAMETER temperature 0.7'
echo ""
echo "2. 创建本地模型:"
echo " ollama create your-model-name -f Modelfile"
echo ""
echo "3. 推送到Ollama Hub:"
echo " ollama push username/model-name"
echo "=============================================="

# Propagate failure to the caller: previously the script always exited 0,
# hiding quantization failures from CI / wrapper scripts.
if [ "${FAILED:-0}" -gt 0 ]; then
    exit 1
fi
exit 0