#!/bin/bash
# ============================================================
# llama.cpp GGUF batch quantization script
# Purpose: quantize F16 GGUF models into mainstream Ollama formats
# ============================================================

# Strict-ish mode: error on unset variables, fail pipelines on any stage.
# NOTE: -e is deliberately NOT set — per-model quantize failures are
# counted and reported by the loop below instead of aborting the batch.
set -uo pipefail

# --- Configuration ---
LLAMA_CPP_DIR="/inspire/hdd/global_user/shixiaoxin-253107030017/sxx/llama.cpp"
QUANTIZE_BIN="${LLAMA_CPP_DIR}/build/bin/llama-quantize"
INPUT_DIR="./gguf_models"
OUTPUT_DIR="./gguf_quantized"

# llama-quantize needs the shared libs next to the binary.
# ${VAR:-} keeps this safe under `set -u` when LD_LIBRARY_PATH is unset.
export LD_LIBRARY_PATH="${LLAMA_CPP_DIR}/build/bin:${LD_LIBRARY_PATH:-}"

# Mainstream Ollama quantization formats (ordered by recommendation):
#   Q4_K_M: most popular, balances quality and size (recommended)
#   Q5_K_M: better quality, slightly larger file
#   Q8_0:   high quality, larger file
#   Q4_0:   basic 4-bit, broad compatibility
#   Q3_K_M: smaller file, larger quality loss
QUANT_TYPES=("Q4_K_M" "Q5_K_M" "Q8_0")

# ============================================================
# Environment checks
# ============================================================
echo "🔧 检查环境..."

# The quantize binary must exist (i.e. llama.cpp has been built).
if [ ! -f "$QUANTIZE_BIN" ]; then
  echo "❌ 错误:找不到量化工具 $QUANTIZE_BIN"
  echo "请先编译llama.cpp:"
  echo " cd $LLAMA_CPP_DIR"
  echo " mkdir -p build && cd build"
  echo " cmake .. -DGGML_CUDA=ON"
  echo " make -j$(nproc)"
  exit 1
fi

# The input directory must exist (populated by convert_hf_to_gguf.py).
if [ ! -d "$INPUT_DIR" ]; then
  echo "❌ 错误:输入目录不存在 $INPUT_DIR"
  echo "请先运行convert_hf_to_gguf.py转换模型"
  exit 1
fi

mkdir -p "$OUTPUT_DIR"

echo "✅ 环境检查通过"
echo ""

# ============================================================
# Collect the models to quantize
# ============================================================
echo "📂 扫描待量化模型..."

# NUL-delimited read is safe for paths containing spaces/newlines
# (an unquoted $(find ...) array assignment would word-split them).
mapfile -d '' -t MODELS < <(find "$INPUT_DIR" -maxdepth 1 -name "*.gguf" -type f -print0)

if [ ${#MODELS[@]} -eq 0 ]; then
  echo "❌ 未找到任何.gguf文件在 $INPUT_DIR"
  exit 1
fi

echo "✅ 找到 ${#MODELS[@]} 个模型待量化"
for m in "${MODELS[@]}"; do
  echo " - $(basename "$m")"
done
echo ""

# ============================================================
# Batch quantization
# ============================================================
echo "🚀 开始批量量化..."
echo "量化格式: ${QUANT_TYPES[*]}"
echo ""

# Progress counters over the models × formats matrix.
TOTAL=$((${#MODELS[@]} * ${#QUANT_TYPES[@]}))
CURRENT=0
SUCCESS=0
FAILED=0

for MODEL_PATH in "${MODELS[@]}"; do
  MODEL_NAME=$(basename "$MODEL_PATH" .gguf)

  for QTYPE in "${QUANT_TYPES[@]}"; do
    CURRENT=$((CURRENT + 1))
    OUTPUT_FILE="${OUTPUT_DIR}/${MODEL_NAME}-${QTYPE}.gguf"

    echo "=============================================="
    echo "[$CURRENT/$TOTAL] 量化: ${MODEL_NAME} -> ${QTYPE}"
    echo "输入: $MODEL_PATH"
    echo "输出: $OUTPUT_FILE"
    echo "=============================================="

    # Skip outputs from a previous run (counted as successes).
    if [ -f "$OUTPUT_FILE" ]; then
      echo "⏭️ 文件已存在,跳过"
      SUCCESS=$((SUCCESS + 1))
      continue
    fi

    # Run the quantizer; testing the command directly (instead of the
    # fragile `cmd; [ $? -eq 0 ]` pattern) also stays correct if -e is
    # ever enabled.
    if "$QUANTIZE_BIN" "$MODEL_PATH" "$OUTPUT_FILE" "$QTYPE"; then
      SIZE=$(du -h "$OUTPUT_FILE" | cut -f1)
      echo "✅ 成功!文件大小: $SIZE"
      SUCCESS=$((SUCCESS + 1))
    else
      echo "❌ 量化失败"
      FAILED=$((FAILED + 1))
    fi
    echo ""
  done
done

# ============================================================
# Summary report
# ============================================================
echo ""
echo "=============================================="
echo "📊 批量量化完成"
echo "=============================================="
echo "✅ 成功: $SUCCESS"
echo "❌ 失败: $FAILED"
echo ""
echo "输出目录: $OUTPUT_DIR"
echo ""

# List every generated file with its size.
# NOTE(review): parsing `ls -lh` via awk $9 is fragile for filenames
# containing spaces; acceptable here only as human-readable output.
echo "生成的模型文件:"
ls -lh "$OUTPUT_DIR"/*.gguf 2>/dev/null | awk '{print " " $9 " (" $5 ")"}'

# ============================================================
# Ollama publishing quick guide (informational output only)
# ============================================================
echo ""
echo "=============================================="
echo "📦 Ollama发布指南"
echo "=============================================="
echo "1. 创建Modelfile:"
echo ' FROM ./your_model-Q4_K_M.gguf'
echo ' TEMPLATE """{{ .Prompt }}"""'
echo ' PARAMETER temperature 0.7'
echo ""
echo "2. 创建本地模型:"
echo " ollama create your-model-name -f Modelfile"
echo ""
echo "3. 推送到Ollama Hub:"
echo " ollama push username/model-name"
echo "=============================================="