#!/bin/bash
# ============================================================
# llama.cpp GGUF batch quantization script
# Purpose: quantize F16 GGUF models into mainstream Ollama formats
# ============================================================
# --- Configuration ---
readonly LLAMA_CPP_DIR="/inspire/hdd/global_user/shixiaoxin-253107030017/sxx/llama.cpp"
readonly QUANTIZE_BIN="${LLAMA_CPP_DIR}/build/bin/llama-quantize"
readonly INPUT_DIR="./gguf_models"
readonly OUTPUT_DIR="./gguf_quantized"
# llama-quantize needs the shared libs that live next to the binary.
# ${VAR:+:${VAR}} appends the old value only if it was set, so an
# unset LD_LIBRARY_PATH no longer leaves a trailing ':' (which would
# implicitly add the current directory to the library search path).
export LD_LIBRARY_PATH="${LLAMA_CPP_DIR}/build/bin${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
# Mainstream Ollama quantization formats (ordered by recommendation):
#   Q4_K_M: most popular, balances quality and size (recommended)
#   Q5_K_M: better quality, slightly larger file
#   Q8_0:   high quality, larger file
#   Q4_0:   basic 4-bit, best compatibility
#   Q3_K_M: smaller file, larger quality loss
QUANT_TYPES=("Q4_K_M" "Q5_K_M" "Q8_0")
# ============================================================
# Environment sanity checks
# ============================================================
echo "🔧 检查环境..."

# The quantize binary only exists after llama.cpp has been built.
if [[ ! -f "$QUANTIZE_BIN" ]]; then
  echo "❌ 错误:找不到量化工具 $QUANTIZE_BIN"
  echo "请先编译llama.cpp:"
  echo " cd $LLAMA_CPP_DIR"
  echo " mkdir -p build && cd build"
  echo " cmake .. -DGGML_CUDA=ON"
  echo " make -j$(nproc)"
  exit 1
fi

# Input directory must contain the F16 GGUF conversions.
[[ -d "$INPUT_DIR" ]] || {
  echo "❌ 错误:输入目录不存在 $INPUT_DIR"
  echo "请先运行convert_hf_to_gguf.py转换模型"
  exit 1
}

# Ensure the destination exists before the first write.
mkdir -p "$OUTPUT_DIR"
echo "✅ 环境检查通过"
echo ""
# ============================================================
# Collect the models awaiting quantization
# ============================================================
echo "📂 扫描待量化模型..."
# NUL-delimited find + mapfile is safe for file names containing
# spaces, globs, or newlines; the previous MODELS=($(find ...))
# word-split and glob-expanded the results.
mapfile -d '' -t MODELS < <(find "$INPUT_DIR" -maxdepth 1 -name "*.gguf" -type f -print0)
if [ ${#MODELS[@]} -eq 0 ]; then
  echo "❌ 未找到任何.gguf文件在 $INPUT_DIR"
  exit 1
fi
echo "✅ 找到 ${#MODELS[@]} 个模型待量化"
for m in "${MODELS[@]}"; do
  echo " - $(basename "$m")"
done
echo ""
# ============================================================
# Batch quantization: every model x every requested format
# ============================================================
echo "🚀 开始批量量化..."
echo "量化格式: ${QUANT_TYPES[*]}"
echo ""

TOTAL=$((${#MODELS[@]} * ${#QUANT_TYPES[@]}))
CURRENT=0
SUCCESS=0
FAILED=0

for MODEL_PATH in "${MODELS[@]}"; do
  MODEL_NAME=$(basename "$MODEL_PATH" .gguf)
  for QTYPE in "${QUANT_TYPES[@]}"; do
    CURRENT=$((CURRENT + 1))
    OUTPUT_FILE="${OUTPUT_DIR}/${MODEL_NAME}-${QTYPE}.gguf"
    echo "=============================================="
    echo "[$CURRENT/$TOTAL] 量化: ${MODEL_NAME} -> ${QTYPE}"
    echo "输入: $MODEL_PATH"
    echo "输出: $OUTPUT_FILE"
    echo "=============================================="
    # Skip results left by a previous run (resume support).
    if [ -f "$OUTPUT_FILE" ]; then
      echo "⏭️ 文件已存在,跳过"
      SUCCESS=$((SUCCESS + 1))
      continue
    fi
    # Test the command directly instead of inspecting $? afterwards:
    # clearer, and still correct if 'set -e' is ever added to this file.
    if "$QUANTIZE_BIN" "$MODEL_PATH" "$OUTPUT_FILE" "$QTYPE"; then
      SIZE=$(du -h "$OUTPUT_FILE" | cut -f1)
      echo "✅ 成功!文件大小: $SIZE"
      SUCCESS=$((SUCCESS + 1))
    else
      echo "❌ 量化失败"
      FAILED=$((FAILED + 1))
      # Drop any truncated partial output so the skip-if-exists check
      # above cannot treat a corrupt file as done on the next run.
      rm -f -- "$OUTPUT_FILE"
    fi
    echo ""
  done
done
# ============================================================
# Summary report
# ============================================================
echo ""
echo "=============================================="
echo "📊 批量量化完成"
echo "=============================================="
echo "✅ 成功: $SUCCESS"
echo "❌ 失败: $FAILED"
echo ""
echo "输出目录: $OUTPUT_DIR"
echo ""
# List generated files via a glob instead of parsing 'ls -lh' with awk,
# which mis-splits file names containing spaces.
echo "生成的模型文件:"
for f in "$OUTPUT_DIR"/*.gguf; do
  [ -e "$f" ] || continue
  echo " $f ($(du -h "$f" | cut -f1))"
done
echo ""
echo "=============================================="
echo "📦 Ollama发布指南"
echo "=============================================="
echo "1. 创建Modelfile:"
echo ' FROM ./your_model-Q4_K_M.gguf'
echo ' TEMPLATE """{{ .Prompt }}"""'
echo ' PARAMETER temperature 0.7'
echo ""
echo "2. 创建本地模型:"
echo " ollama create your-model-name -f Modelfile"
echo ""
echo "3. 推送到Ollama Hub:"
echo " ollama push username/model-name"
echo "=============================================="