#!/usr/bin/env bash
#
# Batch-quantize GGUF models with llama.cpp's llama-quantize tool.
# For every *.gguf file in INPUT_DIR, produce one quantized copy per
# format in QUANT_TYPES under OUTPUT_DIR. Already-existing outputs are
# skipped, so the script is safe to re-run after a partial batch.
#
# Requires: a built llama.cpp tree (build/bin/llama-quantize).

set -euo pipefail

# --- Configuration -----------------------------------------------------------
readonly LLAMA_CPP_DIR="/inspire/hdd/global_user/shixiaoxin-253107030017/sxx/llama.cpp"
readonly QUANTIZE_BIN="${LLAMA_CPP_DIR}/build/bin/llama-quantize"
readonly INPUT_DIR="./gguf_models"
readonly OUTPUT_DIR="./gguf_quantized"

# llama-quantize links against shared libraries that live next to the binary.
# ':-' keeps 'set -u' happy when LD_LIBRARY_PATH is unset.
export LD_LIBRARY_PATH="${LLAMA_CPP_DIR}/build/bin:${LD_LIBRARY_PATH:-}"

# Quantization formats to produce for every input model.
readonly QUANT_TYPES=("Q4_K_M" "Q5_K_M" "Q8_0")

# --- Environment checks ------------------------------------------------------
echo "🔧 检查环境..."

if [ ! -f "$QUANTIZE_BIN" ]; then
  echo "❌ 错误:找不到量化工具 $QUANTIZE_BIN"
  echo "请先编译llama.cpp:"
  echo "  cd $LLAMA_CPP_DIR"
  echo "  mkdir -p build && cd build"
  echo "  cmake .. -DGGML_CUDA=ON"
  echo "  make -j$(nproc)"
  exit 1
fi

if [ ! -d "$INPUT_DIR" ]; then
  echo "❌ 错误:输入目录不存在 $INPUT_DIR"
  echo "请先运行convert_hf_to_gguf.py转换模型"
  exit 1
fi

mkdir -p "$OUTPUT_DIR"

echo "✅ 环境检查通过"
echo ""

# --- Scan input models -------------------------------------------------------
echo "📂 扫描待量化模型..."

# NUL-delimited read so paths containing spaces survive intact
# (MODELS=($(find ...)) would word-split them).
mapfile -d '' -t MODELS < <(find "$INPUT_DIR" -maxdepth 1 -name "*.gguf" -type f -print0)

if [ ${#MODELS[@]} -eq 0 ]; then
  echo "❌ 未找到任何.gguf文件在 $INPUT_DIR"
  exit 1
fi

echo "✅ 找到 ${#MODELS[@]} 个模型待量化"
for m in "${MODELS[@]}"; do
  echo "  - $(basename "$m")"
done
echo ""

# --- Batch quantization ------------------------------------------------------
echo "🚀 开始批量量化..."
echo "量化格式: ${QUANT_TYPES[*]}"
echo ""

TOTAL=$(( ${#MODELS[@]} * ${#QUANT_TYPES[@]} ))
CURRENT=0
SUCCESS=0
FAILED=0

for MODEL_PATH in "${MODELS[@]}"; do
  MODEL_NAME=$(basename "$MODEL_PATH" .gguf)

  for QTYPE in "${QUANT_TYPES[@]}"; do
    CURRENT=$((CURRENT + 1))
    OUTPUT_FILE="${OUTPUT_DIR}/${MODEL_NAME}-${QTYPE}.gguf"

    echo "=============================================="
    echo "[$CURRENT/$TOTAL] 量化: ${MODEL_NAME} -> ${QTYPE}"
    echo "输入: $MODEL_PATH"
    echo "输出: $OUTPUT_FILE"
    echo "=============================================="

    # Idempotent re-runs: outputs produced by an earlier run are skipped.
    if [ -f "$OUTPUT_FILE" ]; then
      echo "⏭️  文件已存在,跳过"
      SUCCESS=$((SUCCESS + 1))
      continue
    fi

    # Test the command directly (not '$?') so one failed model does not
    # abort the whole batch under 'set -e'.
    if "$QUANTIZE_BIN" "$MODEL_PATH" "$OUTPUT_FILE" "$QTYPE"; then
      SIZE=$(du -h "$OUTPUT_FILE" | cut -f1)
      echo "✅ 成功!文件大小: $SIZE"
      SUCCESS=$((SUCCESS + 1))
    else
      echo "❌ 量化失败"
      FAILED=$((FAILED + 1))
      # Drop any truncated partial output so the skip check above does not
      # mistake it for a finished model on the next run.
      rm -f -- "$OUTPUT_FILE"
    fi
    echo ""
  done
done

# --- Summary -----------------------------------------------------------------
echo ""
echo "=============================================="
echo "📊 批量量化完成"
echo "=============================================="
echo "✅ 成功: $SUCCESS"
echo "❌ 失败: $FAILED"
echo ""
echo "输出目录: $OUTPUT_DIR"
echo ""

echo "生成的模型文件:"
# '|| true' keeps 'set -e -o pipefail' from aborting when no outputs exist.
ls -lh "$OUTPUT_DIR"/*.gguf 2>/dev/null | awk '{print "  " $9 " (" $5 ")"}' || true

echo ""
echo "=============================================="
echo "📦 Ollama发布指南"
echo "=============================================="
echo "1. 创建Modelfile:"
echo '   FROM ./your_model-Q4_K_M.gguf'
echo '   TEMPLATE """{{ .Prompt }}"""'
echo '   PARAMETER temperature 0.7'
echo ""
echo "2. 创建本地模型:"
echo "   ollama create your-model-name -f Modelfile"
echo ""
echo "3. 推送到Ollama Hub:"
echo "   ollama push username/model-name"
echo "=============================================="