#!/usr/bin/env bash
# =============================================================================
# convert_3b_gguf.sh β€” convert a 3B model from HuggingFace format to GGUF
# and produce multiple quantized variants.
#
# Usage:
#   bash scripts/convert_3b_gguf.sh [options]
#
# Options:
#   --input_dir DIR   HF-format model directory (default: outputs/hf_korean_3b_orpo)
#   --out_dir DIR     GGUF output directory (default: outputs/gguf)
#   --checkpoint DIR  custom checkpoint directory (when given, the HF
#                     conversion step is run first)
#   --skip_hf_conv    skip the HF conversion step (HF format already exists)
#   --skip_quant      skip the quantization step (produce the F16 GGUF only)
#
# Pipeline:
#   1. [optional] custom checkpoint β†’ HF transformers format (convert_to_hf.py)
#   2. HF β†’ F16 GGUF (llama.cpp/convert_hf_to_gguf.py)
#   3. F16 GGUF β†’ Q4_K_M, Q5_K_M, Q8_0 quantization (llama-quantize)
#
# Outputs:
#   outputs/gguf/frankenstallm-3b-f16.gguf
#   outputs/gguf/frankenstallm-3b-Q4_K_M.gguf   β€” recommended (for Ollama)
#   outputs/gguf/frankenstallm-3b-Q5_K_M.gguf
#   outputs/gguf/frankenstallm-3b-Q8_0.gguf
#
# Prerequisites:
#   - HF conversion done via `python scripts/convert_to_hf.py` (or pass --checkpoint)
#   - git, cmake, make installed
#   - pip install safetensors
# =============================================================================
# Strict mode: abort on unhandled errors, unset variables, and any failing
# stage of a pipeline.
set -euo pipefail
# ---------------------------------------------------------------------------
# Argument parsing
# ---------------------------------------------------------------------------
INPUT_DIR="outputs/hf_korean_3b_orpo"
OUT_DIR="outputs/gguf"
CHECKPOINT_DIR=""
SKIP_HF_CONV=false
SKIP_QUANT=false

# require_value OPTION VALUE β€” abort with a clear message when an option that
# takes a value was given without one. Without this, "$2" under `set -u`
# dies with an opaque "unbound variable" error.
require_value() {
  if [[ -z "$2" ]]; then
    echo "ERROR: $1 μ˜΅μ…˜μ—λŠ” 값이 ν•„μš”ν•©λ‹ˆλ‹€" >&2
    exit 1
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --input_dir)    require_value "$1" "${2:-}"; INPUT_DIR="$2";      shift 2 ;;
    --out_dir)      require_value "$1" "${2:-}"; OUT_DIR="$2";        shift 2 ;;
    --checkpoint)   require_value "$1" "${2:-}"; CHECKPOINT_DIR="$2"; shift 2 ;;
    --skip_hf_conv) SKIP_HF_CONV=true; shift ;;
    --skip_quant)   SKIP_QUANT=true;   shift ;;
    -h|--help)
      # Print the header comment block of this script as the help text.
      grep '^#' "$0" | head -40 | sed 's/^# \{0,1\}//'
      exit 0 ;;
    *)
      # Diagnostics go to stderr so stdout stays clean for pipelines.
      echo "ERROR: μ•Œ 수 μ—†λŠ” μ˜΅μ…˜: $1" >&2
      echo "Usage: bash scripts/convert_3b_gguf.sh [--input_dir DIR] [--out_dir DIR] [--checkpoint DIR] [--skip_hf_conv] [--skip_quant]" >&2
      exit 1 ;;
  esac
done
# Resolve the project root (parent of this script's directory); allow the
# llama.cpp checkout location to be overridden via the environment.
PROJECT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
LLAMA_CPP_DIR="${LLAMA_CPP_DIR:-$PROJECT_DIR/outputs/llama.cpp}"
MODEL_NAME="frankenstallm-3b"
cd "$PROJECT_DIR"

# Startup banner showing the effective configuration.
printf '%s\n' \
  "==================================================================" \
  " 3B λͺ¨λΈ GGUF λ³€ν™˜ νŒŒμ΄ν”„λΌμΈ" \
  " μž…λ ₯ HF 디렉토리 : $INPUT_DIR" \
  " GGUF 좜λ ₯ 디렉토리: $OUT_DIR" \
  " llama.cpp 경둜 : $LLAMA_CPP_DIR" \
  "==================================================================" \
  ""
# ---------------------------------------------------------------------------
# Step 0: verify the llama.cpp checkout exists; offer to clone it when missing
# ---------------------------------------------------------------------------
if [[ ! -d "$LLAMA_CPP_DIR" ]]; then
  echo "[SETUP] llama.cpp 디렉토리가 μ—†μŠ΅λ‹ˆλ‹€."
  echo " λ‹€μŒ λͺ…λ ΉμœΌλ‘œ μ„€μΉ˜ν•˜μ„Έμš”:"
  echo ""
  echo " git clone --depth 1 https://github.com/ggerganov/llama.cpp $LLAMA_CPP_DIR"
  echo ""
  echo " λ˜λŠ” LLAMA_CPP_DIR ν™˜κ²½λ³€μˆ˜λ‘œ κΈ°μ‘΄ 경둜λ₯Ό μ§€μ •ν•˜μ„Έμš”:"
  echo " LLAMA_CPP_DIR=/path/to/llama.cpp bash scripts/convert_3b_gguf.sh"
  echo ""
  # Only prompt when stdin is a terminal. In CI or piped runs, `read` hits
  # EOF, returns non-zero, and under `set -e` would abort the script before
  # the guidance message below could be printed.
  _yn=N
  if [[ -t 0 ]]; then
    read -r -p "μ§€κΈˆ μžλ™ ν΄λ‘ ν•˜μ‹œκ² μŠ΅λ‹ˆκΉŒ? [y/N] " _yn || _yn=N
  fi
  if [[ "${_yn:-N}" =~ ^[Yy]$ ]]; then
    echo "Cloning llama.cpp ..."
    git clone --depth 1 https://github.com/ggerganov/llama.cpp "$LLAMA_CPP_DIR"
  else
    echo "μ€‘λ‹¨ν•©λ‹ˆλ‹€. llama.cppλ₯Ό μ„€μΉ˜ν•œ λ’€ λ‹€μ‹œ μ‹€ν–‰ν•˜μ„Έμš”."
    exit 1
  fi
fi
# llama.cpp Python dependencies (needed by convert_hf_to_gguf.py).
echo "[SETUP] llama.cpp Python μ˜μ‘΄μ„± μ„€μΉ˜ 쀑 ..."
pip install -r "$LLAMA_CPP_DIR/requirements.txt" --break-system-packages -q
# ---------------------------------------------------------------------------
# Step 1: optional conversion of a custom checkpoint into HF format
# ---------------------------------------------------------------------------
if [[ "$SKIP_HF_CONV" == "true" ]]; then
  echo "[STEP 1] HF λ³€ν™˜ κ±΄λ„ˆλœ€ (--skip_hf_conv)"
elif [[ -z "$CHECKPOINT_DIR" ]]; then
  echo "[STEP 1] 체크포인트 λ―Έμ§€μ • β€” HF 디렉토리λ₯Ό 직접 μ‚¬μš©ν•©λ‹ˆλ‹€."
else
  echo ""
  echo "[STEP 1] μ»€μŠ€ν…€ 체크포인트 β†’ HF 포맷 λ³€ν™˜"
  echo " 체크포인트: $CHECKPOINT_DIR"
  echo " 좜λ ₯ : $INPUT_DIR"
  echo ""
  if [[ ! -d "$CHECKPOINT_DIR" ]]; then
    echo "ERROR: 체크포인트 디렉토리λ₯Ό 찾을 수 μ—†μŠ΅λ‹ˆλ‹€: $CHECKPOINT_DIR"
    exit 1
  fi
  python "$PROJECT_DIR/scripts/convert_to_hf.py" \
    --checkpoint "$CHECKPOINT_DIR" \
    --output "$INPUT_DIR" \
    --tokenizer "tokenizer/korean_sp/tokenizer.json"
  echo " [OK] HF λ³€ν™˜ μ™„λ£Œ β†’ $INPUT_DIR"
fi

# Final sanity checks: the HF directory must exist and contain a config.json.
if [[ ! -d "$INPUT_DIR" ]]; then
  echo "ERROR: HF λͺ¨λΈ 디렉토리λ₯Ό 찾을 수 μ—†μŠ΅λ‹ˆλ‹€: $INPUT_DIR"
  echo " --checkpoint μ˜΅μ…˜μœΌλ‘œ 체크포인트λ₯Ό μ§€μ •ν•˜κ±°λ‚˜,"
  echo " python scripts/convert_to_hf.py λ₯Ό λ¨Όμ € μ‹€ν–‰ν•˜μ„Έμš”."
  exit 1
fi
if [[ ! -f "$INPUT_DIR/config.json" ]]; then
  echo "ERROR: config.json 이 μ—†μŠ΅λ‹ˆλ‹€: $INPUT_DIR/config.json"
  exit 1
fi
mkdir -p "$OUT_DIR"
# ---------------------------------------------------------------------------
# Step 2: build llama.cpp (for the llama-quantize binary)
# ---------------------------------------------------------------------------
QUANTIZE_BIN="$LLAMA_CPP_DIR/build/bin/llama-quantize"
if [[ ! -f "$QUANTIZE_BIN" ]]; then
  echo ""
  echo "[STEP 2] llama.cpp λΉŒλ“œ 쀑 (llama-quantize) ..."
  # GGML_CUDA is overridable from the environment (e.g. GGML_CUDA=OFF on
  # CPU-only hosts, where a hard-coded ON would make configuration fail);
  # the default ON preserves the previous behavior.
  cmake -S "$LLAMA_CPP_DIR" -B "$LLAMA_CPP_DIR/build" \
    -DCMAKE_BUILD_TYPE=Release \
    -DGGML_CUDA="${GGML_CUDA:-ON}" \
    2>&1 | tail -10
  cmake --build "$LLAMA_CPP_DIR/build" --target llama-quantize -j "$(nproc)" \
    2>&1 | tail -10
  echo " [OK] λΉŒλ“œ μ™„λ£Œ: $QUANTIZE_BIN"
else
  echo "[STEP 2] llama-quantize λ°”μ΄λ„ˆλ¦¬ 이미 쑴재 β€” λΉŒλ“œ κ±΄λ„ˆλœ€"
fi
# ---------------------------------------------------------------------------
# Step 3: convert the HF model to an F16 GGUF file
# ---------------------------------------------------------------------------
F16_GGUF="$OUT_DIR/${MODEL_NAME}-f16.gguf"
printf '\n%s\n' "[STEP 3] HF β†’ F16 GGUF λ³€ν™˜"
printf '%s\n' " μž…λ ₯: $INPUT_DIR" " 좜λ ₯: $F16_GGUF" ""
python "$LLAMA_CPP_DIR/convert_hf_to_gguf.py" "$INPUT_DIR" \
  --outfile "$F16_GGUF" \
  --outtype f16
echo " [OK] F16 GGUF 크기: $(du -sh "$F16_GGUF" | cut -f1) ($F16_GGUF)"
# ---------------------------------------------------------------------------
# Step 4: multi-level quantization (Q4_K_M, Q5_K_M, Q8_0)
# ---------------------------------------------------------------------------
if [[ "$SKIP_QUANT" == "true" ]]; then
  echo ""
  echo "[STEP 4] μ–‘μžν™” κ±΄λ„ˆλœ€ (--skip_quant)"
else
  echo ""
  echo "[STEP 4] 닀쀑 μ–‘μžν™” μ‹œμž‘ ..."
  if [[ ! -f "$QUANTIZE_BIN" ]]; then
    echo "[WARN] llama-quantize λ°”μ΄λ„ˆλ¦¬λ₯Ό 찾을 수 μ—†μŠ΅λ‹ˆλ‹€: $QUANTIZE_BIN"
    echo " μ–‘μžν™”λ₯Ό κ±΄λ„ˆλœλ‹ˆλ‹€. F16 GGUF만 μƒμ„±λ˜μ—ˆμŠ΅λ‹ˆλ‹€."
    echo " μˆ˜λ™ λΉŒλ“œ: cmake --build $LLAMA_CPP_DIR/build --target llama-quantize"
  else
    # One pass per quantization level:
    #   Q4_K_M β€” smallest, quality/speed balance (Ollama's recommended default)
    #   Q5_K_M β€” medium size, higher quality
    #   Q8_0   β€” highest quality (close to F16)
    for quant_type in Q4_K_M Q5_K_M Q8_0; do
      quant_out="$OUT_DIR/${MODEL_NAME}-${quant_type}.gguf"
      echo " β†’ ${quant_type} μ–‘μžν™”: $quant_out ..."
      "$QUANTIZE_BIN" "$F16_GGUF" "$quant_out" "$quant_type"
      echo " 크기: $(du -sh "$quant_out" | cut -f1)"
    done
    echo ""
    echo " [OK] λͺ¨λ“  μ–‘μžν™” μ™„λ£Œ"
  fi
fi
# ---------------------------------------------------------------------------
# Completion summary
# ---------------------------------------------------------------------------
printf '\n%s\n' "=================================================================="
printf '%s\n' " 3B GGUF λ³€ν™˜ μ™„λ£Œ" "" " 좜λ ₯ 파일 λͺ©λ‘:"
# List produced GGUF files (size + path); falls back to a hint when the glob
# matches nothing (pipefail makes the failed `ls` fail the whole pipeline).
ls -lh "$OUT_DIR/${MODEL_NAME}"*.gguf 2>/dev/null | awk '{print " " $5 " " $9}' || \
  echo " (파일 λͺ©λ‘ 확인: ls -lh $OUT_DIR/)"
printf '%s\n' \
  "" \
  " λ‹€μŒ 단계:" \
  " bash scripts/deploy_3b_ollama.sh" \
  " bash scripts/quality_gate.sh deploy" \
  "=================================================================="