#!/usr/bin/env bash
#
# convert_3b_gguf.sh — convert a HuggingFace-format 3B model to GGUF and
# produce quantized variants (Q4_K_M / Q5_K_M / Q8_0) via llama.cpp.
#
# Usage:
#   bash scripts/convert_3b_gguf.sh [--input_dir DIR] [--out_dir DIR]
#                                   [--checkpoint DIR] [--skip_hf_conv] [--skip_quant]
#
# Environment:
#   LLAMA_CPP_DIR   path to an existing llama.cpp checkout (optional)
#   GGML_CUDA       ON/OFF for the llama.cpp build (default: ON)
#
# NOTE: this comment header doubles as the -h/--help text (it is extracted
# with `grep '^#'` by the argument parser below).

# Abort on errors, unset variables, and failures anywhere in a pipeline.
set -euo pipefail

# --- default configuration (overridable via CLI flags) ---
INPUT_DIR="outputs/hf_korean_3b_orpo"   # HF-format model directory
OUT_DIR="outputs/gguf"                  # where GGUF files are written
CHECKPOINT_DIR=""                       # optional raw checkpoint to convert first
SKIP_HF_CONV=false                      # --skip_hf_conv: skip checkpoint->HF step
SKIP_QUANT=false                        # --skip_quant: emit F16 GGUF only
# Parse command-line flags into the global configuration variables.
# -h/--help prints the '#' comment header at the top of this file.
# Unknown options print an error plus usage and abort with status 1.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --input_dir)    INPUT_DIR="$2";      shift 2 ;;
      --out_dir)      OUT_DIR="$2";        shift 2 ;;
      --checkpoint)   CHECKPOINT_DIR="$2"; shift 2 ;;
      --skip_hf_conv) SKIP_HF_CONV=true;   shift ;;
      --skip_quant)   SKIP_QUANT=true;     shift ;;
      -h|--help)
        grep '^#' "$0" | head -40 | sed 's/^# \{0,1\}//'
        exit 0 ;;
      *)
        # NOTE(review): this message is mojibake in the original source and is
        # preserved byte-for-byte (including the embedded newline).
        echo "ERROR: μ μ μλ μ΅μ
: $1"
        echo "Usage: bash scripts/convert_3b_gguf.sh [--input_dir DIR] [--out_dir DIR] [--checkpoint DIR] [--skip_hf_conv] [--skip_quant]"
        exit 1 ;;
    esac
  done
}
parse_args "$@"
# Resolve the project root (one level above this script) and tool locations.
# LLAMA_CPP_DIR may be supplied by the environment; otherwise it defaults to
# a checkout under outputs/.
PROJECT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
LLAMA_CPP_DIR="${LLAMA_CPP_DIR:-$PROJECT_DIR/outputs/llama.cpp}"
MODEL_NAME="frankenstallm-3b"

# All relative paths below are interpreted from the project root.
cd "$PROJECT_DIR"

# Startup banner (Korean labels preserved verbatim from the original source).
rule="=================================================================="
printf '%s\n' "$rule"
printf '%s\n' " 3B λͺ¨λΈ GGUF λ³ν νμ΄νλΌμΈ"
printf '%s\n' " μ
λ ₯ HF λλ ν 리 : $INPUT_DIR"
printf '%s\n' " GGUF μΆλ ₯ λλ ν 리: $OUT_DIR"
printf '%s\n' " llama.cpp κ²½λ‘ : $LLAMA_CPP_DIR"
printf '%s\n' "$rule"
printf '%s\n' ""
# Ensure a llama.cpp checkout exists; offer to clone it interactively,
# otherwise print install instructions and abort.
if [[ ! -d "$LLAMA_CPP_DIR" ]]; then
  echo "[SETUP] llama.cpp λλ ν λ¦¬κ° μμ΅λλ€."
  echo " λ€μ λͺ
λ ΉμΌλ‘ μ€μΉνμΈμ:"
  echo ""
  echo " git clone --depth 1 https://github.com/ggerganov/llama.cpp $LLAMA_CPP_DIR"
  echo ""
  echo " λλ LLAMA_CPP_DIR νκ²½λ³μλ‘ κΈ°μ‘΄ κ²½λ‘λ₯Ό μ§μ νμΈμ:"
  echo " LLAMA_CPP_DIR=/path/to/llama.cpp bash scripts/convert_3b_gguf.sh"
  echo ""
  # FIX: `read` returns non-zero on EOF (e.g. non-interactive stdin), which
  # would previously kill the script via `set -e` with no message. Default
  # to "N" so the graceful abort path below is taken instead.
  read -r -p "μ§κΈ μλ ν΄λ‘ νμκ² μ΅λκΉ? [y/N] " _yn || _yn="N"
  if [[ "${_yn:-N}" =~ ^[Yy]$ ]]; then
    echo "Cloning llama.cpp ..."
    git clone --depth 1 https://github.com/ggerganov/llama.cpp "$LLAMA_CPP_DIR"
  else
    echo "μ€λ¨ν©λλ€. llama.cppλ₯Ό μ€μΉν λ€ λ€μ μ€ννμΈμ."
    exit 1
  fi
fi
| echo "[SETUP] llama.cpp Python μμ‘΄μ± μ€μΉ μ€ ..." |
| pip install -r "$LLAMA_CPP_DIR/requirements.txt" --break-system-packages -q |
|
|
| |
| |
| |
# STEP 1 — optionally convert a raw training checkpoint into HF format.
# Runs only when a checkpoint was given AND conversion was not skipped.
if [[ -n "$CHECKPOINT_DIR" && "$SKIP_HF_CONV" == "false" ]]; then
  echo ""
  echo "[STEP 1] 컀μ€ν
체ν¬ν¬μΈνΈ β HF ν¬λ§· λ³ν"
  echo " 체ν¬ν¬μΈνΈ: $CHECKPOINT_DIR"
  echo " μΆλ ₯ : $INPUT_DIR"
  echo ""

  # Guard clause: the checkpoint directory must exist.
  [[ -d "$CHECKPOINT_DIR" ]] || {
    echo "ERROR: 체ν¬ν¬μΈνΈ λλ ν 리λ₯Ό μ°Ύμ μ μμ΅λλ€: $CHECKPOINT_DIR"
    exit 1
  }

  converter="$PROJECT_DIR/scripts/convert_to_hf.py"
  python "$converter" \
    --checkpoint "$CHECKPOINT_DIR" \
    --output "$INPUT_DIR" \
    --tokenizer "tokenizer/korean_sp/tokenizer.json"

  echo " [OK] HF λ³ν μλ£ β $INPUT_DIR"
elif [[ "$SKIP_HF_CONV" == "true" ]]; then
  echo "[STEP 1] HF λ³ν 건λλ (--skip_hf_conv)"
else
  echo "[STEP 1] 체ν¬ν¬μΈνΈ λ―Έμ§μ β HF λλ ν 리λ₯Ό μ§μ μ¬μ©ν©λλ€."
fi
# Validate the HF input directory before attempting any conversion.
[[ -d "$INPUT_DIR" ]] || {
  echo "ERROR: HF λͺ¨λΈ λλ ν 리λ₯Ό μ°Ύμ μ μμ΅λλ€: $INPUT_DIR"
  echo " --checkpoint μ΅μ
μΌλ‘ 체ν¬ν¬μΈνΈλ₯Ό μ§μ νκ±°λ,"
  echo " python scripts/convert_to_hf.py λ₯Ό λ¨Όμ μ€ννμΈμ."
  exit 1
}

# config.json is the minimal marker of a complete HF export.
[[ -f "$INPUT_DIR/config.json" ]] || {
  echo "ERROR: config.json μ΄ μμ΅λλ€: $INPUT_DIR/config.json"
  exit 1
}

mkdir -p "$OUT_DIR"
# STEP 2 — build the llama-quantize binary if it is not already present.
QUANTIZE_BIN="$LLAMA_CPP_DIR/build/bin/llama-quantize"

if [[ ! -f "$QUANTIZE_BIN" ]]; then
  echo ""
  echo "[STEP 2] llama.cpp λΉλ μ€ (llama-quantize) ..."
  # GGML_CUDA is now overridable from the environment (GGML_CUDA=OFF for
  # CPU-only hosts); the default remains ON, as before.
  # `set -o pipefail` (set at the top of the script) ensures a cmake failure
  # still aborts even though output is trimmed through `tail`.
  cmake -S "$LLAMA_CPP_DIR" -B "$LLAMA_CPP_DIR/build" \
    -DCMAKE_BUILD_TYPE=Release \
    -DGGML_CUDA="${GGML_CUDA:-ON}" \
    2>&1 | tail -10
  cmake --build "$LLAMA_CPP_DIR/build" --target llama-quantize -j "$(nproc)" \
    2>&1 | tail -10
  echo " [OK] λΉλ μλ£: $QUANTIZE_BIN"
else
  echo "[STEP 2] llama-quantize λ°μ΄λ리 μ΄λ―Έ μ‘΄μ¬ β λΉλ 건λλ"
fi
# STEP 3 — convert the HF checkpoint to a full-precision (F16) GGUF file.
F16_GGUF="$OUT_DIR/${MODEL_NAME}-f16.gguf"

echo ""
echo "[STEP 3] HF β F16 GGUF λ³ν"
echo " μ
λ ₯: $INPUT_DIR"
echo " μΆλ ₯: $F16_GGUF"
echo ""

python "$LLAMA_CPP_DIR/convert_hf_to_gguf.py" "$INPUT_DIR" \
  --outtype f16 \
  --outfile "$F16_GGUF"

# Report the resulting file size (human readable).
f16_size="$(du -sh "$F16_GGUF" | cut -f1)"
echo " [OK] F16 GGUF ν¬κΈ°: $f16_size ($F16_GGUF)"
| if [[ "$SKIP_QUANT" == "true" ]]; then |
| echo "" |
| echo "[STEP 4] μμν 건λλ (--skip_quant)" |
| else |
| echo "" |
| echo "[STEP 4] λ€μ€ μμν μμ ..." |
|
|
| if [[ ! -f "$QUANTIZE_BIN" ]]; then |
| echo "[WARN] llama-quantize λ°μ΄λ리λ₯Ό μ°Ύμ μ μμ΅λλ€: $QUANTIZE_BIN" |
| echo " μμνλ₯Ό 건λλλλ€. F16 GGUFλ§ μμ±λμμ΅λλ€." |
| echo " μλ λΉλ: cmake --build $LLAMA_CPP_DIR/build --target llama-quantize" |
| else |
| |
| Q4KM_GGUF="$OUT_DIR/${MODEL_NAME}-Q4_K_M.gguf" |
| echo " β Q4_K_M μμν: $Q4KM_GGUF ..." |
| "$QUANTIZE_BIN" "$F16_GGUF" "$Q4KM_GGUF" Q4_K_M |
| echo " ν¬κΈ°: $(du -sh "$Q4KM_GGUF" | cut -f1)" |
|
|
| |
| Q5KM_GGUF="$OUT_DIR/${MODEL_NAME}-Q5_K_M.gguf" |
| echo " β Q5_K_M μμν: $Q5KM_GGUF ..." |
| "$QUANTIZE_BIN" "$F16_GGUF" "$Q5KM_GGUF" Q5_K_M |
| echo " ν¬κΈ°: $(du -sh "$Q5KM_GGUF" | cut -f1)" |
|
|
| |
| Q8_GGUF="$OUT_DIR/${MODEL_NAME}-Q8_0.gguf" |
| echo " β Q8_0 μμν: $Q8_GGUF ..." |
| "$QUANTIZE_BIN" "$F16_GGUF" "$Q8_GGUF" Q8_0 |
| echo " ν¬κΈ°: $(du -sh "$Q8_GGUF" | cut -f1)" |
|
|
| echo "" |
| echo " [OK] λͺ¨λ μμν μλ£" |
| fi |
| fi |
|
|
| |
| |
| |
| echo "" |
| echo "==================================================================" |
| echo " 3B GGUF λ³ν μλ£" |
| echo "" |
| echo " μΆλ ₯ νμΌ λͺ©λ‘:" |
| ls -lh "$OUT_DIR/${MODEL_NAME}"*.gguf 2>/dev/null | awk '{print " " $5 " " $9}' || \ |
| echo " (νμΌ λͺ©λ‘ νμΈ: ls -lh $OUT_DIR/)" |
| echo "" |
| echo " λ€μ λ¨κ³:" |
| echo " bash scripts/deploy_3b_ollama.sh" |
| echo " bash scripts/quality_gate.sh deploy" |
| echo "==================================================================" |
|
|