#!/usr/bin/env bash
# ============================================================
# run_eval_quick.sh — quick evaluation check (target: 20-30 minutes)
#
# Usage:
#   bash scripts/run_eval_quick.sh [CHECKPOINT_DIR] [OUTPUT_DIR]
#
# Example:
#   bash scripts/run_eval_quick.sh \
#     checkpoints/korean_1b_sft/checkpoint-0005000 \
#     eval/outputs/quick_5000
#
# Tasks: kobest_boolq, kobest_copa, haerae_general_knowledge,
#        haerae_history, paws_ko
# ============================================================
# Strict mode: abort on a failing command, an unset variable, or a failing
# pipeline stage.
set -o errexit -o nounset -o pipefail

# Directory holding this script, and the project root one level above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# --- Argument handling ---------------------------------------
# $1: checkpoint directory (optional), $2: output directory (optional).
# The default output directory is stamped with the current date and time.
CHECKPOINT="${1:-checkpoints/korean_1b_sft/checkpoint-0005000}"
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
OUTPUT_DIR="${2:-eval/outputs/quick_${TIMESTAMP}}"

# Relative paths are resolved against the project root.
case "$CHECKPOINT" in
  /*) ;;
  *) CHECKPOINT="$PROJECT_DIR/$CHECKPOINT" ;;
esac
case "$OUTPUT_DIR" in
  /*) ;;
  *) OUTPUT_DIR="$PROJECT_DIR/$OUTPUT_DIR" ;;
esac

# --- Settings ------------------------------------------------
# The HF export directory is named after the checkpoint's last path component.
ckpt_name="$(basename "$CHECKPOINT")"
HF_MODEL_DIR="$PROJECT_DIR/outputs/hf_${ckpt_name}"
TOKENIZER="$PROJECT_DIR/tokenizer/korean_sp/tokenizer.json"
# Default: GPU 0 only, unless CUDA_VISIBLE_DEVICES is already set.
DEVICE="${CUDA_VISIBLE_DEVICES:-0}"
BATCH_SIZE="auto"

# Quick-check task list (original note: about 2,000 samples, ~20 minutes).
TASKS="kobest_boolq,kobest_copa,haerae_general_knowledge,haerae_history,paws_ko"
# --- Dependency check ----------------------------------------

#######################################
# Abort the script unless a Python module can be imported.
# Arguments: $1 - Python module name passed to `import`
#            $2 - pip package name suggested in the error message
# Outputs:   an install hint on stderr when the import fails
# Returns:   0 when the import succeeds; otherwise exits the shell with 1
#######################################
check_dep() {
  local module="$1"
  local package="$2"
  # The interpreter's own traceback is discarded on purpose; only the short
  # install hint is shown. It goes to stderr so it never mixes with captured
  # or piped stdout.
  if ! python3 -c "import $module" 2>/dev/null; then
    echo "β $module not found. pip install $package" >&2
    exit 1
  fi
}
# Verify each required Python package; entries are "module:pip-name" pairs.
for dep in lm_eval:lm-eval transformers:transformers safetensors:safetensors; do
  check_dep "${dep%%:*}" "${dep##*:}"
done

# Show the resolved run configuration before any work starts.
banner_rule="=================================================="
printf '%s\n' \
  "$banner_rule" \
  " Ko-LLM Quick Eval" \
  "$banner_rule" \
  " Checkpoint : $CHECKPOINT" \
  " HF output : $HF_MODEL_DIR" \
  " Tasks : $TASKS" \
  " Output : $OUTPUT_DIR" \
  " Device : cuda:$DEVICE" \
  "$banner_rule"

# The output directory must exist before eval.log is written into it.
mkdir -p "$OUTPUT_DIR"
# --- Step 1: convert the checkpoint to HF format -------------
# A config.json inside the target directory is treated as the sign that the
# conversion already happened, in which case the converter is not run again.
if [[ -f "$HF_MODEL_DIR/config.json" ]]; then
  echo "βΆ Step 1: HF λͺ¨λΈ μ΄λ―Έ μ‘΄μ¬, λ³ν μ€ν΅"
  echo " $HF_MODEL_DIR"
else
  echo ""
  echo "βΆ Step 1: 컀μ€ν
체ν¬ν¬μΈνΈ β HF ν¬λ§· λ³ν..."
  convert_args=(
    --checkpoint "$CHECKPOINT"
    --output "$HF_MODEL_DIR"
    --tokenizer "$TOKENIZER"
  )
  python3 "$PROJECT_DIR/scripts/convert_to_hf.py" "${convert_args[@]}"
  echo "β
HF λ³ν μλ£: $HF_MODEL_DIR"
fi
# --- Step 2: run lm-eval -------------------------------------
echo ""
echo "βΆ Step 2: lm-eval νκ° μμ..."

# Zero-shot evaluation of the converted model over $TASKS; per-sample
# outputs are logged alongside the aggregate results.
lm_eval_args=(
  --model hf
  --model_args "pretrained=$HF_MODEL_DIR,dtype=float16"
  --tasks "$TASKS"
  --num_fewshot 0
  --batch_size "$BATCH_SIZE"
  --output_path "$OUTPUT_DIR"
  --log_samples
  --verbosity INFO
)

START_TIME=$(date +%s)
# stdout and stderr are merged, shown on the terminal and copied to eval.log.
CUDA_VISIBLE_DEVICES="$DEVICE" python3 -m lm_eval "${lm_eval_args[@]}" 2>&1 \
  | tee "$OUTPUT_DIR/eval.log"
END_TIME=$(date +%s)
ELAPSED=$(( END_TIME - START_TIME ))

# Report wall-clock duration (minutes and seconds) and the output location.
elapsed_min=$(( ELAPSED / 60 ))
elapsed_sec=$(( ELAPSED % 60 ))
echo ""
echo "=================================================="
echo "β
νκ° μλ£!"
echo " μμμκ°: ${elapsed_min}λΆ ${elapsed_sec}μ΄"
echo " κ²°κ³Ό μ μ₯: $OUTPUT_DIR"
echo "=================================================="
# --- Step 3: print a result summary --------------------------

#######################################
# Print the numeric metrics stored in lm-eval result JSON files.
# Arguments: $1 - directory searched recursively for result files
#                 (defaults to the current directory when empty or missing)
# Outputs:   one section per result file on stdout, listing every task and
#            its numeric metrics; keys containing "stderr" are skipped
# Returns:   exit status of the embedded Python interpreter
#######################################
print_eval_summary() {
  local results_dir="${1:-.}"
  # The directory is passed as an argument: the quoted heredoc delimiter
  # disables shell expansion inside the Python source.
  python3 - "$results_dir" <<'PYEOF'
import glob
import json
import os
import sys

output_dir = sys.argv[1] if len(sys.argv) > 1 and sys.argv[1] else "."

# Match on the file name only, so a parent directory that happens to be
# called "results" does not make every JSON below it look like a result file.
candidates = glob.glob(os.path.join(output_dir, "**", "*.json"), recursive=True)
results_files = sorted(
    path for path in candidates if "results" in os.path.basename(path).lower()
)
if not results_files:
    # Fall back to any JSON sitting directly in the output directory.
    results_files = sorted(glob.glob(os.path.join(output_dir, "*.json")))
if not results_files:
    print("κ²°κ³Ό JSON νμΌ μμ. eval.log νμΈνμΈμ.")
    sys.exit(0)

for rf in results_files:
    try:
        with open(rf, encoding="utf-8") as f:
            data = json.load(f)
        results = data.get("results", {})
        print(f"\n{'='*50}\nTask Results: {os.path.basename(rf)}\n{'='*50}")
        for task, metrics in results.items():
            print(f"\n{task}:")
            for key, val in metrics.items():
                if "stderr" not in key and isinstance(val, (int, float)):
                    print(f" {key}: {val:.4f}")
    except Exception as e:
        # Best effort: one unreadable file must not hide the other results.
        print(f"νμ± μ€ν¨: {rf}: {e}")
PYEOF
}

echo ""
echo "βΆ Step 3: κ²°κ³Ό μμ½"
print_eval_summary "${OUTPUT_DIR:-.}"