File size: 5,787 Bytes
48ecd01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env bash
# ============================================================
# run_eval_quick.sh — quick evaluation check (target: 20-30 minutes)
#
# Usage:
#   bash scripts/run_eval_quick.sh [CHECKPOINT_DIR] [OUTPUT_DIR]
#
# Example:
#   bash scripts/run_eval_quick.sh \
#       checkpoints/korean_1b_sft/checkpoint-0005000 \
#       eval/outputs/quick_5000
#
# Tasks: kobest_boolq, kobest_copa, haerae_general_knowledge,
#        haerae_history, paws_ko
# ============================================================
set -euo pipefail

# Directory containing this script, and its parent (treated as the project root).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# ─── Argument handling ────────────────────────────────────
# $1 = checkpoint directory (optional), $2 = output directory (optional).
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
CHECKPOINT="${1:-checkpoints/korean_1b_sft/checkpoint-0005000}"
OUTPUT_DIR="${2:-eval/outputs/quick_${TIMESTAMP}}"

# Relative path → absolute path (anchored at the project root)
if [[ "$CHECKPOINT" != /* ]]; then
    CHECKPOINT="$PROJECT_DIR/$CHECKPOINT"
fi
if [[ "$OUTPUT_DIR" != /* ]]; then
    OUTPUT_DIR="$PROJECT_DIR/$OUTPUT_DIR"
fi

# ─── Settings ─────────────────────────────────────────────
BATCH_SIZE="auto"
DEVICE="${CUDA_VISIBLE_DEVICES:-0}"   # default: use GPU 0 only
TOKENIZER="$PROJECT_DIR/tokenizer/korean_sp/tokenizer.json"
HF_MODEL_DIR="$PROJECT_DIR/outputs/hf_$(basename "$CHECKPOINT")"

# Quick-check tasks (about 2,000 samples, ~20 minutes)
TASKS="kobest_boolq,kobest_copa,haerae_general_knowledge,haerae_history,paws_ko"

# ─── Dependency check ─────────────────────────────────────
# check_dep MODULE PACKAGE
#   Attempts `import MODULE` with python3 (import errors are silenced).
#   If the import fails, prints an install hint naming PACKAGE and exits
#   the script with status 1.
check_dep() {
    if ! python3 -c "import $1" 2>/dev/null; then
        echo "❌ $1 not found. pip install $2"
        exit 1
    fi
}
# Each entry is "<python module> <pip package>"; checked in this order.
for dep_spec in "lm_eval lm-eval" "transformers transformers" "safetensors safetensors"; do
    check_dep "${dep_spec%% *}" "${dep_spec##* }"
done

# Print the run configuration banner in a single printf call.
printf '%s\n' \
    "==================================================" \
    " Ko-LLM Quick Eval" \
    "==================================================" \
    " Checkpoint : $CHECKPOINT" \
    " HF output  : $HF_MODEL_DIR" \
    " Tasks      : $TASKS" \
    " Output     : $OUTPUT_DIR" \
    " Device     : cuda:$DEVICE" \
    "=================================================="

# The output directory must exist before eval.log is written into it.
mkdir -p "$OUTPUT_DIR"

# ─── Step 1: convert to HF format ─────────────────────────
# The converted model lives in $HF_MODEL_DIR. The presence of config.json is
# the marker that a conversion already exists, in which case it is reused.
if [ ! -f "$HF_MODEL_DIR/config.json" ]; then
    # Fail with a clear message instead of a converter traceback when the
    # checkpoint path is wrong.
    if [ ! -e "$CHECKPOINT" ]; then
        echo "❌ Checkpoint not found: $CHECKPOINT" >&2
        exit 1
    fi
    echo ""
    echo "▶ Step 1: 커스텀 체크포인트 → HF 포맷 변환..."
    python3 "$PROJECT_DIR/scripts/convert_to_hf.py" \
        --checkpoint "$CHECKPOINT" \
        --output "$HF_MODEL_DIR" \
        --tokenizer "$TOKENIZER"
    echo "✅ HF 변환 완료: $HF_MODEL_DIR"
else
    echo "▶ Step 1: HF 모델 이미 존재, 변환 스킵"
    echo "   $HF_MODEL_DIR"
fi

# ─── Step 2: run lm-eval ──────────────────────────────────
echo ""
echo "▶ Step 2: lm-eval 평가 시작..."
START_TIME=$(date +%s)

# Zero-shot evaluation of the converted HF model on $TASKS.
# stdout and stderr are mirrored into $OUTPUT_DIR/eval.log via tee; because
# the script runs under `set -o pipefail`, a failing lm_eval still aborts the
# script even though tee itself succeeds.
CUDA_VISIBLE_DEVICES="$DEVICE" python3 -m lm_eval \
    --model hf \
    --model_args "pretrained=$HF_MODEL_DIR,dtype=float16" \
    --tasks "$TASKS" \
    --num_fewshot 0 \
    --batch_size "$BATCH_SIZE" \
    --output_path "$OUTPUT_DIR" \
    --log_samples \
    --verbosity INFO \
    2>&1 | tee "$OUTPUT_DIR/eval.log"

END_TIME=$(date +%s)
ELAPSED=$(( END_TIME - START_TIME ))

# Report wall-clock duration as minutes and seconds.
echo ""
echo "=================================================="
echo "✅ 평가 완료!"
echo " 소요시간: $((ELAPSED / 60))분 $((ELAPSED % 60))초"
echo " 결과 저장: $OUTPUT_DIR"
echo "=================================================="

# ─── Step 3: print a result summary ───────────────────────
# A single Python invocation that receives the output directory as argv[1].
# Running the summary without that argument makes it scan the current
# directory, so the directory is always passed explicitly here.
echo ""
echo "▶ Step 3: 결과 요약"
python3 - "$OUTPUT_DIR" <<'PYEOF'
"""Print the numeric, non-stderr metrics found in the result JSON files."""
import glob
import json
import os
import sys

MAX_FILES = 3  # show at most this many result files, newest first

output_dir = sys.argv[1] if len(sys.argv) > 1 else "."
# Escape the directory so glob metacharacters in the path are taken literally.
root = glob.escape(output_dir)

json_files = glob.glob(os.path.join(root, "**", "*.json"), recursive=True)
results_files = [
    path for path in json_files
    if "results" in os.path.basename(path).lower()
]
if not results_files:
    # Fall back to any JSON file placed directly in the output directory.
    results_files = glob.glob(os.path.join(root, "*.json"))
if not results_files:
    print("결과 JSON 파일 없음. eval.log 확인하세요.")
    sys.exit(0)

# Newest first, so a reused output directory reports the latest run on top.
results_files.sort(key=os.path.getmtime, reverse=True)

for rf in results_files[:MAX_FILES]:
    try:
        with open(rf, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"파싱 실패: {rf}: {e}")
        continue

    results = data.get("results", {}) if isinstance(data, dict) else {}
    if not isinstance(results, dict):
        results = {}

    print(f"\n{'='*50}\nTask Results: {os.path.basename(rf)}\n{'='*50}")
    for task, metrics in results.items():
        print(f"\n{task}:")
        if not isinstance(metrics, dict):
            continue
        for key, val in metrics.items():
            if "stderr" in key or isinstance(val, bool):
                continue
            if isinstance(val, (int, float)):
                print(f"  {key}: {val:.4f}")
PYEOF