frankenstallm / source /scripts /run_eval_quick.sh
pathcosmos's picture
Upload folder using huggingface_hub (#17)
48ecd01
#!/usr/bin/env bash
# ============================================================
# run_eval_quick.sh — quick evaluation check (target: 20-30 minutes)
#
# Usage:
#   bash scripts/run_eval_quick.sh [CHECKPOINT_DIR] [OUTPUT_DIR]
#
# Example:
#   bash scripts/run_eval_quick.sh \
#     checkpoints/korean_1b_sft/checkpoint-0005000 \
#     eval/outputs/quick_5000
#
# Tasks: kobest_boolq, kobest_copa, haerae_general_knowledge,
#        haerae_history, paws_ko
# ============================================================
set -euo pipefail

# Directory holding this script; the project root is its parent directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "${SCRIPT_DIR}")"

# ─── Arguments ───────────────────────────────────────────
# $1: checkpoint directory, $2: output directory. Both are optional; the
# default output directory name carries the current timestamp.
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
CHECKPOINT="${1:-checkpoints/korean_1b_sft/checkpoint-0005000}"
OUTPUT_DIR="${2:-eval/outputs/quick_${TIMESTAMP}}"

# Relative path -> absolute path (resolved against the project root).
case "${CHECKPOINT}" in
  /*) ;;
  *) CHECKPOINT="${PROJECT_DIR}/${CHECKPOINT}" ;;
esac
case "${OUTPUT_DIR}" in
  /*) ;;
  *) OUTPUT_DIR="${PROJECT_DIR}/${OUTPUT_DIR}" ;;
esac

# ─── Settings ────────────────────────────────────────────
# Quick-check task set (about 2,000 samples, ~20 minutes).
TASKS="kobest_boolq,kobest_copa,haerae_general_knowledge,haerae_history,paws_ko"
BATCH_SIZE="auto"
# Default: GPU 0 only, unless CUDA_VISIBLE_DEVICES is already set and non-empty.
DEVICE="${CUDA_VISIBLE_DEVICES:-0}"
TOKENIZER="${PROJECT_DIR}/tokenizer/korean_sp/tokenizer.json"
# The HF-format model directory is named after the checkpoint's last path component.
HF_MODEL_DIR="${PROJECT_DIR}/outputs/hf_$(basename "${CHECKPOINT}")"
# ─── Dependency check ────────────────────────────────────
#######################################
# Abort the script unless a Python module can be imported.
# Arguments:
#   $1 - module name as written in an `import` statement
#   $2 - pip package name suggested in the error message
# Outputs:
#   On failure, writes the install hint to stderr, followed by the last
#   line of Python's own error output when there is one.
# Returns:
#   0 when the import succeeds; otherwise exits the shell with status 1.
#######################################
check_dep() {
  local module="$1" package="$2"
  local import_err

  # Capture stderr (stdout is discarded) so the real failure reason is not
  # lost: an installed-but-broken module fails differently from a missing one.
  if import_err="$(python3 -c "import ${module}" 2>&1 >/dev/null)"; then
    return 0
  fi

  echo "❌ ${module} not found. pip install ${package}" >&2
  if [[ -n "${import_err}" ]]; then
    # Only the final line, which carries the exception type and message.
    printf '   %s\n' "${import_err##*$'\n'}" >&2
  fi
  exit 1
}
# Verify each required Python module (module:pip-package pairs) up front.
for dep_spec in lm_eval:lm-eval transformers:transformers safetensors:safetensors; do
  check_dep "${dep_spec%%:*}" "${dep_spec#*:}"
done

# Show the resolved run configuration before any work starts.
banner_rule="=================================================="
printf '%s\n' \
  "${banner_rule}" \
  " Ko-LLM Quick Eval" \
  "${banner_rule}" \
  " Checkpoint : ${CHECKPOINT}" \
  " HF output : ${HF_MODEL_DIR}" \
  " Tasks : ${TASKS}" \
  " Output : ${OUTPUT_DIR}" \
  " Device : cuda:${DEVICE}" \
  "${banner_rule}"

# The output directory must exist before eval.log is written into it.
mkdir -p "${OUTPUT_DIR}"
# ─── Step 1: convert to HF format ────────────────────────
# An existing config.json in the target directory means the conversion was
# already done, so it is skipped.
if [[ -f "${HF_MODEL_DIR}/config.json" ]]; then
  echo "β–Ά Step 1: HF λͺ¨λΈ 이미 쑴재, λ³€ν™˜ μŠ€ν‚΅"
  echo " $HF_MODEL_DIR"
else
  echo ""
  echo "β–Ά Step 1: μ»€μŠ€ν…€ 체크포인트 β†’ HF 포맷 λ³€ν™˜..."
  convert_args=(
    --checkpoint "${CHECKPOINT}"
    --output "${HF_MODEL_DIR}"
    --tokenizer "${TOKENIZER}"
  )
  python3 "${PROJECT_DIR}/scripts/convert_to_hf.py" "${convert_args[@]}"
  echo "βœ… HF λ³€ν™˜ μ™„λ£Œ: $HF_MODEL_DIR"
fi
# ─── Step 2: run lm-eval ─────────────────────────────────
echo ""
echo "β–Ά Step 2: lm-eval 평가 μ‹œμž‘..."

# Zero-shot run of the converted HF model in float16, with per-sample logging.
lm_eval_args=(
  --model hf
  --model_args "pretrained=${HF_MODEL_DIR},dtype=float16"
  --tasks "${TASKS}"
  --num_fewshot 0
  --batch_size "${BATCH_SIZE}"
  --output_path "${OUTPUT_DIR}"
  --log_samples
  --verbosity INFO
)

START_TIME=$(date +%s)
# stderr is merged into stdout so eval.log holds everything shown on screen.
CUDA_VISIBLE_DEVICES="${DEVICE}" python3 -m lm_eval "${lm_eval_args[@]}" 2>&1 \
  | tee "${OUTPUT_DIR}/eval.log"
END_TIME=$(date +%s)

# Wall-clock duration, split into whole minutes and leftover seconds.
ELAPSED=$(( END_TIME - START_TIME ))
elapsed_min=$(( ELAPSED / 60 ))
elapsed_sec=$(( ELAPSED % 60 ))

echo ""
echo "=================================================="
echo "βœ… 평가 μ™„λ£Œ!"
echo " μ†Œμš”μ‹œκ°„: ${elapsed_min}λΆ„ ${elapsed_sec}초"
echo " κ²°κ³Ό μ €μž₯: $OUTPUT_DIR"
echo "=================================================="
# ─── Step 3: print a results summary ─────────────────────
#######################################
# Print the numeric, non-stderr metrics found in result JSON files.
# Arguments:
#   $1 - directory to search recursively for result JSON files
# Outputs:
#   One report per result file on stdout (at most three files), or a hint
#   to check eval.log when the directory is missing or holds no JSON file.
# Returns:
#   Exit status of the embedded Python program (0 unless Python itself fails).
#######################################
print_eval_summary() {
  # The directory is passed as argv[1]; the quoted delimiter keeps the
  # Python source literal (no shell expansion inside the heredoc).
  python3 - "$1" <<'PYEOF'
import glob
import json
import os
import sys

NO_RESULTS_MSG = "κ²°κ³Ό JSON 파일 μ—†μŒ. eval.log ν™•μΈν•˜μ„Έμš”."
MAX_FILES = 3  # cap the number of reports printed
RULE = "=" * 50

output_dir = sys.argv[1] if len(sys.argv) > 1 else ""
if not os.path.isdir(output_dir):
    # Also covers an empty argument, which would otherwise glob from "/".
    print(NO_RESULTS_MSG)
    sys.exit(0)

# Escape the directory so characters like "[" are not treated as glob syntax.
root = glob.escape(output_dir)
candidates = sorted(glob.glob(os.path.join(root, "**", "*.json"), recursive=True))

# Match on the file name only, so a directory called e.g. "eval_results"
# does not make every JSON file look like a results file.
results_files = [p for p in candidates if "results" in os.path.basename(p).lower()]
if not results_files:
    # Fall back to any JSON file directly inside the output directory.
    results_files = sorted(glob.glob(os.path.join(root, "*.json")))
if not results_files:
    print(NO_RESULTS_MSG)
    sys.exit(0)

for rf in results_files[:MAX_FILES]:
    try:
        with open(rf, encoding="utf-8") as f:
            data = json.load(f)
        results = data.get("results", {})
        print(f"\n{RULE}\nTask Results: {os.path.basename(rf)}\n{RULE}")
        for task, metrics in results.items():
            print(f"\n{task}:")
            for key, val in metrics.items():
                # Skip standard-error entries and non-numeric values.
                if "stderr" not in key and isinstance(val, (int, float)):
                    print(f" {key}: {val:.4f}")
    except Exception as e:
        # Best effort: one unreadable file must not hide the others.
        print(f"νŒŒμ‹± μ‹€νŒ¨: {rf}: {e}")
PYEOF
}

echo ""
echo "β–Ά Step 3: κ²°κ³Ό μš”μ•½"
print_eval_summary "${OUTPUT_DIR}"