#!/usr/bin/env bash
#
# Ko-LLM quick evaluation.
#
# Converts a training checkpoint to Hugging Face format with
# scripts/convert_to_hf.py (skipped when the converted model already exists),
# runs lm-eval zero-shot on a fixed list of Korean tasks, and prints a short
# summary of the metrics found in the result JSON files.
#
# Usage:
#   <this script> [CHECKPOINT] [OUTPUT_DIR]
#
#   CHECKPOINT  checkpoint path; relative paths are resolved against the
#               project root (default: checkpoints/korean_1b_sft/checkpoint-0005000)
#   OUTPUT_DIR  where lm-eval results and eval.log are written; relative paths
#               are resolved against the project root
#               (default: eval/outputs/quick_<timestamp>)
#
# Environment:
#   CUDA_VISIBLE_DEVICES  GPU id(s) handed to lm-eval (default: 0)

# Abort on any command failure, unset variable, or failing pipeline stage.
set -euo pipefail

# --- Paths -------------------------------------------------------------------
# The project root is the parent of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

# --- Arguments ---------------------------------------------------------------
# $1: checkpoint to evaluate, $2: output directory (timestamped by default so
# repeated runs do not overwrite each other).
CHECKPOINT="${1:-checkpoints/korean_1b_sft/checkpoint-0005000}"
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
OUTPUT_DIR="${2:-eval/outputs/quick_${TIMESTAMP}}"

# Relative paths are interpreted relative to the project root, not the
# caller's working directory.
if [[ "$CHECKPOINT" != /* ]]; then
  CHECKPOINT="$PROJECT_DIR/$CHECKPOINT"
fi
if [[ "$OUTPUT_DIR" != /* ]]; then
  OUTPUT_DIR="$PROJECT_DIR/$OUTPUT_DIR"
fi

# --- Derived settings --------------------------------------------------------
# The converted HF model is stored per checkpoint name under outputs/.
HF_MODEL_DIR="$PROJECT_DIR/outputs/hf_$(basename "$CHECKPOINT")"
TOKENIZER="$PROJECT_DIR/tokenizer/korean_sp/tokenizer.json"
# Honour a caller-supplied CUDA_VISIBLE_DEVICES; fall back to GPU 0.
DEVICE="${CUDA_VISIBLE_DEVICES:-0}"
BATCH_SIZE="auto"

# --- Evaluation tasks (comma-separated lm-eval task names) -------------------
TASKS="kobest_boolq,kobest_copa,haerae_general_knowledge,haerae_history,paws_ko"

#######################################
# Abort the script unless a Python module can be imported.
# Arguments:
#   $1 - module name handed to Python's `import`
#   $2 - pip package name shown in the install hint
# Outputs:
#   On failure, writes a one-line install hint to stderr.
# Returns:
#   0 when the import succeeds; otherwise exits the shell with status 1.
#######################################
check_dep() {
  local module=$1
  local package=$2
  # Python's own traceback is discarded on purpose; the hint below replaces it.
  if ! python3 -c "import ${module}" 2>/dev/null; then
    printf '❌ %s not found. pip install %s\n' "$module" "$package" >&2
    exit 1
  fi
}
# Fail fast, before any conversion or evaluation work starts, if a required
# Python package is missing (arguments: import name, pip package name).
check_dep lm_eval lm-eval
check_dep transformers transformers
check_dep safetensors safetensors

# --- Run banner: show the resolved settings before doing any work ------------
echo "=================================================="
echo " Ko-LLM Quick Eval"
echo "=================================================="
echo " Checkpoint : $CHECKPOINT"
echo " HF output : $HF_MODEL_DIR"
echo " Tasks : $TASKS"
echo " Output : $OUTPUT_DIR"
echo " Device : cuda:$DEVICE"
echo "=================================================="

# Must exist before the evaluation step, which tees its log into it.
mkdir -p "$OUTPUT_DIR"

# --- Step 1: convert the custom checkpoint to HF format ----------------------
# The presence of config.json in the target directory is treated as "already
# converted", so re-runs on the same checkpoint skip the conversion.
if [[ ! -f "$HF_MODEL_DIR/config.json" ]]; then
  echo ""
  echo "▶ Step 1: 커스텀 체크포인트 → HF 포맷 변환..."
  python3 "$PROJECT_DIR/scripts/convert_to_hf.py" \
    --checkpoint "$CHECKPOINT" \
    --output "$HF_MODEL_DIR" \
    --tokenizer "$TOKENIZER"
  echo "✅ HF 변환 완료: $HF_MODEL_DIR"
else
  echo "▶ Step 1: HF 모델 이미 존재, 변환 스킵"
  echo " $HF_MODEL_DIR"
fi

# --- Step 2: run lm-eval -----------------------------------------------------
echo ""
echo "▶ Step 2: lm-eval 평가 시작..."
START_TIME=$(date +%s)

# stderr is merged into stdout so eval.log captures the complete run while the
# output is still shown live. With the script's `set -o pipefail`, a failing
# lm_eval fails the whole pipeline even though tee itself succeeds.
CUDA_VISIBLE_DEVICES="$DEVICE" python3 -m lm_eval \
  --model hf \
  --model_args "pretrained=$HF_MODEL_DIR,dtype=float16" \
  --tasks "$TASKS" \
  --num_fewshot 0 \
  --batch_size "$BATCH_SIZE" \
  --output_path "$OUTPUT_DIR" \
  --log_samples \
  --verbosity INFO \
  2>&1 | tee "$OUTPUT_DIR/eval.log"

END_TIME=$(date +%s)
ELAPSED=$(( END_TIME - START_TIME ))

# Completion banner with the wall-clock duration as minutes and seconds.
echo ""
echo "=================================================="
echo "✅ 평가 완료!"
echo " 소요시간: $((ELAPSED / 60))분 $((ELAPSED % 60))초"
echo " 결과 저장: $OUTPUT_DIR"
echo "=================================================="

# --- Step 3: summarise the result JSON files ---------------------------------
echo ""
echo "▶ Step 3: 결과 요약"
# The output directory is passed as argv[1]. The quoted heredoc delimiter keeps
# the shell from expanding anything inside the Python source.
python3 - "$OUTPUT_DIR" <<'PYEOF'
import glob
import json
import os
import sys

# An empty or missing argument falls back to the current directory.
output_dir = (sys.argv[1] if len(sys.argv) > 1 else "") or "."
# Escape the directory so glob metacharacters in the path are taken literally.
root = glob.escape(output_dir)


def newest_first(paths):
    """Return paths ordered by modification time, most recent first."""
    return sorted(paths, key=os.path.getmtime, reverse=True)


# Prefer files whose own name mentions "results", searched recursively.
# Matching on the basename avoids picking up unrelated JSON files merely
# because a parent directory name contains the word.
all_json = glob.glob(os.path.join(root, "**", "*.json"), recursive=True)
results_files = newest_first(
    p for p in all_json if "results" in os.path.basename(p).lower()
)
if not results_files:
    # Fall back to any JSON file directly inside the output directory.
    results_files = newest_first(glob.glob(os.path.join(root, "*.json")))

if not results_files:
    print("결과 JSON 파일 없음. eval.log 확인하세요.")
    sys.exit(0)

# Only the three most recent files are summarised.
for rf in results_files[:3]:
    try:
        with open(rf, encoding="utf-8") as f:
            data = json.load(f)
        results = data.get("results", {})
        print(f"\n{'='*50}\nTask Results: {os.path.basename(rf)}\n{'='*50}")
        for task, metrics in results.items():
            print(f"\n{task}:")
            for key, val in metrics.items():
                # Print numeric metrics only; skip their stderr companions.
                if "stderr" not in key and isinstance(val, (int, float)):
                    print(f" {key}: {val:.4f}")
    except Exception as e:
        # Best-effort summary: report the unreadable file and carry on.
        print(f"파싱 실패: {rf}: {e}")
PYEOF