laguna-martini / scripts /eval_mmlu.py
nikgeo's picture
Publish Laguna Martini grouped-pruning model card and reproducibility artifacts
6f11713 verified
Raw
History Blame Contribute Delete
778 Bytes
#!/usr/bin/env python3
"""Run MMLU through lm-evaluation-harness."""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from heapr.eval.mmlu import run_mmlu_lm_eval
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the MMLU evaluation script.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, matching the previous behavior;
            an explicit list allows programmatic use and testing.

    Returns:
        Namespace with ``model`` and ``output_dir`` (both required),
        ``tasks`` (defaults to ``"mmlu"``) and ``limit`` (an ``int``, or
        ``None`` when the option is omitted).
    """
    parser = argparse.ArgumentParser(
        description="Run MMLU through lm-evaluation-harness.",
    )
    parser.add_argument(
        "--model",
        required=True,
        help="model to evaluate (forwarded to run_mmlu_lm_eval)",
    )
    parser.add_argument(
        "--output-dir",
        required=True,
        help="output directory (forwarded to run_mmlu_lm_eval)",
    )
    parser.add_argument(
        "--tasks",
        default="mmlu",
        help="task selection forwarded to run_mmlu_lm_eval (default: %(default)s)",
    )
    parser.add_argument(
        "--limit",
        type=int,
        help="optional integer limit forwarded to run_mmlu_lm_eval",
    )
    return parser.parse_args(argv)
def main() -> None:
    """Run the evaluation described by the command line, then exit.

    Parses the CLI options, forwards them as keyword arguments to
    ``run_mmlu_lm_eval`` and raises ``SystemExit`` carrying whatever that
    call returns.
    """
    cli = parse_args()
    outcome = run_mmlu_lm_eval(
        model=cli.model,
        output_dir=cli.output_dir,
        tasks=cli.tasks,
        limit=cli.limit,
    )
    # NOTE(review): presumably an exit status (int or None) — confirm
    # against heapr.eval.mmlu.run_mmlu_lm_eval.
    raise SystemExit(outcome)


if __name__ == "__main__":
    main()