| """Evaluate AMBER models""" |

import argparse

import mteb

from models import PROMPTS
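# PROMPTS (defined in models.py) is assumed to map each supported model type to
# an mteb `model_prompts` dict (e.g. prompts keyed by "query"/"document" or by
# task name); its exact shape is an assumption, not shown here.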

# Registered MTEB benchmark names, keyed by language flag.
BENCHMARKS = {
    "en": "MTEB(eng, v2)",
    "ja": "MTEB(jpn, v1)",
}


def get_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_type", type=str, required=True, choices=PROMPTS.keys(),
                        help="Model type; selects the prompt set from PROMPTS")
    parser.add_argument("--model_name_or_path", type=str, required=True,
                        help="Model name or local path")
    parser.add_argument("--batch_size", type=int, default=32, help="Batch size")
    parser.add_argument("--output_dir", type=str, required=True, help="Output directory")
    parser.add_argument("--benchmark", type=str, required=True, choices=BENCHMARKS.keys(),
                        help="Benchmark language key")
    parser.add_argument("--corpus_chunk_size", type=int, default=50000,
                        help="Number of corpus documents encoded per chunk in retrieval tasks")
    parser.add_argument("--convert_to_tensor", action="store_true",
                        help="Have encode() return tensors instead of numpy arrays")
    return parser.parse_args()


def main():
    args = get_args()

    # Load the model with the prompt set that matches its type, so queries and
    # documents are encoded with the intended instructions.
    prompt = PROMPTS[args.model_type]
    model = mteb.get_model(args.model_name_or_path, model_prompts=prompt)

    # Resolve the registered benchmark for the requested language.
    benchmark = mteb.get_benchmark(BENCHMARKS[args.benchmark])
    evaluation = mteb.MTEB(tasks=benchmark)

    # Keyword arguments forwarded to the model's encode() calls.
    encode_kwargs = {
        "batch_size": args.batch_size,
        "convert_to_tensor": args.convert_to_tensor,
    }

    # Run every task; results are written as per-task JSON under output_dir.
    evaluation.run(
        model,
        output_folder=args.output_dir,
        encode_kwargs=encode_kwargs,
        corpus_chunk_size=args.corpus_chunk_size,
    )


if __name__ == "__main__":
    main()