import json
import re
from collections import Counter

from loguru import logger

from lmms_eval.tasks._task_utils.file_utils import generate_submission_file
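
# Prompt template: the question followed by six lettered options (A)-(F).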
| PROMPT = """Question: {} | |
| (A) {} | |
| (B) {} | |
| (C) {} | |
| (D) {} | |
| (E) {} | |
| (F) {}""" | |
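

# Build the full text prompt for a document: the formatted question wrapped
# in the task-specific pre/post prompts from lmms_eval_specific_kwargs.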
def ii_bench_doc_to_text(doc, lmms_eval_specific_kwargs):
    question = PROMPT.format(doc["question"], doc["option1"], doc["option2"], doc["option3"], doc["option4"], doc["option5"], doc["option6"])
    pre_prompt = lmms_eval_specific_kwargs["pre_prompt"]
    post_prompt = lmms_eval_specific_kwargs["post_prompt"]
    return f"{pre_prompt}{question}{post_prompt}"
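

# Return the document's image as a single-element list of RGB PIL images.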
def ii_bench_doc_to_visual(doc):
    return [doc["image"].convert("RGB")]
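

# Extract the predicted option letter (A-F) from a model response.
# Prefers explicitly parenthesized letters, then bare letters, then falls
# back to matching the option texts themselves. Returns the string "error"
# for dict inputs and None when nothing matches.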
def extract_option_labels(text, options=None):
    if isinstance(text, dict):
        return "error"
    pattern = r"\(([A-F])\)"
    matches = re.findall(pattern, text)
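    # Second pass: bare letters on word boundaries, e.g. "Answer: B".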
    if not matches:
        pattern = r"\b([A-F])\b"
        matches = re.findall(pattern, text)
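    # Majority vote over the matched letters; ties are broken in favor of
    # the last tied candidate returned by most_common().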
    if matches:
        counter = Counter(matches)
        most_common = counter.most_common()
        max_count = most_common[0][1]
        candidates = [item for item in most_common if item[1] == max_count]
        return candidates[-1][0]
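    # No letter found: fall back to substring matching between the response
    # and the option texts, mapping option i to the label chr(64 + i).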
    elif options:
        counter = Counter()
        for i, option in enumerate(options, start=1):
            label = chr(64 + i)
            option_stripped = option.strip()
            if option_stripped in text or text in option_stripped:
                counter[label] += 1
        if counter:
            most_common = counter.most_common()
            max_count = most_common[0][1]
            candidates = [item for item in most_common if item[1] == max_count]
            return candidates[-1][0]
    return None
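

# Package a single model response into the submission format: extract the
# predicted letter and keep the raw response alongside the document id.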
def ii_bench_process_results(doc, results):
    response = results[0]
    predict = extract_option_labels(response, [doc["option1"], doc["option2"], doc["option3"], doc["option4"], doc["option5"], doc["option6"]])
    return {"submission": {"id": doc["id"], "predict_answer": predict, "response": response}}
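

# Write all per-document submission entries to a single JSON file.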
def ii_bench_aggregate_submissions(results, args):
    file = generate_submission_file("ii_bench_test_for_submission.json", args)
    with open(file, "w") as f:
        json.dump(results, f, indent=4)
    logger.info(f"Results saved to {file}")