| from .Dataset import Dataset |
| from evaluations.func_evaluate import evaluate_io, evaluate_functional_correctness |
| from constants.paths import * |
|
|
|
|
class MBPPDataset(Dataset):
    """Dataset subclass whose default data location is ``MBPP_DATA_PATH``.

    Evaluation is delegated to ``evaluate_functional_correctness`` (full
    check) and ``evaluate_io`` (sample input/output check).
    """

    def __init__(self, path: str = MBPP_DATA_PATH):
        """Initialise the base ``Dataset`` from *path* and set ``id_key``."""
        super().__init__(path)
        # NOTE(review): presumably the item field the base class uses as the
        # problem identifier -- confirm against Dataset.
        self.id_key = "name"

    def evaluate(self, item: dict, cur_imp: str, language: str):
        """Return whether *cur_imp* passes the functional check for *item*.

        Args:
            item: Problem record, forwarded as ``problem``.
            cur_imp: Candidate implementation, forwarded as ``completion``.
            language: Accepted for interface compatibility; not used here.

        Returns:
            ``True`` when the evaluator's result equals ``"passed"``,
            ``False`` otherwise.
        """
        outcome = evaluate_functional_correctness(
            problem=item,
            completion=cur_imp,
        )
        return outcome == "passed"

    def evaluate_sample_io(self, item: dict, cur_imp: str, language: str):
        """Check *cur_imp* against the sample I/O stored on *item*.

        Args:
            item: Problem record; its ``"sample_io"`` entry is optional.
            cur_imp: Candidate implementation, forwarded as ``completion``.
            language: Accepted for interface compatibility; not used here.

        Returns:
            ``(True, "")`` when *item* has no ``"sample_io"`` key or the
            entry is empty; otherwise whatever ``evaluate_io`` returns.
        """
        has_samples = "sample_io" in item and len(item["sample_io"]) != 0
        if not has_samples:
            # Nothing to check against, so the candidate trivially passes.
            return True, ""

        return evaluate_io(sample_io=item["sample_io"], completion=cur_imp)

    @staticmethod
    def get_prompt(item):
        """Return the value stored under the ``"prompt"`` key of *item*."""
        prompt = item["prompt"]
        return prompt
|
|