| from .Dataset import Dataset |
| from evaluations.func_evaluate import evaluate_functional_correctness, evaluate_io |
| from constants.paths import * |
|
|
|
|
class HumanDataset(Dataset):
    """Dataset of HumanEval-style problems, keyed by ``task_id``.

    Thin specialization of :class:`Dataset` that wires up the
    functional-correctness and sample-I/O evaluators for this benchmark.
    """

    def __init__(
        self,
        path: str = HUMAN_WST_DATA_PATH,
    ):
        # Delegate loading to the base class; items are identified by "task_id".
        super().__init__(path)
        self.id_key = "task_id"

    def evaluate(
        self,
        item: dict,
        cur_imp: str,
        language: str,
    ):
        """Return True iff ``cur_imp`` passes full functional-correctness checks.

        ``language`` is accepted for interface compatibility but not used here.
        """
        verdict = evaluate_functional_correctness(
            problem=item,
            completion=cur_imp,
        )
        # The evaluator reports a status string; anything but "passed" is a failure.
        return verdict == "passed"

    def evaluate_sample_io(
        self,
        item: dict,
        cur_imp: str,
        language: str,
    ):
        """Evaluate ``cur_imp`` against the item's sample I/O pairs only.

        ``language`` is accepted for interface compatibility but not used here.
        """
        return evaluate_io(
            sample_io=item["sample_io"],
            completion=cur_imp,
        )

    @staticmethod
    def get_prompt(item):
        """Extract the problem statement, preferring "prompt" over "text"."""
        for key in ("prompt", "text"):
            if key in item:
                return f"{item[key]}"
        raise Exception("No prompt or text in item")
|
|