from .Dataset import Dataset
from evaluations.func_evaluate import evaluate_functional_correctness, evaluate_io
from constants.paths import *


class HumanDataset(Dataset):
    """Dataset loaded from ``HUMAN_WST_DATA_PATH`` by default.

    Items are identified by their ``"task_id"`` field.
    """

    def __init__(
        self,
        path: str = HUMAN_WST_DATA_PATH,
    ):
        """Initialise the base ``Dataset`` with *path* and set the id key.

        Args:
            path: Location of the data, forwarded unchanged to
                ``Dataset.__init__``.
        """
        super().__init__(path)
        self.id_key = "task_id"

    def evaluate(
        self,
        item: dict,
        cur_imp: str,
        language: str,
    ):
        """Report whether *cur_imp* passes the functional-correctness check.

        Args:
            item: The problem record, passed as ``problem`` to
                ``evaluate_functional_correctness``.
            cur_imp: The candidate implementation, passed as ``completion``.
            language: Accepted for interface compatibility; not used here.

        Returns:
            The result of comparing the evaluator's outcome with the
            string ``"passed"``.
        """
        return (
            evaluate_functional_correctness(problem=item, completion=cur_imp)
            == "passed"
        )

    def evaluate_sample_io(
        self,
        item: dict,
        cur_imp: str,
        language: str,
    ):
        """Check *cur_imp* against the sample I/O stored on *item*.

        Args:
            item: The problem record; its ``"sample_io"`` entry is read.
            cur_imp: The candidate implementation, passed as ``completion``.
            language: Accepted for interface compatibility; not used here.

        Returns:
            Whatever ``evaluate_io`` returns for the given sample I/O and
            completion.
        """
        samples = item["sample_io"]
        return evaluate_io(sample_io=samples, completion=cur_imp)

    @staticmethod
    def get_prompt(item):
        """Return the prompt text of *item* formatted as a string.

        The ``"prompt"`` key takes precedence; ``"text"`` is used only when
        ``"prompt"`` is absent.

        Raises:
            Exception: If *item* contains neither ``"prompt"`` nor ``"text"``.
        """
        # Order matters: "prompt" must be checked before "text".
        for field in ("prompt", "text"):
            if field in item:
                return f"{item[field]}"
        raise Exception("No prompt or text in item")