import numpy as np from datasets.formatting.formatting import LazyRow from src.language_model.language_model_abstraction import LanguageModel from src.task.task import Task class RandomBaselineModel(LanguageModel): def __init__(self, model_name: str, seed: int = 42): super().__init__(model_name) self.random_generator = np.random.RandomState(seed=seed) def predict(self, evaluation_dataset, task: Task): size = len(evaluation_dataset) choices = task.dataset.possible_ground_truths if len(choices) == 0: # Meaning it is a generation task. predictions = [] for row in evaluation_dataset: # Since we work with the prompt, including instruction, we extract the instance sentence after the # "Phrase :", then we remove the trailing Part-of-speech content. instance_sentence = row["text"].split("Phrase : ")[-1].split("\n")[0] whitespace_instance_sentence = instance_sentence.split(" ") choices = range(0, len(whitespace_instance_sentence)) prediction_idx = self.random_generator.choice(choices, size=1).tolist()[ 0 ] prediction = whitespace_instance_sentence[prediction_idx] predictions.append(prediction) else: # Meaning it is an inference task. predictions = self.random_generator.choice(choices, size=size).tolist() return predictions def infer(self, rows: LazyRow) -> LazyRow: return rows def generate(self, rows: LazyRow) -> LazyRow: return rows @property def num_parameters(self): return 0