Spaces:
Running
Running
File size: 1,729 Bytes
8fa3acc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import numpy as np
from datasets.formatting.formatting import LazyRow
from src.language_model.language_model_abstraction import LanguageModel
from src.task.task import Task
class RandomBaselineModel(LanguageModel):
    """Baseline "model" whose predictions are seeded random guesses.

    Nothing is learned or queried: every prediction is drawn from a NumPy
    ``RandomState`` built from ``seed``, so the same seed yields the same
    sequence of draws.
    """

    def __init__(self, model_name: str, seed: int = 42):
        """Register the model name and create the seeded random generator."""
        super().__init__(model_name)
        self.random_generator = np.random.RandomState(seed=seed)

    def predict(self, evaluation_dataset, task: Task):
        """Return a list with one random prediction per evaluation row.

        The task kind is decided from ``task.dataset.possible_ground_truths``:

        * non-empty: inference task, each prediction is drawn (with
          replacement) from those ground-truth labels;
        * empty: generation task, each prediction is one space-delimited
          token picked at random from the row's own instance sentence.
        """
        row_count = len(evaluation_dataset)
        labels = task.dataset.possible_ground_truths

        if len(labels) != 0:
            # Inference task: a single vectorised draw covers every row.
            return self.random_generator.choice(labels, size=row_count).tolist()

        # Generation task: there is no label set, so guess a token of the
        # input sentence, one independent draw per row in dataset order.
        return [self._random_token(row["text"]) for row in evaluation_dataset]

    def _random_token(self, prompt):
        """Pick one random space-delimited token from the prompt's sentence."""
        # The prompt still carries the instruction. The instance sentence is
        # whatever follows the last "Phrase : " marker, cut at the first
        # newline to drop the trailing part-of-speech content.
        after_marker = prompt.split("Phrase : ")[-1]
        sentence = after_marker.split("\n")[0]
        tokens = sentence.split(" ")
        # One-element draw over the token positions, unpacked to a plain int.
        (position,) = self.random_generator.choice(
            range(len(tokens)), size=1
        ).tolist()
        return tokens[position]

    def infer(self, rows: LazyRow) -> LazyRow:
        """Return ``rows`` unchanged; this baseline has no inference step."""
        return rows

    def generate(self, rows: LazyRow) -> LazyRow:
        """Return ``rows`` unchanged; this baseline has no generation step."""
        return rows

    @property
    def num_parameters(self):
        """Parameter count of the baseline, which is always zero."""
        return 0
|