yu-val-weiss
committed on
Commit
·
eb6c7b0
1
Parent(s):
17ddf40
Update blimp.py
Browse files
blimp.py
CHANGED
|
@@ -14,6 +14,7 @@
|
|
| 14 |
"""Blimp Metric."""
|
| 15 |
|
| 16 |
from collections import defaultdict
|
|
|
|
| 17 |
|
| 18 |
import datasets
|
| 19 |
import evaluate
|
|
@@ -123,7 +124,7 @@ Args:
|
|
| 123 |
predictions (list[str]): names of metrics to run. pass empty list or ["*"] to run all of them
|
| 124 |
batch_size (int): the batch size to run texts through the model. Defaults to 16.
|
| 125 |
device (str): device to run on, defaults to 'cuda' when available.
|
| 126 |
-
samples_per_set (int): the number of samples per phenomenon, defaults to
|
| 127 |
|
| 128 |
Returns:
|
| 129 |
blimp: dictionary containing the blimp scores for each of the 67 sub-datasets, as well as the overall accuracy.
|
|
@@ -156,7 +157,7 @@ class Blimp(evaluate.Metric):
|
|
| 156 |
predictions=None,
|
| 157 |
batch_size: int = 16,
|
| 158 |
device=None,
|
| 159 |
-
samples_per_set: int =
|
| 160 |
):
|
| 161 |
if device is not None:
|
| 162 |
assert device in ["gpu", "cpu", "cuda", "mps"], (
|
|
@@ -171,6 +172,9 @@ class Blimp(evaluate.Metric):
|
|
| 171 |
else ("mps" if torch.mps.is_available() else "cpu")
|
| 172 |
)
|
| 173 |
|
|
|
|
|
|
|
|
|
|
| 174 |
model = AutoModelForCausalLM.from_pretrained(model_id)
|
| 175 |
model = model.to(device)
|
| 176 |
model.eval()
|
|
|
|
| 14 |
"""Blimp Metric."""
|
| 15 |
|
| 16 |
from collections import defaultdict
|
| 17 |
+
from typing import Optional
|
| 18 |
|
| 19 |
import datasets
|
| 20 |
import evaluate
|
|
|
|
| 124 |
predictions (list[str]): names of metrics to run. pass empty list or ["*"] to run all of them
|
| 125 |
batch_size (int): the batch size to run texts through the model. Defaults to 16.
|
| 126 |
device (str): device to run on, defaults to 'cuda' when available.
|
| 127 |
+
samples_per_set (Optional[int]): the number of samples per phenomenon. Max is 1,000 (but will not error if higher value given.) If None, defaults to 1000.
|
| 128 |
|
| 129 |
Returns:
|
| 130 |
blimp: dictionary containing the blimp scores for each of the 67 sub-datasets, as well as the overall accuracy.
|
|
|
|
| 157 |
predictions=None,
|
| 158 |
batch_size: int = 16,
|
| 159 |
device=None,
|
| 160 |
+
samples_per_set: Optional[int] = None,
|
| 161 |
):
|
| 162 |
if device is not None:
|
| 163 |
assert device in ["gpu", "cpu", "cuda", "mps"], (
|
|
|
|
| 172 |
else ("mps" if torch.mps.is_available() else "cpu")
|
| 173 |
)
|
| 174 |
|
| 175 |
+
if samples_per_set is None or samples_per_set <= 0:
|
| 176 |
+
samples_per_set = 1000
|
| 177 |
+
|
| 178 |
model = AutoModelForCausalLM.from_pretrained(model_id)
|
| 179 |
model = model.to(device)
|
| 180 |
model.eval()
|