Spaces:
Runtime error
Runtime error
davebulaval
committed on
Commit
·
7217d6a
1
Parent(s):
1982c24
uniformization of interface and add .to for tokenizer output
Browse files- meaningbert.py +14 -16
meaningbert.py
CHANGED
|
@@ -64,8 +64,8 @@ _KWARGS_DESCRIPTION = """
|
|
| 64 |
MeaningBERT metric for assessing meaning preservation between sentences.
|
| 65 |
|
| 66 |
Args:
|
| 67 |
-
|
| 68 |
-
|
| 69 |
device (str): Device to use for model inference. By default, set to "cuda".
|
| 70 |
|
| 71 |
Returns:
|
|
@@ -75,10 +75,10 @@ Returns:
|
|
| 75 |
|
| 76 |
Examples:
|
| 77 |
|
| 78 |
-
>>>
|
| 79 |
-
>>>
|
| 80 |
>>> meaning_bert = evaluate.load("davebulaval/meaningbert", device="cuda:0")
|
| 81 |
-
>>> results = meaning_bert.compute(
|
| 82 |
"""
|
| 83 |
|
| 84 |
_HASH = "21845c0cc85a2e8e16c89bb0053f489095cf64c5b19e9c3865d3e10047aba51b"
|
|
@@ -110,19 +110,17 @@ class MeaningBERT(evaluate.Metric):
|
|
| 110 |
|
| 111 |
def _compute(
|
| 112 |
self,
|
| 113 |
-
|
| 114 |
-
|
| 115 |
device: str = "cuda",
|
| 116 |
) -> Dict:
|
| 117 |
-
assert len(
|
| 118 |
-
|
| 119 |
-
), "The number of
|
| 120 |
hashcode = _HASH
|
| 121 |
|
| 122 |
# Index of sentence with perfect match between two sentences
|
| 123 |
-
matching_index = [
|
| 124 |
-
i for i, item in enumerate(documents) if item in simplifications
|
| 125 |
-
]
|
| 126 |
|
| 127 |
# We load the MeaningBERT pretrained model
|
| 128 |
scorer = AutoModelForSequenceClassification.from_pretrained(
|
|
@@ -135,12 +133,12 @@ class MeaningBERT(evaluate.Metric):
|
|
| 135 |
|
| 136 |
# We tokenize the text as a pair and return Pytorch Tensors
|
| 137 |
tokenize_text = tokenizer(
|
| 138 |
-
|
| 139 |
-
|
| 140 |
truncation=True,
|
| 141 |
padding=True,
|
| 142 |
return_tensors="pt",
|
| 143 |
-
)
|
| 144 |
|
| 145 |
with filter_logging_context():
|
| 146 |
# We process the text
|
|
|
|
| 64 |
MeaningBERT metric for assessing meaning preservation between sentences.
|
| 65 |
|
| 66 |
Args:
|
| 67 |
+
references (list of str): References sentences.
|
| 68 |
+
predictions (list of str): Predictions sentences (same number of element as documents).
|
| 69 |
device (str): Device to use for model inference. By default, set to "cuda".
|
| 70 |
|
| 71 |
Returns:
|
|
|
|
| 75 |
|
| 76 |
Examples:
|
| 77 |
|
| 78 |
+
>>> references = ["hello there", "general kenobi"]
|
| 79 |
+
>>> predictions = ["hello there", "general kenobi"]
|
| 80 |
>>> meaning_bert = evaluate.load("davebulaval/meaningbert", device="cuda:0")
|
| 81 |
+
>>> results = meaning_bert.compute(references=references, predictions=predictions)
|
| 82 |
"""
|
| 83 |
|
| 84 |
_HASH = "21845c0cc85a2e8e16c89bb0053f489095cf64c5b19e9c3865d3e10047aba51b"
|
|
|
|
| 110 |
|
| 111 |
def _compute(
|
| 112 |
self,
|
| 113 |
+
references: List,
|
| 114 |
+
predictions: List,
|
| 115 |
device: str = "cuda",
|
| 116 |
) -> Dict:
|
| 117 |
+
assert len(references) == len(
|
| 118 |
+
predictions
|
| 119 |
+
), "The number of references is different of the number of predictions."
|
| 120 |
hashcode = _HASH
|
| 121 |
|
| 122 |
# Index of sentence with perfect match between two sentences
|
| 123 |
+
matching_index = [i for i, item in enumerate(references) if item in predictions]
|
|
|
|
|
|
|
| 124 |
|
| 125 |
# We load the MeaningBERT pretrained model
|
| 126 |
scorer = AutoModelForSequenceClassification.from_pretrained(
|
|
|
|
| 133 |
|
| 134 |
# We tokenize the text as a pair and return Pytorch Tensors
|
| 135 |
tokenize_text = tokenizer(
|
| 136 |
+
references,
|
| 137 |
+
predictions,
|
| 138 |
truncation=True,
|
| 139 |
padding=True,
|
| 140 |
return_tensors="pt",
|
| 141 |
+
).to(device)
|
| 142 |
|
| 143 |
with filter_logging_context():
|
| 144 |
# We process the text
|