Spaces:
Configuration error
Configuration error
| """METEOR (Metric for Evaluation of Translation with Explicit Ordering). | |
| METEOR is part of the standard COCO captioning report alongside BLEU, ROUGE-L, | |
| and CIDEr. It complements BLEU by rewarding semantic matches (synonyms, | |
| stems) rather than only surface n-gram overlap. | |
| Implementation notes: | |
| * We use the ``pycocoevalcap`` METEOR adapter, which shells out to the | |
| original Java implementation. METEOR therefore needs a JRE on PATH at | |
| runtime; the import succeeds either way, the Java process is spawned | |
| lazily on first scoring call. | |
| * METEOR's process is long-lived and accepts batches over stdin/stdout — | |
| a single ``compute_score`` call handles the whole corpus in one round | |
| trip, so this scales to thousands of examples without thrashing the JVM. | |
| """ | |
| from __future__ import annotations | |
| from collections.abc import Sequence | |
| from captioning.evaluation.tokenization import ( | |
| strip_sentinels_many, | |
| strip_sentinels_references, | |
| ) | |
def corpus_meteor_score(
    predictions: Sequence[str],
    references: Sequence[Sequence[str]],
) -> float:
    """Compute corpus-level METEOR via ``pycocoevalcap``.

    Args:
        predictions: One generated caption per example.
        references: One *list* of reference captions per example, aligned
            index-by-index with ``predictions``.

    Returns:
        Corpus METEOR scaled to the 0-100 range (the scorer's value is
        multiplied by 100). Empty input returns ``0.0`` without importing
        ``pycocoevalcap`` or starting a scorer.

    Raises:
        ValueError: If ``predictions`` and ``references`` differ in length.
        ImportError: If ``pycocoevalcap`` is not installed.
        RuntimeError: If the METEOR scorer cannot be created (for example
            because no Java runtime is available) or scoring fails.
    """
    if len(predictions) != len(references):
        raise ValueError(
            f"predictions ({len(predictions)}) and references "
            f"({len(references)}) must have the same length"
        )
    if not predictions:
        return 0.0

    # Imported lazily so the optional dependency is only required when a
    # non-empty corpus is actually scored.
    try:
        from pycocoevalcap.meteor.meteor import Meteor
    except ImportError as e:
        raise ImportError(
            "pycocoevalcap is required for METEOR evaluation. "
            "Install via `pip install -r requirements-eval.txt`."
        ) from e

    preds = strip_sentinels_many(predictions)
    refs = strip_sentinels_references(references)

    # The scorer is fed two dicts keyed by example id: id -> list of
    # references, and id -> single-element list holding the prediction.
    # References that are empty after sentinel stripping are dropped.
    gts = {str(i): [r for r in ref_list if r] for i, ref_list in enumerate(refs)}
    res = {str(i): [p] for i, p in enumerate(preds)}

    scorer = None
    try:
        # Construct the scorer inside the ``try``: creating it is what
        # starts the Java process, so a missing JRE fails here and must be
        # reported as the documented RuntimeError, not a raw OSError.
        scorer = Meteor()
        score, _ = scorer.compute_score(gts, res)
    except Exception as e:  # meteor.py raises bare Exceptions
        raise RuntimeError(
            "METEOR scoring failed. METEOR requires a Java runtime on PATH. "
            f"Underlying error: {e}"
        ) from e
    finally:
        # Best-effort release of the Java process. NOTE(review): newer
        # pycocoevalcap releases expose ``close()``; older ones clean up in
        # ``__del__`` only, hence the guarded lookup — confirm against the
        # pinned version.
        close = getattr(scorer, "close", None)
        if callable(close):
            close()

    return float(100.0 * score)