"""Hugging Face Evaluate metric for Script Fidelity Rate.""" from __future__ import annotations import datasets import evaluate from script_fidelity import compute_corpus_sfr # noqa: E402 _DESCRIPTION = """ Script Fidelity Rate (SFR) is a reference-free metric for multilingual ASR. It computes the fraction of countable hypothesis characters that belong to the expected Unicode script for a target FLEURS language code. """ _CITATION = """ @misc{scriptfidelity2026, title = {Script Collapse in Multilingual ASR: A Reference-Free Metric and 100-Pair Benchmark}, year = {2026} } """ _KWARGS_DESCRIPTION = """ Args: predictions: List of ASR hypothesis strings. language: FLEURS language code or alias, for example "ps_af" or "pashto". digit_policy: "count" keeps digits in the denominator. "ignore" treats digits as neutral. return_details: Return per-example SFR details. Returns: Corpus SFR, percent SFR, empty counts, low/high SFR rates, and dominant script counts. """ class ScriptFidelityRate(evaluate.Metric): """Evaluate community metric wrapper for SFR.""" def _info(self) -> evaluate.MetricInfo: return evaluate.MetricInfo( description=_DESCRIPTION, citation=_CITATION, inputs_description=_KWARGS_DESCRIPTION, features=datasets.Features( { "predictions": datasets.Value("string"), } ), reference_urls=[ "https://huggingface.co/datasets/themechanism/script-fidelity-benchmark" ], ) def _compute( self, predictions: list[str], language: str, digit_policy: str = "count", return_details: bool = False, ) -> dict: if digit_policy not in {"count", "ignore"}: raise ValueError("digit_policy must be 'count' or 'ignore'") return compute_corpus_sfr( predictions, language=language, digit_policy=digit_policy, # type: ignore[arg-type] return_details=return_details, )