Spaces:
Sleeping
Sleeping
| """Hugging Face Evaluate metric for Script Fidelity Rate.""" | |
| from __future__ import annotations | |
| import datasets | |
| import evaluate | |
| from script_fidelity import compute_corpus_sfr # noqa: E402 | |
# Summary of the metric; passed as `description=` to evaluate.MetricInfo
# in ScriptFidelityRate._info.
_DESCRIPTION = """
Script Fidelity Rate (SFR) is a reference-free metric for multilingual ASR.
It computes the fraction of countable hypothesis characters that belong to the
expected Unicode script for a target FLEURS language code.
"""
# BibTeX entry for the metric; passed as `citation=` to evaluate.MetricInfo
# in ScriptFidelityRate._info.
_CITATION = """
@misc{scriptfidelity2026,
title = {Script Collapse in Multilingual ASR: A Reference-Free Metric and 100-Pair Benchmark},
year = {2026}
}
"""
# Argument/return documentation for the metric; passed as
# `inputs_description=` to evaluate.MetricInfo in ScriptFidelityRate._info.
# The argument names mirror the parameters of ScriptFidelityRate._compute.
_KWARGS_DESCRIPTION = """
Args:
predictions: List of ASR hypothesis strings.
language: FLEURS language code or alias, for example "ps_af" or "pashto".
digit_policy: "count" keeps digits in the denominator. "ignore" treats
digits as neutral.
return_details: Return per-example SFR details.
Returns:
Corpus SFR, percent SFR, empty counts, low/high SFR rates, and dominant
script counts.
"""
class ScriptFidelityRate(evaluate.Metric):
    """Expose Script Fidelity Rate through the Hugging Face Evaluate API.

    The class only describes the metric's inputs and forwards the actual
    computation to ``compute_corpus_sfr``.
    """

    def _info(self) -> evaluate.MetricInfo:
        """Build the metadata record describing this metric.

        Returns:
            An ``evaluate.MetricInfo`` carrying the module-level description,
            citation and argument documentation, a feature schema with a
            single string-valued ``predictions`` column, and the benchmark
            dataset URL as reference.
        """
        # Only `predictions` is declared as a feature; `_compute` receives
        # its remaining parameters as ordinary arguments.
        input_schema = datasets.Features(
            {"predictions": datasets.Value("string")}
        )
        benchmark_urls = [
            "https://huggingface.co/datasets/themechanism/script-fidelity-benchmark"
        ]
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=input_schema,
            reference_urls=benchmark_urls,
        )

    def _compute(
        self,
        predictions: list[str],
        language: str,
        digit_policy: str = "count",
        return_details: bool = False,
    ) -> dict:
        """Validate ``digit_policy`` and delegate to ``compute_corpus_sfr``.

        Args:
            predictions: Hypothesis strings to score.
            language: Target language identifier, forwarded unchanged.
            digit_policy: Either ``"count"`` or ``"ignore"``.
            return_details: Forwarded unchanged to ``compute_corpus_sfr``.

        Returns:
            Whatever ``compute_corpus_sfr`` returns for these arguments.

        Raises:
            ValueError: If ``digit_policy`` is neither ``"count"`` nor
                ``"ignore"``.
        """
        # Happy path first: a recognised policy goes straight to the scorer.
        if digit_policy in {"count", "ignore"}:
            return compute_corpus_sfr(
                predictions,
                language=language,
                digit_policy=digit_policy,  # type: ignore[arg-type]
                return_details=return_details,
            )
        raise ValueError("digit_policy must be 'count' or 'ignore'")