File size: 2,124 Bytes
832e0be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
"""Hugging Face Evaluate metric for Script Fidelity Rate."""

from __future__ import annotations

import datasets
import evaluate

from script_fidelity import compute_corpus_sfr  # noqa: E402


# Summary text for the metric; passed as `description` to
# evaluate.MetricInfo in ScriptFidelityRate._info.
_DESCRIPTION = """
Script Fidelity Rate (SFR) is a reference-free metric for multilingual ASR.
It computes the fraction of countable hypothesis characters that belong to the
expected Unicode script for a target FLEURS language code.
"""

# BibTeX entry for the metric; passed as `citation` to evaluate.MetricInfo
# in ScriptFidelityRate._info.
_CITATION = """
@misc{scriptfidelity2026,
  title = {Script Collapse in Multilingual ASR: A Reference-Free Metric and 100-Pair Benchmark},
  year = {2026}
}
"""

# Usage text describing the arguments and return value; passed as
# `inputs_description` to evaluate.MetricInfo in ScriptFidelityRate._info.
# The argument names listed here mirror the parameters of
# ScriptFidelityRate._compute.
_KWARGS_DESCRIPTION = """
Args:
    predictions: List of ASR hypothesis strings.
    language: FLEURS language code or alias, for example "ps_af" or "pashto".
    digit_policy: "count" keeps digits in the denominator. "ignore" treats
        digits as neutral.
    return_details: Return per-example SFR details.

Returns:
    Corpus SFR, percent SFR, empty counts, low/high SFR rates, and dominant
    script counts.
"""


class ScriptFidelityRate(evaluate.Metric):
    """Expose Script Fidelity Rate through the Hugging Face Evaluate API."""

    def _info(self) -> evaluate.MetricInfo:
        """Assemble the metric metadata.

        Returns:
            A ``MetricInfo`` carrying the module-level description, citation
            and usage text, a single string-valued ``predictions`` feature,
            and the benchmark dataset URL.
        """
        input_schema = datasets.Features({"predictions": datasets.Value("string")})
        benchmark_urls = [
            "https://huggingface.co/datasets/themechanism/script-fidelity-benchmark"
        ]
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=input_schema,
            reference_urls=benchmark_urls,
        )

    def _compute(
        self,
        predictions: list[str],
        language: str,
        digit_policy: str = "count",
        return_details: bool = False,
    ) -> dict:
        """Validate ``digit_policy`` and delegate to ``compute_corpus_sfr``.

        Args:
            predictions: Hypothesis strings to score.
            language: Target language identifier, forwarded unchanged.
            digit_policy: Either ``"count"`` or ``"ignore"``; forwarded
                unchanged once validated.
            return_details: Forwarded unchanged to ``compute_corpus_sfr``.

        Returns:
            Whatever ``compute_corpus_sfr`` returns for these arguments.

        Raises:
            ValueError: If ``digit_policy`` is neither ``"count"`` nor
                ``"ignore"``.
        """
        accepted_policies = {"count", "ignore"}
        if digit_policy in accepted_policies:
            return compute_corpus_sfr(
                predictions,
                language=language,
                digit_policy=digit_policy,  # type: ignore[arg-type]
                return_details=return_details,
            )
        raise ValueError("digit_policy must be 'count' or 'ignore'")