# File size: 1,717 Bytes
# a37967e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/env python3
"""De-noised eval: score a synth dir against eval_big.jsonl (36 held-out sentences).
Run in moss-nano-venv. Usage: python assess_big.py --synth-dir m7_eval_big
Pairs with: synth_from_text.py --onnx-dir <m>_onnx --out-dir <m>_eval_big --texts eval_big.jsonl
Reports aggregate zh CER (zh + mix categories) and en WER, plus per-category, for low-noise comparison."""
import argparse, json, sys
ZT = "/home/luigi/jetson-tts/mossnano/zhtw8k"
sys.path.insert(0, ZT)
import xasr_offline as X


def main():
    """Score every utterance of a synth dir and print per-category and aggregate error rates.

    Reference sentences are read from ``{ZT}/eval_big.jsonl`` (fields ``id``,
    ``lang``, ``text``); the synthesis manifest is read from
    ``<--synth-dir>/synth.jsonl`` (fields ``id``, ``wav``). Each wav is
    transcribed with ``X.asr`` and compared to its reference text with
    ``X.score``; the resulting value is bucketed by the reference's ``lang``
    (``zh``, ``mix`` or ``en``) and averaged.

    Command-line arguments:
        --synth-dir: directory containing ``synth.jsonl`` (required).
        --tag: free-form label echoed in brackets at the start of each line.

    Raises:
        KeyError: if a manifest ``id`` is absent from eval_big.jsonl, or its
            ``lang`` is not one of ``zh`` / ``mix`` / ``en``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--synth-dir", required=True)
    ap.add_argument("--tag", default="")
    args = ap.parse_args()

    def read_jsonl(path):
        # Explicit UTF-8: the references contain Chinese text and must not
        # depend on the locale's default encoding. The context manager closes
        # the handle; blank lines (e.g. a trailing newline) are skipped.
        with open(path, encoding="utf-8") as fh:
            return [json.loads(line) for line in fh if line.strip()]

    # One pass over the reference file; later duplicates of an id win.
    refs = {rec["id"]: rec for rec in read_jsonl(f"{ZT}/eval_big.jsonl")}
    rows = read_jsonl(f"{args.synth_dir}/synth.jsonl")

    cat = {"zh": [], "mix": [], "en": []}
    for r in rows:
        ref = refs[r["id"]]
        hyp = X.asr(r["wav"])
        sc = X.score(ref["text"], hyp)
        if isinstance(sc, dict):
            # English is reported as WER, zh/mix as CER, so prefer the matching
            # key and fall back to the other one.
            # NOTE(review): assumes a dict score uses "cer"/"wer" keys - confirm
            # against xasr_offline.score.
            first, second = ("wer", "cer") if ref["lang"] == "en" else ("cer", "wer")
            v = sc.get(first, sc.get(second))
        else:
            v = sc
        cat[ref["lang"]].append(v)

    def avg(xs):
        # An empty category yields NaN instead of raising ZeroDivisionError.
        return sum(xs) / len(xs) if xs else float("nan")

    zh_cer = avg(cat["zh"] + cat["mix"])  # CER over zh + code-mix
    en_wer = avg(cat["en"])
    print(f"[{args.tag}] N={len(rows)}  zh-only={avg(cat['zh']):.3f}  mix={avg(cat['mix']):.3f}  "
          f"en={avg(cat['en']):.3f}")
    print(f"[{args.tag}] AGGREGATE  zh_CER(zh+mix)={zh_cer:.3f}  en_WER={en_wer:.3f}  "
          f"(n_zh={len(cat['zh'])} n_mix={len(cat['mix'])} n_en={len(cat['en'])})")


# Run the evaluation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()