#!/usr/bin/env python3
"""De-noised eval: score a synth dir against eval_big.jsonl (36 held-out sentences).

Run in moss-nano-venv.

Usage:
    python assess_big.py --synth-dir m7_eval_big

Pairs with:
    synth_from_text.py --onnx-dir _onnx --out-dir _eval_big --texts eval_big.jsonl

Reports aggregate zh CER (zh + mix categories) and en WER, plus per-category,
for low-noise comparison.
"""
import argparse
import json
import sys

ZT = "/home/luigi/jetson-tts/mossnano/zhtw8k"
# xasr_offline lives in ZT, so the path must be extended before importing it.
sys.path.insert(0, ZT)
import xasr_offline as X  # noqa: E402


def _read_jsonl(path):
    """Return the parsed records of the JSON-lines file at *path*.

    Blank lines are skipped. The file is read as UTF-8 (the reference text
    is non-ASCII) and is closed before returning.
    """
    with open(path, encoding="utf-8") as fh:
        return [json.loads(line) for line in fh if line.strip()]


def _mean(values):
    """Return the arithmetic mean of *values*, or NaN for an empty list."""
    return sum(values) / len(values) if values else float("nan")


def _error_rate(score, lang):
    """Extract a single error-rate number from the result of ``X.score``.

    A non-dict *score* is returned unchanged. For a dict, the metric that
    matches the reported label is preferred: ``wer`` for ``en`` (printed as
    en_WER), ``cer`` for everything else (printed as zh_CER). The other key
    is used as a fallback so a dict carrying only one of the two still works.

    Raises:
        ValueError: if *score* is a dict with neither ``cer`` nor ``wer``.
    """
    if not isinstance(score, dict):
        return score
    preferred = ("wer", "cer") if lang == "en" else ("cer", "wer")
    for key in preferred:
        if key in score:
            return score[key]
    raise ValueError(f"score has neither 'cer' nor 'wer': {score!r}")


def main():
    """Run ASR over every synthesized wav and print per-category error rates.

    Reads ``<ZT>/eval_big.jsonl`` for the reference ``text`` and ``lang`` of
    each ``id``, reads ``<--synth-dir>/synth.jsonl`` for the ``id``/``wav``
    rows to evaluate, scores each row with ``X.asr`` + ``X.score``, and
    prints two summary lines (per-category means, then aggregates).

    Raises:
        KeyError: if a synth row's ``id`` is missing from the eval set, or
            its ``lang`` is not one of ``zh``, ``mix``, ``en``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--synth-dir", required=True)
    ap.add_argument("--tag", default="")
    args = ap.parse_args()

    # One pass over the eval set; each id maps to its full reference record.
    reference = {r["id"]: r for r in _read_jsonl(f"{ZT}/eval_big.jsonl")}
    rows = _read_jsonl(f"{args.synth_dir}/synth.jsonl")

    cat = {"zh": [], "mix": [], "en": []}
    for r in rows:
        # Look the reference up first so a bad id fails before running ASR.
        ref = reference[r["id"]]
        hyp = X.asr(r["wav"])
        sc = X.score(ref["text"], hyp)
        cat[ref["lang"]].append(_error_rate(sc, ref["lang"]))

    zh_cer = _mean(cat["zh"] + cat["mix"])  # CER over zh + code-mix
    en_wer = _mean(cat["en"])
    print(f"[{args.tag}] N={len(rows)} zh-only={_mean(cat['zh']):.3f} mix={_mean(cat['mix']):.3f} "
          f"en={_mean(cat['en']):.3f}")
    print(f"[{args.tag}] AGGREGATE zh_CER(zh+mix)={zh_cer:.3f} en_WER={en_wer:.3f} "
          f"(n_zh={len(cat['zh'])} n_mix={len(cat['mix'])} n_en={len(cat['en'])})")


if __name__ == "__main__":
    main()