PrimeTTS / scripts /assess_big.py
Luigi's picture
PrimeTTS: full training pipeline + weights (fine-tune of Inflect-Nano-v1)
a37967e verified
Raw
History Blame Contribute Delete
1.72 kB
#!/usr/bin/env python3
"""De-noised eval: score a synth dir against eval_big.jsonl (36 held-out sentences).
Run in moss-nano-venv. Usage: python assess_big.py --synth-dir m7_eval_big
Pairs with: synth_from_text.py --onnx-dir <m>_onnx --out-dir <m>_eval_big --texts eval_big.jsonl
Reports aggregate zh CER (zh + mix categories) and en WER, plus per-category, for low-noise comparison."""
import argparse, json, sys
# Directory that main() reads the reference set from ({ZT}/eval_big.jsonl).
ZT = "/home/luigi/jetson-tts/mossnano/zhtw8k"
# Must run before the import below: ZT is put at the front of the module
# search path so that `xasr_offline` resolves (presumably the module lives in
# that directory -- confirm).
sys.path.insert(0, ZT)
import xasr_offline as X
def _load_jsonl(path):
    """Parse a JSON Lines file into a list of records.

    The file is opened explicitly as UTF-8 so decoding does not depend on the
    locale, it is closed as soon as it has been read, and blank lines are
    skipped rather than handed to ``json.loads``.
    """
    with open(path, encoding="utf-8") as fh:
        return [json.loads(line) for line in fh if line.strip()]


def _mean(values):
    """Return the arithmetic mean of *values*, or NaN when *values* is empty."""
    return sum(values) / len(values) if values else float("nan")


def main():
    """Score one synth directory against the eval set and print a summary.

    Command-line arguments:
        --synth-dir  Directory containing ``synth.jsonl``; each record is read
                     for its ``id`` and ``wav`` fields (required).
        --tag        Free-form label echoed in brackets at the start of both
                     output lines (default: empty string).

    The reference ``lang`` and ``text`` for each id come from
    ``{ZT}/eval_big.jsonl``. Every ``wav`` is passed to ``X.asr`` and the
    result is compared with the reference text by ``X.score``. Scores are
    grouped by ``lang`` (``zh``, ``mix``, ``en``) and two lines are printed:
    per-category averages, then the aggregate over zh+mix and over en together
    with the per-category counts. An empty category averages to NaN.

    Raises:
        KeyError: if a synth record's ``id`` is absent from the eval set, or
            its ``lang`` is not one of ``zh`` / ``mix`` / ``en``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--synth-dir", required=True)
    ap.add_argument("--tag", default="")
    args = ap.parse_args()

    # Read the eval set once and index whole records by id, instead of parsing
    # the same file twice to build separate lang / text maps.
    refs = {rec["id"]: rec for rec in _load_jsonl(f"{ZT}/eval_big.jsonl")}
    rows = _load_jsonl(f"{args.synth_dir}/synth.jsonl")

    cat = {"zh": [], "mix": [], "en": []}
    for r in rows:
        ref = refs[r["id"]]  # fail before running ASR if the id is unknown
        hyp = X.asr(r["wav"])
        sc = X.score(ref["text"], hyp)
        # The score is accepted either as a bare value or as a dict keyed by
        # "cer" / "wer".
        # NOTE(review): for a dict, "cer" wins whenever it is present, also
        # for en rows whose average is printed as en_WER -- confirm what
        # X.score actually returns.
        v = sc if not isinstance(sc, dict) else sc.get("cer", sc.get("wer"))
        cat[ref["lang"]].append(v)

    zh_cer = _mean(cat["zh"] + cat["mix"])  # CER over zh + code-mix
    en_wer = _mean(cat["en"])
    print(f"[{args.tag}] N={len(rows)} zh-only={_mean(cat['zh']):.3f} mix={_mean(cat['mix']):.3f} "
          f"en={_mean(cat['en']):.3f}")
    print(f"[{args.tag}] AGGREGATE zh_CER(zh+mix)={zh_cer:.3f} en_WER={en_wer:.3f} "
          f"(n_zh={len(cat['zh'])} n_mix={len(cat['mix'])} n_en={len(cat['en'])})")
# Run the evaluation only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()