# arabic-audio-reader-worker/scripts/benchmark_voices.py
# Hosting-page header captured with the file (not part of the script):
#   Syncre's picture
#   Deploy Arabic Audio Reader worker
#   2e1a095 verified
from __future__ import annotations
import argparse
import json
import sys
import tempfile
from pathlib import Path
from typing import Any
# Directory two levels above this file; the imports below (`app`, `scripts`)
# are resolved relative to it.
ROOT_DIR = Path(__file__).resolve().parent.parent

# Put that directory first on the import path, but only once, so running the
# script directly can still import the project packages.
if str(ROOT_DIR) not in sys.path:
    sys.path.insert(0, str(ROOT_DIR))
from app import main
from scripts.verify_voice import DEFAULT_SAMPLE_TEXT, synthesize_voice
# Voices benchmarked when the caller (or the --voices flag) does not name any.
DEFAULT_VOICES = ["silma-local", "habibi-msa", "supertonic-ar", "espeak-ar-clear"]
# Preference order used to pick the recommended voice: the first entry that
# synthesized successfully wins.
RECOMMENDED_VOICE_ORDER = ["silma-local", "habibi-msa", "supertonic-ar", "espeak-ar-clear", "espeak-ar", "espeak-ar-male"]
def available_voice_ids() -> list[str]:
    """Return the items yielded by iterating ``main.LOCAL_VOICES`` as a list.

    NOTE(review): presumably ``LOCAL_VOICES`` is a mapping keyed by voice id,
    so this yields the ids -- confirm against ``app.main``.
    """
    return [*main.LOCAL_VOICES]
def benchmark_voice(
    voice_id: str,
    text: str,
    output_dir: Path,
    audio_format: str = "wav",
) -> dict[str, Any]:
    """Synthesize ``text`` with a single voice and describe the outcome.

    Args:
        voice_id: Identifier of the voice to test; also used as the file stem.
        text: Text handed to ``synthesize_voice``.
        output_dir: Directory for the audio file; created if missing.
        audio_format: File extension / format passed to ``synthesize_voice``.

    Returns:
        On success, the mapping returned by ``synthesize_voice`` with
        ``"ok": True`` added. On failure, a dict with ``"ok": False``,
        ``voiceId``, ``label``, ``engine`` and the stringified ``error``.

    Raises:
        OSError: If ``output_dir`` cannot be created (raised before synthesis).
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    destination = output_dir / f"{voice_id}.{audio_format}"
    try:
        result = synthesize_voice(text, voice_id, destination, audio_format)
        return {"ok": True, **result}
    except Exception as exc:
        # A single broken voice must not abort the comparison, so the failure
        # is reported as data instead of being re-raised.
        label, engine = voice_id, ""
        try:
            # Look the voice up once; if the lookup itself fails (for example
            # an unknown id), keep the fallbacks so the original synthesis
            # error is still the one that gets reported.
            voice = main.get_local_voice(voice_id)
            label = voice.get("label", voice_id)
            engine = voice.get("engine", "")
        except Exception:
            label, engine = voice_id, ""
        return {
            "ok": False,
            "voiceId": voice_id,
            "label": label,
            "engine": engine,
            "error": str(exc),
        }
def benchmark_voices(
    voices: list[str] | None = None,
    text: str = DEFAULT_SAMPLE_TEXT,
    output_dir: Path | None = None,
    audio_format: str = "wav",
) -> dict[str, Any]:
    """Benchmark several voices on the same text and summarize the run.

    Args:
        voices: Voice ids to test; ``DEFAULT_VOICES`` when empty or ``None``.
        text: Text synthesized by every voice.
        output_dir: Where audio files go; defaults to
            ``ROOT_DIR / "outputs" / "voice-benchmark"``.
        audio_format: Format forwarded to ``benchmark_voice``.

    Returns:
        A summary dict with the per-voice ``results``, the ``fastest`` and
        ``recommended`` successful entries (``None`` when nothing succeeded),
        a ``ready`` flag, and human-readable ``nextSteps``.
    """
    voice_ids = voices if voices else DEFAULT_VOICES
    target_dir = output_dir if output_dir else ROOT_DIR / "outputs" / "voice-benchmark"

    results = []
    for voice_id in voice_ids:
        results.append(benchmark_voice(voice_id, text, target_dir, audio_format))

    succeeded = [entry for entry in results if entry.get("ok")]
    fastest = None
    if succeeded:
        # Entries without a timing sort last.
        fastest = min(succeeded, key=lambda entry: entry.get("elapsedSeconds", float("inf")))
    recommended = choose_recommended_voice(succeeded)

    return {
        "voices": voice_ids,
        # Length of the text after the speech-preparation step, not the raw input.
        "textCharacters": len(main.prepare_text_for_speech(text)),
        "outputDir": str(target_dir),
        "audioFormat": audio_format,
        "results": results,
        "fastest": fastest,
        "recommended": recommended,
        "ready": bool(succeeded),
        "nextSteps": build_next_steps(results),
    }
def choose_recommended_voice(successful: list[dict[str, Any]]) -> dict[str, Any] | None:
if not successful:
return None
by_voice_id = {str(item.get("voiceId")): item for item in successful}
for voice_id in RECOMMENDED_VOICE_ORDER:
if voice_id in by_voice_id:
return by_voice_id[voice_id]
return min(successful, key=lambda item: item.get("elapsedSeconds", float("inf")))
def build_next_steps(results: list[dict[str, Any]]) -> list[str]:
    """Turn benchmark results into a list of human-readable follow-up hints.

    When at least one voice succeeded, the hints name the recommended and the
    fastest voice. Otherwise a single hint points at the preflight check.
    Install hints are then appended for Habibi, SILMA, and Supertonic (in that
    order) if the corresponding voice is among the failures.
    """
    # Split the results once: successful entries, and the ids of failed ones.
    succeeded: list[dict[str, Any]] = []
    failed_ids: list[Any] = []
    for entry in results:
        if entry.get("ok"):
            succeeded.append(entry)
        else:
            failed_ids.append(entry.get("voiceId"))

    steps: list[str] = []
    if not succeeded:
        steps.append("No tested voice produced audio. Run scripts/preflight_check.py and install SILMA, Habibi, Supertonic, or eSpeak NG.")
    else:
        pick = choose_recommended_voice(succeeded)
        if pick:
            steps.append(
                f"Recommended starting voice: {pick.get('voiceId')} ({pick.get('label')}). "
                "Listen before processing a full book."
            )
        steps.append("Listen to the generated files and choose the most natural Arabic voice before processing a full book.")
        quickest = min(succeeded, key=lambda entry: entry.get("elapsedSeconds", float("inf")))
        steps.append(f"Fastest successful voice in this run: {quickest.get('voiceId')} ({quickest.get('elapsedSeconds')}s).")

    # Install hints, emitted in this fixed order for each voice that failed.
    install_hints = (
        ("habibi-msa", "Habibi MSA is optional. Install it with scripts/setup_habibi.ps1 if you want to compare the newer MSA voice."),
        ("silma-local", "SILMA is the preferred default voice. Install it with scripts/setup_silma.ps1 for the best current local baseline."),
        ("supertonic-ar", "Supertonic 3 is optional. Install it with scripts/setup_supertonic.ps1 when you want a fast CPU Arabic-capable comparison voice."),
    )
    steps.extend(hint for voice_id, hint in install_hints if voice_id in failed_ids)
    return steps
def _markdown_cell(value: Any) -> str:
    """Render ``value`` as text that is safe inside one Markdown table cell."""
    # A raw "|" would start a new column and a line break would end the row,
    # so escape the former and flatten the latter into single spaces.
    return " ".join(str(value).splitlines()).replace("|", "\\|")


def write_markdown_report(path: Path, result: dict[str, Any]) -> None:
    """Write the benchmark summary ``result`` to ``path`` as a Markdown report.

    The report contains the run metadata, a results table with one row per
    voice, the next-step hints, and a ready-to-edit listening-score command.

    Args:
        path: Destination file; parent directories are created as needed.
        result: Summary dict with the keys read below (``outputDir``,
            ``textCharacters``, ``audioFormat``, ``recommended``, ``fastest``,
            ``results``, ``nextSteps``); missing keys fall back to defaults.

    Raises:
        OSError: If the directory or file cannot be written.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    rating_command = build_rating_command(result)
    lines = [
        "# Arabic Voice Benchmark",
        "",
        f"Output directory: `{result.get('outputDir', '')}`",
        f"Text characters: {result.get('textCharacters', 0)}",
        f"Audio format: {result.get('audioFormat', 'wav')}",
    ]
    recommended = result.get("recommended")
    fastest = result.get("fastest")
    if recommended:
        lines.append(f"Recommended starting voice: `{recommended.get('voiceId')}` ({recommended.get('label', '-')})")
    if fastest:
        lines.append(f"Fastest successful voice: `{fastest.get('voiceId')}` ({fastest.get('elapsedSeconds')}s)")
    lines.extend(
        [
            "",
            "## Results",
            "",
            "| Voice | Label | Engine | Status | Time | Audio | Notes |",
            "| --- | --- | --- | --- | ---: | --- | --- |",
        ]
    )
    for item in result.get("results", []):
        status = "ok" if item.get("ok") else "failed"
        # Every interpolated value goes through _markdown_cell: error messages
        # in particular may contain "|" or multiple lines.
        cells = [
            _markdown_cell(item.get("voiceId", "-")),
            _markdown_cell(item.get("label", "-")),
            _markdown_cell(item.get("engine", "-")),
            status,
            _markdown_cell(item.get("elapsedSeconds", "-")),
            _markdown_cell(item.get("path", "-")),
            _markdown_cell(item.get("error", "")),
        ]
        lines.append("| " + " | ".join(cells) + " |")
    lines.extend(["", "## Next Steps", ""])
    for step in result.get("nextSteps", []):
        lines.append(f"- {step}")
    lines.extend(
        [
            "",
            "## Listening Score",
            "",
            "After listening, score each successful voice from 1 to 5 for pronunciation, naturalness, pacing, long-listen comfort, and artifact-free audio.",
            "",
            "```powershell",
            rating_command,
            "```",
        ]
    )
    path.write_text("\n".join(lines) + "\n", encoding="utf-8")
def build_rating_command(result: dict[str, Any]) -> str:
    """Build the ``score_voice_listening.py`` command line shown in the report.

    One ``--rating <voiceId>=5,5,5,5,5`` placeholder is emitted per successful
    result; when nothing succeeded a single ``silma-local`` placeholder is used
    so the command is still a complete example.
    """
    rating_flags = [
        f"--rating {entry.get('voiceId')}=5,5,5,5,5"
        for entry in result.get("results", [])
        if entry.get("ok")
    ] or ["--rating silma-local=5,5,5,5,5"]
    parts = [
        "python scripts\\score_voice_listening.py",
        *rating_flags,
        "--write-report outputs\\voice-listening-score.md",
    ]
    return " ".join(parts)
def print_table(result: dict[str, Any]) -> None:
    """Print the benchmark results as a fixed-width table plus next steps.

    Columns are voice (16 wide), ok (4), seconds (5, right-aligned), engine
    (10) and the audio path, or the error text when no path is present.

    Args:
        result: Summary dict; must contain ``"results"`` and ``"nextSteps"``.

    Raises:
        KeyError: If ``"results"`` or ``"nextSteps"`` is missing.
    """

    def cell(value: Any) -> str:
        # Width specs such as ``:<10`` raise TypeError for None, so coerce
        # every value to text first and show None as "-".
        return "-" if value is None else str(value)

    # Build the header with the same widths as the rows so the columns line up.
    print(f"{'voice':<16} {'ok':<4} {'sec':>5} {'engine':<10} file")
    print("---------------- ---- ----- ---------- ----")
    for item in result["results"]:
        print(
            f"{cell(item.get('voiceId', '-')):<16} "
            f"{str(item.get('ok')):<4} "
            f"{cell(item.get('elapsedSeconds', '-')):>5} "
            f"{cell(item.get('engine', '-')):<10} "
            f"{item.get('path', item.get('error', '-'))}"
        )
    print()
    for step in result["nextSteps"]:
        print(f"- {step}")
def main_cli() -> None:
    """Command-line entry point: parse flags, run the benchmark, print results.

    Flags select the voices, the text (inline or from a UTF-8 file), the output
    directory and audio format, an optional Markdown report path, and JSON
    versus table output.
    """
    # Force UTF-8 output where the stream supports it so Arabic text and paths
    # print without encoding errors.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8", errors="replace")

    parser = argparse.ArgumentParser(description="Compare free local Arabic TTS voices on the same text.")
    add = parser.add_argument
    add("--voices", nargs="+", default=DEFAULT_VOICES, choices=available_voice_ids())
    add("--text", default=DEFAULT_SAMPLE_TEXT, help="Arabic text to synthesize for every voice.")
    add("--text-file", type=Path, help="Read benchmark text from a UTF-8 file.")
    add("--out-dir", type=Path, default=ROOT_DIR / "outputs" / "voice-benchmark")
    add("--format", choices=["wav", "mp3"], default="wav")
    add("--write-report", type=Path, help="Write a Markdown benchmark report.")
    add("--json", action="store_true", help="Print JSON instead of a compact table.")
    args = parser.parse_args()

    # --text-file takes precedence over --text when both are given.
    if args.text_file:
        text = args.text_file.read_text(encoding="utf-8")
    else:
        text = args.text

    result = benchmark_voices(args.voices, text, args.out_dir, args.format)
    if args.write_report:
        write_markdown_report(args.write_report, result)

    if not args.json:
        print_table(result)
        return
    print(json.dumps(result, ensure_ascii=False, indent=2))
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main_cli()