| from __future__ import annotations |
|
|
| import argparse |
| import json |
| import sys |
| import tempfile |
| from pathlib import Path |
| from typing import Any |
|
|
# Directory two levels above this file (the parent of the folder holding it).
ROOT_DIR = Path(__file__).resolve().parent.parent
# Register that directory on the import path exactly once so the `app` and
# `scripts` imports below resolve when this file is run directly.
if not sys.path.count(str(ROOT_DIR)):
    sys.path.insert(0, str(ROOT_DIR))
|
|
| from app import main |
| from scripts.verify_voice import DEFAULT_SAMPLE_TEXT, synthesize_voice |
|
|
|
|
# Voices benchmarked when the caller does not choose any.
DEFAULT_VOICES = [
    "silma-local",
    "habibi-msa",
    "supertonic-ar",
    "espeak-ar-clear",
]

# Preference order consulted when picking the recommended voice: the first
# entry that synthesized successfully wins.
RECOMMENDED_VOICE_ORDER = [
    "silma-local",
    "habibi-msa",
    "supertonic-ar",
    "espeak-ar-clear",
    "espeak-ar",
    "espeak-ar-male",
]
|
|
|
|
def available_voice_ids() -> list[str]:
    """Return a fresh list built by iterating ``main.LOCAL_VOICES``.

    The result is a new list on every call, so callers may modify it freely.
    """
    return [*main.LOCAL_VOICES]
|
|
|
|
def benchmark_voice(
    voice_id: str,
    text: str,
    output_dir: Path,
    audio_format: str = "wav",
) -> dict[str, Any]:
    """Synthesize *text* with a single voice and report the outcome.

    Synthesis errors are captured in the returned dict instead of being
    raised, so one broken voice cannot abort a multi-voice benchmark.

    Args:
        voice_id: Identifier of the voice to test; also used as the file stem.
        text: Text handed to ``synthesize_voice``.
        output_dir: Directory for the audio file; created if missing.
        audio_format: File extension / format passed to ``synthesize_voice``.

    Returns:
        On success, ``{"ok": True}`` merged with the mapping returned by
        ``synthesize_voice``. On failure, a dict with ``ok`` set to ``False``
        plus ``voiceId``, ``label``, ``engine`` and ``error`` (the exception
        message).

    Raises:
        OSError: If ``output_dir`` cannot be created (this happens before the
            guarded synthesis step).
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    destination = output_dir / f"{voice_id}.{audio_format}"
    try:
        result = synthesize_voice(text, voice_id, destination, audio_format)
        return {"ok": True, **result}
    except Exception as exc:
        # Look the voice up once, and never let the metadata lookup itself
        # raise from inside this handler: that would turn a reportable
        # failure into a crash of the whole benchmark run.
        try:
            voice = main.get_local_voice(voice_id)
            label = voice.get("label", voice_id)
            engine = voice.get("engine", "")
        except Exception:
            label, engine = voice_id, ""
        return {
            "ok": False,
            "voiceId": voice_id,
            "label": label,
            "engine": engine,
            "error": str(exc),
        }
|
|
|
|
def benchmark_voices(
    voices: list[str] | None = None,
    text: str = DEFAULT_SAMPLE_TEXT,
    output_dir: Path | None = None,
    audio_format: str = "wav",
) -> dict[str, Any]:
    """Benchmark several voices on the same text and summarize the run.

    Args:
        voices: Voice ids to test; ``None`` or an empty list selects
            ``DEFAULT_VOICES``.
        text: Text synthesized by every voice.
        output_dir: Target directory; defaults to
            ``ROOT_DIR / "outputs" / "voice-benchmark"``.
        audio_format: Audio format forwarded to each synthesis call.

    Returns:
        A report dict with the keys ``voices``, ``textCharacters`` (length of
        the text after ``main.prepare_text_for_speech``), ``outputDir``,
        ``audioFormat``, ``results`` (one entry per voice), ``fastest``,
        ``recommended``, ``ready`` (whether any voice succeeded) and
        ``nextSteps``.
    """
    # Copy the selection so the report never aliases the caller's list or the
    # module-level default; otherwise mutating result["voices"] would silently
    # change DEFAULT_VOICES for every later call.
    selected_voices = list(voices) if voices else list(DEFAULT_VOICES)
    output = output_dir or (ROOT_DIR / "outputs" / "voice-benchmark")
    results = [benchmark_voice(voice_id, text, output, audio_format) for voice_id in selected_voices]
    successful = [item for item in results if item.get("ok")]
    # Entries without a timing sort last because they compare as infinity.
    fastest = min(successful, key=lambda item: item.get("elapsedSeconds", float("inf"))) if successful else None
    recommended = choose_recommended_voice(successful)
    return {
        "voices": selected_voices,
        "textCharacters": len(main.prepare_text_for_speech(text)),
        "outputDir": str(output),
        "audioFormat": audio_format,
        "results": results,
        "fastest": fastest,
        "recommended": recommended,
        "ready": bool(successful),
        "nextSteps": build_next_steps(results),
    }
|
|
|
|
def choose_recommended_voice(
    successful: list[dict[str, Any]],
    preferred_order: list[str] | None = None,
) -> dict[str, Any] | None:
    """Pick the result entry to recommend as the starting voice.

    Args:
        successful: Result entries of voices that produced audio.
        preferred_order: Voice ids from most to least preferred. ``None``
            (the default) uses the module-level ``RECOMMENDED_VOICE_ORDER``;
            an empty list disables preference and selects purely by speed.

    Returns:
        ``None`` when *successful* is empty. Otherwise the entry of the first
        preferred voice id that is present, or, when none of the preferred
        ids succeeded, the entry with the smallest ``elapsedSeconds``.
    """
    if not successful:
        return None
    order = RECOMMENDED_VOICE_ORDER if preferred_order is None else preferred_order
    # Keyed by stringified id; if an id occurs twice the later entry wins.
    by_voice_id = {str(item.get("voiceId")): item for item in successful}
    for voice_id in order:
        if voice_id in by_voice_id:
            return by_voice_id[voice_id]
    # No preferred voice succeeded: fall back to the fastest one. Entries
    # without a timing compare as infinity and therefore sort last.
    return min(successful, key=lambda item: item.get("elapsedSeconds", float("inf")))
|
|
|
|
def build_next_steps(results: list[dict[str, Any]]) -> list[str]:
    """Turn benchmark results into human-readable follow-up advice.

    When at least one voice succeeded, the advice names the recommended
    voice, reminds the user to listen first, and reports the fastest voice.
    When none succeeded it points at the preflight check instead. Install
    hints for failed Habibi, SILMA and Supertonic voices follow, in that
    order.
    """
    passed: list[dict[str, Any]] = []
    broken: list[dict[str, Any]] = []
    for entry in results:
        (passed if entry.get("ok") else broken).append(entry)

    if not passed:
        steps = [
            "No tested voice produced audio. Run scripts/preflight_check.py and install SILMA, Habibi, Supertonic, or eSpeak NG."
        ]
    else:
        steps = []
        pick = choose_recommended_voice(passed)
        if pick:
            steps.append(
                f"Recommended starting voice: {pick.get('voiceId')} ({pick.get('label')}). "
                "Listen before processing a full book."
            )
        steps.append("Listen to the generated files and choose the most natural Arabic voice before processing a full book.")
        # Entries without a timing compare as infinity, so they sort last.
        quickest = min(passed, key=lambda entry: entry.get("elapsedSeconds", float("inf")))
        steps.append(f"Fastest successful voice in this run: {quickest.get('voiceId')} ({quickest.get('elapsedSeconds')}s).")

    # (voice id, hint) pairs, listed in the order the hints are emitted.
    install_hints = (
        (
            "habibi-msa",
            "Habibi MSA is optional. Install it with scripts/setup_habibi.ps1 if you want to compare the newer MSA voice.",
        ),
        (
            "silma-local",
            "SILMA is the preferred default voice. Install it with scripts/setup_silma.ps1 for the best current local baseline.",
        ),
        (
            "supertonic-ar",
            "Supertonic 3 is optional. Install it with scripts/setup_supertonic.ps1 when you want a fast CPU Arabic-capable comparison voice.",
        ),
    )
    failed_ids = [entry.get("voiceId") for entry in broken]
    steps.extend(hint for voice_id, hint in install_hints if voice_id in failed_ids)
    return steps
|
|
|
|
def _markdown_cell(value: Any) -> str:
    """Render *value* as text that is safe inside one Markdown table cell."""
    # A line break would end the table row and a bare "|" would open an extra
    # column, so flatten the former and backslash-escape the latter.
    flattened = " ".join(str(value).splitlines())
    return flattened.replace("|", "\\|")


def write_markdown_report(path: Path, result: dict[str, Any]) -> None:
    """Write the benchmark *result* as a UTF-8 Markdown report at *path*.

    The report holds a short summary, a results table with one row per
    tested voice, the next-step advice, and a ready-to-edit listening-score
    command. Parent directories of *path* are created when missing.

    Args:
        path: Destination file; overwritten if it already exists.
        result: Report dict. The keys read here are ``outputDir``,
            ``textCharacters``, ``audioFormat``, ``recommended``,
            ``fastest``, ``results`` and ``nextSteps``; all are optional.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    rating_command = build_rating_command(result)
    lines = [
        "# Arabic Voice Benchmark",
        "",
        f"Output directory: `{result.get('outputDir', '')}`",
        f"Text characters: {result.get('textCharacters', 0)}",
        f"Audio format: {result.get('audioFormat', 'wav')}",
    ]
    recommended = result.get("recommended")
    fastest = result.get("fastest")
    if recommended:
        lines.append(f"Recommended starting voice: `{recommended.get('voiceId')}` ({recommended.get('label', '-')})")
    if fastest:
        lines.append(f"Fastest successful voice: `{fastest.get('voiceId')}` ({fastest.get('elapsedSeconds')}s)")
    lines.extend(
        [
            "",
            "## Results",
            "",
            "| Voice | Label | Engine | Status | Time | Audio | Notes |",
            "| --- | --- | --- | --- | ---: | --- | --- |",
        ]
    )
    for item in result.get("results", []):
        cells = [
            item.get("voiceId", "-"),
            item.get("label", "-"),
            item.get("engine", "-"),
            "ok" if item.get("ok") else "failed",
            item.get("elapsedSeconds", "-"),
            item.get("path", "-"),
            item.get("error", ""),
        ]
        # Error messages in particular may span several lines or contain
        # pipes; sanitize every cell so one bad value cannot break the table.
        lines.append("| " + " | ".join(_markdown_cell(cell) for cell in cells) + " |")
    lines.extend(["", "## Next Steps", ""])
    for step in result.get("nextSteps", []):
        lines.append(f"- {step}")
    lines.extend(
        [
            "",
            "## Listening Score",
            "",
            "After listening, score each successful voice from 1 to 5 for pronunciation, naturalness, pacing, long-listen comfort, and artifact-free audio.",
            "",
            "```powershell",
            rating_command,
            "```",
        ]
    )
    path.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
|
|
|
def build_rating_command(result: dict[str, Any]) -> str:
    """Build a sample ``score_voice_listening.py`` command line.

    One ``--rating <voiceId>=5,5,5,5,5`` placeholder is emitted per
    successful entry in ``result["results"]``. If no entry succeeded, a
    single placeholder for ``silma-local`` is used so the command stays
    complete.
    """
    flags = [
        f"--rating {entry.get('voiceId')}=5,5,5,5,5"
        for entry in result.get("results", [])
        if entry.get("ok")
    ]
    if not flags:
        flags = ["--rating silma-local=5,5,5,5,5"]
    pieces = [
        "python scripts\\score_voice_listening.py",
        *flags,
        "--write-report outputs\\voice-listening-score.md",
    ]
    return " ".join(pieces)
|
|
|
|
def print_table(result: dict[str, Any]) -> None:
    """Print a compact, column-aligned summary of the benchmark to stdout.

    One row is printed per entry in ``result["results"]``, followed by a
    blank line and the ``result["nextSteps"]`` advice as a bullet list. The
    last column shows the audio path, or the error message when no path is
    present.

    Args:
        result: Report dict; must contain ``results`` and ``nextSteps``.

    Raises:
        KeyError: If ``results`` or ``nextSteps`` is missing from *result*.
    """
    # Header, separator and rows share one layout so the columns always line
    # up. The "ok" column is 5 wide so that "False" fits without shifting
    # the columns to its right.
    row_format = "{:<16} {:<5} {:>5} {:<10} {}"
    print(row_format.format("voice", "ok", "sec", "engine", "file"))
    print(row_format.format("-" * 16, "-" * 5, "-" * 5, "-" * 10, "-" * 4))
    for item in result["results"]:
        print(
            row_format.format(
                # str() first: values such as None reject a width format spec.
                str(item.get("voiceId", "-")),
                str(item.get("ok")),
                str(item.get("elapsedSeconds", "-")),
                str(item.get("engine", "-")),
                item.get("path", item.get("error", "-")),
            )
        )
    print()
    for step in result["nextSteps"]:
        print(f"- {step}")
|
|
|
|
def main_cli() -> None:
    """Parse command-line options, run the benchmark and print the outcome.

    The benchmark text comes from ``--text-file`` when given, otherwise from
    ``--text``. A Markdown report is written when ``--write-report`` is set.
    The result is printed as JSON with ``--json`` and as a compact table
    otherwise.
    """
    # Switch stdout to UTF-8 (replacing unencodable characters) where the
    # stream supports it; presumably so Arabic text prints on consoles that
    # default to a legacy code page.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8", errors="replace")

    parser = argparse.ArgumentParser(description="Compare free local Arabic TTS voices on the same text.")
    option_specs: list[tuple[str, dict[str, Any]]] = [
        ("--voices", {"nargs": "+", "default": DEFAULT_VOICES, "choices": available_voice_ids()}),
        ("--text", {"default": DEFAULT_SAMPLE_TEXT, "help": "Arabic text to synthesize for every voice."}),
        ("--text-file", {"type": Path, "help": "Read benchmark text from a UTF-8 file."}),
        ("--out-dir", {"type": Path, "default": ROOT_DIR / "outputs" / "voice-benchmark"}),
        ("--format", {"choices": ["wav", "mp3"], "default": "wav"}),
        ("--write-report", {"type": Path, "help": "Write a Markdown benchmark report."}),
        ("--json", {"action": "store_true", "help": "Print JSON instead of a compact table."}),
    ]
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    cli = parser.parse_args()

    # A text file, when supplied, takes precedence over --text.
    if cli.text_file:
        text = cli.text_file.read_text(encoding="utf-8")
    else:
        text = cli.text

    result = benchmark_voices(cli.voices, text, cli.out_dir, cli.format)
    if cli.write_report:
        write_markdown_report(cli.write_report, result)

    if not cli.json:
        print_table(result)
        return
    print(json.dumps(result, ensure_ascii=False, indent=2))
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main_cli()
|
|