Spaces:

Syncre
/

arabic-audio-reader-worker

Running

App Files Files Community

arabic-audio-reader-worker / scripts /benchmark_voices.py

Syncre

Deploy Arabic Audio Reader worker

2e1a095 verified 1 day ago

raw

history blame contribute delete

8.81 kB

	from __future__ import annotations

	import argparse
	import json
	import sys
	import tempfile
	from pathlib import Path
	from typing import Any

	ROOT_DIR = Path(__file__).resolve().parent.parent
	if str(ROOT_DIR) not in sys.path:
	sys.path.insert(0, str(ROOT_DIR))

	from app import main
	from scripts.verify_voice import DEFAULT_SAMPLE_TEXT, synthesize_voice


	# Voices benchmarked when the caller does not select any (also the default of the --voices CLI option).
	DEFAULT_VOICES = ["silma-local", "habibi-msa", "supertonic-ar", "espeak-ar-clear"]
	# Preference order for the recommendation: the first voice in this list that synthesized successfully wins.
	RECOMMENDED_VOICE_ORDER = ["silma-local", "habibi-msa", "supertonic-ar", "espeak-ar-clear", "espeak-ar", "espeak-ar-male"]


	def available_voice_ids() -> list[str]:
	    """Return a new list built by iterating ``main.LOCAL_VOICES``.

	    The result is used as the set of allowed values for the ``--voices``
	    command-line option.
	    """
	    return [*main.LOCAL_VOICES]


	def benchmark_voice(
	    voice_id: str,
	    text: str,
	    output_dir: Path,
	    audio_format: str = "wav",
	) -> dict[str, Any]:
	    """Synthesize ``text`` with a single voice and report the outcome.

	    The audio is written to ``output_dir / "<voice_id>.<audio_format>"``;
	    ``output_dir`` is created first if it does not exist.

	    Args:
	        voice_id: Identifier of the voice to benchmark.
	        text: Text handed to ``synthesize_voice``.
	        output_dir: Directory that receives the generated audio file.
	        audio_format: File extension / format passed to ``synthesize_voice``.

	    Returns:
	        On success, ``{"ok": True, **result}`` where ``result`` is whatever
	        ``synthesize_voice`` returned. On failure, a dict with ``ok=False``,
	        ``voiceId``, ``label``, ``engine`` and the stringified ``error``.
	    """
	    output_dir.mkdir(parents=True, exist_ok=True)
	    destination = output_dir / f"{voice_id}.{audio_format}"
	    try:
	        result = synthesize_voice(text, voice_id, destination, audio_format)
	        return {"ok": True, **result}
	    except Exception as exc:
	        # Look the voice metadata up once, and never let a failing lookup
	        # (e.g. an unknown voice id) replace the synthesis error we are
	        # trying to report.
	        try:
	            voice = main.get_local_voice(voice_id) or {}
	        except Exception:
	            voice = {}
	        return {
	            "ok": False,
	            "voiceId": voice_id,
	            "label": voice.get("label", voice_id),
	            "engine": voice.get("engine", ""),
	            "error": str(exc),
	        }


	def benchmark_voices(
	    voices: list[str] | None = None,
	    text: str = DEFAULT_SAMPLE_TEXT,
	    output_dir: Path | None = None,
	    audio_format: str = "wav",
	) -> dict[str, Any]:
	    """Benchmark several voices on the same text and summarize the run.

	    Args:
	        voices: Voice ids to test; ``None`` or an empty list falls back to
	            ``DEFAULT_VOICES``.
	        text: Text synthesized by every voice.
	        output_dir: Where audio files are written; defaults to
	            ``ROOT_DIR / "outputs" / "voice-benchmark"``.
	        audio_format: Format passed through to ``benchmark_voice``.

	    Returns:
	        A dict with the tested ``voices``, ``textCharacters`` (length of the
	        text after ``main.prepare_text_for_speech``), ``outputDir``,
	        ``audioFormat``, the per-voice ``results``, the ``fastest`` and
	        ``recommended`` successful results (``None`` when nothing
	        succeeded), a ``ready`` flag and human-readable ``nextSteps``.
	    """
	    # Copy so the report never aliases the caller's list or the module-level
	    # default; mutating the returned "voices" must not change DEFAULT_VOICES.
	    selected_voices = list(voices) if voices else list(DEFAULT_VOICES)
	    output = output_dir or (ROOT_DIR / "outputs" / "voice-benchmark")
	    results = [benchmark_voice(voice_id, text, output, audio_format) for voice_id in selected_voices]
	    successful = [item for item in results if item.get("ok")]
	    # A result without a timing sorts last instead of raising.
	    fastest = min(successful, key=lambda item: item.get("elapsedSeconds", float("inf"))) if successful else None
	    recommended = choose_recommended_voice(successful)
	    return {
	        "voices": selected_voices,
	        "textCharacters": len(main.prepare_text_for_speech(text)),
	        "outputDir": str(output),
	        "audioFormat": audio_format,
	        "results": results,
	        "fastest": fastest,
	        "recommended": recommended,
	        "ready": bool(successful),
	        "nextSteps": build_next_steps(results),
	    }


	def choose_recommended_voice(successful: list[dict[str, Any]]) -> dict[str, Any] | None:
	    """Pick the result to recommend from the successful benchmark results.

	    Args:
	        successful: Result dicts of the voices that produced audio.

	    Returns:
	        ``None`` when ``successful`` is empty. Otherwise the result whose
	        ``voiceId`` appears earliest in ``RECOMMENDED_VOICE_ORDER``; if none
	        of the results is listed there, the one with the smallest
	        ``elapsedSeconds`` (a missing timing counts as infinitely slow).
	    """
	    if not successful:
	        return None
	    # When the same voice id occurs more than once, the last result wins.
	    by_voice_id = {str(item.get("voiceId")): item for item in successful}
	    for voice_id in RECOMMENDED_VOICE_ORDER:
	        if voice_id in by_voice_id:
	            return by_voice_id[voice_id]
	    return min(successful, key=lambda item: item.get("elapsedSeconds", float("inf")))


	def build_next_steps(results: list[dict[str, Any]]) -> list[str]:
	    """Turn benchmark results into a list of human-readable follow-up hints.

	    When at least one voice succeeded, the hints name the recommended voice,
	    remind the user to listen to the files, and report the fastest voice.
	    Otherwise a single hint explains that nothing produced audio. In both
	    cases an install hint is appended for each of ``habibi-msa``,
	    ``silma-local`` and ``supertonic-ar`` that is among the failed results.
	    """
	    passed = [entry for entry in results if entry.get("ok")]
	    failed_ids = [entry.get("voiceId") for entry in results if not entry.get("ok")]

	    notes: list[str] = []
	    if not passed:
	        notes.append("No tested voice produced audio. Run scripts/preflight_check.py and install SILMA, Habibi, Supertonic, or eSpeak NG.")
	    else:
	        pick = choose_recommended_voice(passed)
	        if pick:
	            notes.append(
	                f"Recommended starting voice: {pick.get('voiceId')} ({pick.get('label')}). "
	                "Listen before processing a full book."
	            )
	        notes.append("Listen to the generated files and choose the most natural Arabic voice before processing a full book.")
	        quickest = min(passed, key=lambda entry: entry.get("elapsedSeconds", float("inf")))
	        notes.append(f"Fastest successful voice in this run: {quickest.get('voiceId')} ({quickest.get('elapsedSeconds')}s).")

	    # Install hints, emitted in this fixed order for every optional voice that failed.
	    install_hints = (
	        ("habibi-msa", "Habibi MSA is optional. Install it with scripts/setup_habibi.ps1 if you want to compare the newer MSA voice."),
	        ("silma-local", "SILMA is the preferred default voice. Install it with scripts/setup_silma.ps1 for the best current local baseline."),
	        ("supertonic-ar", "Supertonic 3 is optional. Install it with scripts/setup_supertonic.ps1 when you want a fast CPU Arabic-capable comparison voice."),
	    )
	    notes.extend(hint for voice_id, hint in install_hints if voice_id in failed_ids)
	    return notes


	def write_markdown_report(path: Path, result: dict[str, Any]) -> None:
	    """Write the benchmark ``result`` to ``path`` as a UTF-8 Markdown report.

	    The report contains a summary header, one table row per entry in
	    ``result["results"]``, the ``nextSteps`` as a bullet list, and a
	    PowerShell snippet for scoring the voices after listening. Parent
	    directories of ``path`` are created when missing.

	    Args:
	        path: Destination file of the report.
	        result: Benchmark summary with the keys produced by
	            ``benchmark_voices``; missing keys fall back to placeholders.
	    """
	    path.parent.mkdir(parents=True, exist_ok=True)
	    rating_command = build_rating_command(result)
	    lines = [
	        "# Arabic Voice Benchmark",
	        "",
	        f"Output directory: `{result.get('outputDir', '')}`",
	        f"Text characters: {result.get('textCharacters', 0)}",
	        f"Audio format: {result.get('audioFormat', 'wav')}",
	    ]
	    recommended = result.get("recommended")
	    fastest = result.get("fastest")
	    if recommended:
	        lines.append(f"Recommended starting voice: `{recommended.get('voiceId')}` ({recommended.get('label', '-')})")
	    if fastest:
	        lines.append(f"Fastest successful voice: `{fastest.get('voiceId')}` ({fastest.get('elapsedSeconds')}s)")
	    lines.extend(
	        [
	            "",
	            "## Results",
	            "",
	            "| Voice | Label | Engine | Status | Time | Audio | Notes |",
	            "| --- | --- | --- | --- | ---: | --- | --- |",
	        ]
	    )
	    for item in result.get("results", []):
	        status = "ok" if item.get("ok") else "failed"
	        # Cell values (error messages in particular) may contain pipes or
	        # line breaks, either of which would break the table row.
	        cells = [
	            item.get("voiceId", "-"),
	            item.get("label", "-"),
	            item.get("engine", "-"),
	            status,
	            item.get("elapsedSeconds", "-"),
	            item.get("path", "-"),
	            item.get("error", ""),
	        ]
	        lines.append("| " + " | ".join(_markdown_cell(cell) for cell in cells) + " |")
	    lines.extend(["", "## Next Steps", ""])
	    for step in result.get("nextSteps", []):
	        lines.append(f"- {step}")
	    lines.extend(
	        [
	            "",
	            "## Listening Score",
	            "",
	            "After listening, score each successful voice from 1 to 5 for pronunciation, naturalness, pacing, long-listen comfort, and artifact-free audio.",
	            "",
	            "```powershell",
	            rating_command,
	            "```",
	        ]
	    )
	    path.write_text("\n".join(lines) + "\n", encoding="utf-8")


	def _markdown_cell(value: Any) -> str:
	    """Render ``value`` as one line of text with table pipes escaped."""
	    single_line = " ".join(str(value).splitlines())
	    return single_line.replace("|", "\\|")


	def build_rating_command(result: dict[str, Any]) -> str:
	    """Build the example ``score_voice_listening.py`` command for a report.

	    One ``--rating <voiceId>=5,5,5,5,5`` flag is emitted per successful
	    entry in ``result["results"]``, in order. When no entry succeeded, a
	    single placeholder flag for ``silma-local`` is used instead.
	    """
	    rating_flags = [
	        f"--rating {entry.get('voiceId')}=5,5,5,5,5"
	        for entry in result.get("results", [])
	        if entry.get("ok")
	    ]
	    if len(rating_flags) == 0:
	        rating_flags = ["--rating silma-local=5,5,5,5,5"]
	    pieces = [
	        "python scripts\\score_voice_listening.py",
	        *rating_flags,
	        "--write-report outputs\\voice-listening-score.md",
	    ]
	    return " ".join(pieces)


	def print_table(result: dict[str, Any]) -> None:
	    """Print the benchmark results as a compact fixed-width table.

	    One row is printed per entry in ``result["results"]`` (voice id, ok
	    flag, elapsed seconds, engine, and the audio path or, failing that,
	    the error), followed by a blank line and the ``result["nextSteps"]``
	    as a bullet list.
	    """
	    # The header uses the same column widths as the rows so it lines up.
	    print(f"{'voice':<16} {'ok':<4} {'sec':>5} {'engine':<10} file")
	    print("---------------- ---- ----- ---------- ----")
	    for item in result["results"]:
	        # str() first: a None value does not accept a width format spec.
	        print(
	            f"{str(item.get('voiceId', '-')):<16} "
	            f"{str(item.get('ok')):<4} "
	            f"{str(item.get('elapsedSeconds', '-')):>5} "
	            f"{str(item.get('engine', '-')):<10} "
	            f"{item.get('path', item.get('error', '-'))}"
	        )
	    print()
	    for step in result["nextSteps"]:
	        print(f"- {step}")


	def main_cli() -> None:
	    """Command-line entry point: parse options, run the benchmark, print it.

	    The benchmark text comes from ``--text-file`` when given, otherwise from
	    ``--text``. A Markdown report is written when ``--write-report`` is set,
	    and the result is printed either as JSON (``--json``) or as a table.
	    """
	    stdout = sys.stdout
	    if hasattr(stdout, "reconfigure"):
	        # Emit UTF-8 and replace characters that cannot be encoded.
	        stdout.reconfigure(encoding="utf-8", errors="replace")

	    cli = argparse.ArgumentParser(description="Compare free local Arabic TTS voices on the same text.")
	    option_specs = [
	        ("--voices", {"nargs": "+", "default": DEFAULT_VOICES, "choices": available_voice_ids()}),
	        ("--text", {"default": DEFAULT_SAMPLE_TEXT, "help": "Arabic text to synthesize for every voice."}),
	        ("--text-file", {"type": Path, "help": "Read benchmark text from a UTF-8 file."}),
	        ("--out-dir", {"type": Path, "default": ROOT_DIR / "outputs" / "voice-benchmark"}),
	        ("--format", {"choices": ["wav", "mp3"], "default": "wav"}),
	        ("--write-report", {"type": Path, "help": "Write a Markdown benchmark report."}),
	        ("--json", {"action": "store_true", "help": "Print JSON instead of a compact table."}),
	    ]
	    for flag, settings in option_specs:
	        cli.add_argument(flag, **settings)
	    options = cli.parse_args()

	    if options.text_file:
	        sample_text = options.text_file.read_text(encoding="utf-8")
	    else:
	        sample_text = options.text
	    report = benchmark_voices(options.voices, sample_text, options.out_dir, options.format)

	    if options.write_report:
	        write_markdown_report(options.write_report, report)
	    if not options.json:
	        print_table(report)
	        return
	    print(json.dumps(report, ensure_ascii=False, indent=2))

	if __name__ == "__main__":
	main_cli()