Spaces:
Running
Running
| import gradio as gr | |
| import modal | |
| import base64 | |
| import random | |
| import json | |
| import uuid | |
| from pathlib import Path | |
| from typing import Optional | |
| from examples import SAMPLE_SENTENCES | |
# Name of the deployed Modal app hosting the TTS model classes and ArenaService.
APP_NAME = "arabic-tts-arena"
# Local JSON snapshot of the leaderboard; read by refresh_leaderboard() and
# _get_model_ratings().
LEADERBOARD_FILE = Path(__file__).parent / "leaderboard.json"
MAX_SYNTHESIS_RETRIES = 2  # per-model retry cap before giving up
MIN_BATTLES = 45  # minimum battles for a model to appear on the leaderboard (to avoid unjust rankings for new models with few votes)
# Lazily-populated process-wide cache of the backend model registry
# (see _get_available_models()).
_AVAILABLE_MODELS_CACHE: dict[str, dict[str, str]] | None = None
def _fetch_model_registry() -> dict[str, dict[str, str]]:
    """Fetch the model registry from the Modal backend.

    Returns dict like:
    {"chatterbox": {"class_name": "ChatterboxModel", "display_name": "Chatterbox"}, ...}

    Raises RuntimeError if the backend returns an empty/falsy registry.
    """
    arena_cls = modal.Cls.from_name(APP_NAME, "ArenaService")
    registry = arena_cls().get_model_registry.remote()
    if not registry:
        raise RuntimeError("Failed to fetch model registry from Modal backend")
    print(f"✅ Fetched {len(registry)} models from Modal backend")
    return registry
def _get_available_models() -> dict[str, dict[str, str]]:
    """Lazy-load the model registry on first use (avoids crashing at import time)."""
    global _AVAILABLE_MODELS_CACHE
    # Fast path: registry already fetched for this process.
    if _AVAILABLE_MODELS_CACHE is not None:
        return _AVAILABLE_MODELS_CACHE
    print("⏳ Fetching model registry from Modal backend...")
    _AVAILABLE_MODELS_CACHE = _fetch_model_registry()
    print(f"✅ Available models: {', '.join(_AVAILABLE_MODELS_CACHE.keys())}")
    return _AVAILABLE_MODELS_CACHE
def get_model_cls(model_id: str):
    """Get a Modal class by model_id using the registered class name."""
    registry = _get_available_models()
    if model_id not in registry:
        raise ValueError(f"Model not available: {model_id}")
    return modal.Cls.from_name(APP_NAME, registry[model_id]["class_name"])
def get_display_name(model_id: str) -> str:
    """Get the human-readable display name for a model.

    Falls back to the raw model_id when the model (or its display_name)
    is unknown.
    """
    entry = _get_available_models().get(model_id)
    if entry is None:
        return model_id
    return entry.get("display_name", model_id)
def get_arena_service():
    """Get ArenaService class for voting operations."""
    arena_service = modal.Cls.from_name(APP_NAME, "ArenaService")
    return arena_service
# Centered page header (title + blog/GitHub links) rendered at the top of the app.
HEADER_MD = """
<div style="text-align: center; max-width: 700px; margin: 0 auto;">
<h1 style="font-size: 2.2em; margin-bottom: 0.2em;"> Arabic TTS Arena</h1>
<p style="font-size: 1.1em; color: #666; margin-top: 0;">
Compare Arabic text‑to‑speech models side by side.<br>
Listen, vote, and help build the community leaderboard.
</p>
<p style="font-size: 0.85em; margin-top: 0.3em;">
<a href="https://huggingface.co/blog/Navid-AI/introducing-arabic-tts-arena" target="_blank" style="color: #10b981; text-decoration: none;">Blog post</a>
·
<a href="https://github.com/Navid-Gen-AI/arabic-tts-arena" target="_blank" style="color: #10b981; text-decoration: none;">GitHub</a>
</p>
</div>
"""
# One-line instructions banner shown above the battle controls.
HOW_IT_WORKS_MD = """
<div style="text-align: center; color: #888; font-size: 0.9em; margin-bottom: 0.5em;">
<strong>How it works:</strong>
Enter Arabic text → Listen to two anonymous models → Vote for the better one
</div>
"""
# Leaderboard header removed — metadata is now rendered inline by refresh_leaderboard()
# Long-form "About" tab content: a scoped <style> block followed by the page
# markup (updates feed, project rationale, voting/shortcut tables).
ABOUT_MD = """
<style>
.about-wrap {
max-width: 680px; margin: 0 auto; padding: 0.5em 0 2em 0;
font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.7; color: var(--body-text-color);
}
.about-wrap h2 {
font-size: 1.6em; font-weight: 700; margin: 0 0 0.3em 0;
letter-spacing: -0.01em;
}
.about-wrap h3 {
font-size: 1.15em; font-weight: 700; margin: 1.6em 0 0.5em 0;
letter-spacing: -0.01em;
}
.about-wrap p, .about-wrap li {
font-size: 0.95em; color: #ccc;
}
.about-wrap ol { padding-left: 1.4em; }
.about-wrap ol li { margin-bottom: 0.35em; }
.about-wrap a {
color: #10b981; text-decoration: none;
}
.about-wrap a:hover { text-decoration: underline; }
.about-wrap strong { color: var(--body-text-color); }
/* News / Updates section */
.news-section {
border: 1px solid var(--border-color-primary);
border-radius: 10px;
padding: 1em 1.3em;
margin-bottom: 1.8em;
background: rgba(16,185,129,0.04);
}
.news-section h3 {
margin: 0 0 0.6em 0 !important; font-size: 1.05em;
}
.news-item {
display: flex; gap: 0.8em; align-items: baseline;
margin-bottom: 0.4em; font-size: 0.9em;
}
.news-date {
flex-shrink: 0; font-size: 0.82em; font-weight: 600;
color: #10b981; white-space: nowrap;
font-variant-numeric: tabular-nums;
}
.news-text { color: #ccc; }
/* Shortcuts / Voting tables */
.about-table {
width: 100%; border-collapse: collapse; margin: 0.5em 0 0.8em 0;
font-size: 0.9em;
}
.about-table th {
text-align: left; padding: 0.5em 0.8em;
border-bottom: 1px solid var(--border-color-primary);
font-weight: 600; font-size: 0.85em;
text-transform: uppercase; letter-spacing: 0.04em;
color: var(--body-text-color-subdued, #888);
}
.about-table td {
padding: 0.5em 0.8em;
border-bottom: 1px solid rgba(255,255,255,0.05);
color: #ccc;
}
.about-table td:first-child { font-weight: 600; color: var(--body-text-color); }
.about-table kbd {
display: inline-block; padding: 0.15em 0.5em;
border-radius: 4px; font-size: 0.9em; font-family: monospace;
background: rgba(255,255,255,0.08);
border: 1px solid rgba(255,255,255,0.12);
color: var(--body-text-color);
}
/* Contribute card */
.contribute-card {
border: 1px solid var(--border-color-primary);
border-radius: 10px;
padding: 1.2em 1.4em;
margin-top: 0.5em;
background: rgba(255,255,255,0.02);
}
.contribute-card p { margin-bottom: 0.6em; }
.contribute-card code {
padding: 0.15em 0.45em; border-radius: 4px;
font-size: 0.88em;
background: rgba(255,255,255,0.08);
color: #10b981;
}
.contribute-steps { display: flex; flex-direction: column; gap: 10px; }
.contribute-step {
display: flex; align-items: center; gap: 12px;
font-size: 0.93em; color: #ccc;
}
.step-num {
flex-shrink: 0;
width: 28px; height: 28px;
display: flex; align-items: center; justify-content: center;
border-radius: 50%;
font-weight: 700; font-size: 0.85em;
background: rgba(16,185,129,0.15);
color: #10b981;
}
</style>
<div class="about-wrap">
<div class="news-section">
<h3>📢 Latest Updates</h3>
<div class="news-item"><span class="news-date">Mar 17, 2026</span><span class="news-text">✨ Added Latency to Leaderboard based on feedback from <a href="https://www.linkedin.com/in/hazem-abdelazim-95153b72/" target="_blank">Dr. Hazem Abdelazim</a></span></div>
<div class="news-item"><span class="news-date">Mar 12, 2026</span><span class="news-text">🎉 Arena launched with 12 Arabic TTS models — <a href="https://huggingface.co/blog/Navid-AI/introducing-arabic-tts-arena" target="_blank">read the blog post</a></span></div>
<div class="news-item"><span class="news-date" style="opacity:0;">—</span><span class="news-text">🤝 Have a model that should be here? <a href="https://github.com/Navid-Gen-AI/arabic-tts-arena" target="_blank">Open a PR</a> — we'd love to welcome it in.</span></div>
</div>
<h2>Why We Built This</h2>
<p>Arabic is spoken by over 400 million people. It's the language of poetry, prayer, storytelling, and everyday life. Yet when it comes to text-to-speech, Arabic has been an afterthought — tested in labs, benchmarked on charts, but rarely <em>listened to</em> by the people it's meant to serve.</p>
<p>We wanted to change that. Not with another paper or another metric — but by putting the microphone in <strong>your</strong> hands. You listen. You choose. Your ear is the benchmark.</p>
<h3>How the Arena Works</h3>
<ol>
<li>You type (or pick) an Arabic sentence</li>
<li>Two anonymous models read it aloud</li>
<li>You vote for the one that sounds more natural, more <em>human</em></li>
<li>Rankings update — and the best voices rise to the top</li>
</ol>
<p>No model names are shown until after you vote, so every judgement is pure. Over time, thousands of these small choices build a leaderboard that reflects what people actually prefer — not what a loss function thinks is best.</p>
<h3>Your Moves</h3>
<table class="about-table">
<thead><tr><th>Choice</th><th>What it means</th></tr></thead>
<tbody>
<tr><td>A is Better</td><td>Voice A sounded more natural to you</td></tr>
<tr><td>B is Better</td><td>Voice B sounded more natural to you</td></tr>
<tr><td>Both Good</td><td>Honestly, both sounded great</td></tr>
<tr><td>Both Bad</td><td>Neither felt right</td></tr>
</tbody>
</table>
<h3>Quick Keys</h3>
<table class="about-table">
<thead><tr><th>Key</th><th>Action</th></tr></thead>
<tbody>
<tr><td><kbd>A</kbd></td><td>Vote for A</td></tr>
<tr><td><kbd>B</kbd></td><td>Vote for B</td></tr>
<tr><td><kbd>N</kbd></td><td>Next round</td></tr>
</tbody>
</table>
<p style="text-align: center; color: #888; font-size: 0.88em; margin-top: 2em;">
Built with ❤️ for the Arabic-speaking world by <a href="https://github.com/Navid-Gen-AI" target="_blank">Navid</a>
</p>
</div>
"""
def decode_audio_to_file(audio_base64: str) -> Optional[str]:
    """Decode base64 WAV audio and write to a temp file.

    Returns the file path (Gradio gr.Audio accepts file paths), or None if
    the payload cannot be decoded or the file cannot be written.
    """
    import binascii
    import tempfile

    try:
        wav_bytes = base64.b64decode(audio_base64)
        # delete=False: the file must outlive this function so Gradio can
        # serve it; the context manager still guarantees close on all paths
        # (the original flushed/closed by hand and leaked on write errors).
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp.write(wav_bytes)
            return tmp.name
    except (binascii.Error, ValueError, TypeError, OSError):
        # Narrowed from a bare `except Exception`: bad/short base64 raises
        # binascii.Error (a ValueError), non-str/bytes input raises TypeError,
        # and disk/permission problems raise OSError. Anything else is a real
        # bug and should surface instead of silently returning None.
        return None
def synthesize_audio(text: str, model_id: str) -> dict:
    """Call the ArenaService to synthesize (or return cached) audio.

    The backend checks its audio cache first and only calls the GPU model
    on a cache miss, saving compute and reducing latency.
    Retries up to MAX_SYNTHESIS_RETRIES times on failure before giving up.
    """
    last_error = None
    attempt = 0
    while attempt < MAX_SYNTHESIS_RETRIES:
        attempt += 1
        try:
            backend = get_arena_service()
            response = backend().synthesize_or_cache.remote(text, model_id)
            if response.get("success"):
                return response
            last_error = response.get("error", "Unknown synthesis error")
            print(f"⚠️ {model_id} attempt {attempt} failed: {last_error}")
        except Exception as e:
            last_error = str(e)
            print(f"⚠️ {model_id} attempt {attempt} exception: {last_error}")
    # All retries exhausted — hand back a failure dict the callers expect.
    return {
        "success": False,
        "error": f"{model_id} failed after {MAX_SYNTHESIS_RETRIES} attempts: {last_error}",
        "model_id": model_id,
    }
def _get_model_ratings() -> dict[str, dict]:
    """Read per-model elo and ci from the local leaderboard file.

    Best-effort: any read/parse problem yields an empty mapping.
    """
    try:
        if not LEADERBOARD_FILE.exists():
            return {}
        with open(LEADERBOARD_FILE, "r") as f:
            payload = json.load(f)
        ratings: dict[str, dict] = {}
        for entry in payload.get("models", []):
            ratings[entry["model_id"]] = {
                "elo": entry.get("elo", 1000),
                "ci": entry.get("ci", 0),
                "battles": entry.get("battles", 0),
            }
        return ratings
    except Exception:
        return {}
def get_random_model_pair() -> tuple[str, str]:
    """Select two models using adaptive pairing for maximum information gain.

    Combines two signals to score every possible pair:
    1. **CI overlap** — pairs whose confidence intervals overlap are the
       most uncertain (we don't know which is better), so a vote between
       them is maximally informative. Measured as the fraction of overlap
       relative to the smaller CI. Pairs with no CI data yet get the
       maximum overlap score (1.0) so new models are explored.
    2. **Under-sampling** — pairs where either model has few battles get
       a boost via inverse-sqrt weighting.

    The two signals are multiplied together and used as sampling weights
    over all possible pairs, so the selection is stochastic (not greedy)
    and every pair retains a non-zero chance of appearing.
    """
    import math
    from itertools import combinations

    model_ids = list(_get_available_models().keys())
    if len(model_ids) < 2:
        raise ValueError("Not enough models available for comparison")
    ratings = _get_model_ratings()

    def pair_weight(m1: str, m2: str) -> float:
        # Score one candidate pair: CI-overlap signal × exploration signal.
        r1, r2 = ratings.get(m1, {}), ratings.get(m2, {})
        elo1, elo2 = r1.get("elo", 1000), r2.get("elo", 1000)
        ci1, ci2 = r1.get("ci", 0), r2.get("ci", 0)
        battles1, battles2 = r1.get("battles", 0), r2.get("battles", 0)
        # Signal 1: CI overlap score in [0.05, 1]; missing CI → fully uncertain.
        if ci1 <= 0 or ci2 <= 0:
            overlap_score = 1.0
        else:
            # Intervals are [elo - ci, elo + ci].
            overlap = max(0.0, min(elo1 + ci1, elo2 + ci2) - max(elo1 - ci1, elo2 - ci2))
            smaller_width = 2 * min(ci1, ci2)
            overlap_score = min(overlap / smaller_width, 1.0) if smaller_width > 0 else 1.0
            # Floor so well-separated pairs still occasionally appear.
            overlap_score = max(overlap_score, 0.05)
        # Signal 2: inverse-sqrt boost for under-sampled models.
        exploration = 0.5 * (1.0 / math.sqrt(battles1 + 1) + 1.0 / math.sqrt(battles2 + 1))
        return overlap_score * exploration

    candidate_pairs = list(combinations(model_ids, 2))
    weights = [pair_weight(a, b) for a, b in candidate_pairs]
    chosen = random.choices(candidate_pairs, weights=weights, k=1)[0]
    # Randomise A/B assignment so there's no positional bias.
    if random.random() < 0.5:
        return chosen
    return (chosen[1], chosen[0])
def get_random_sentence():
    """Return a random Arabic sample sentence."""
    sample = random.choice(SAMPLE_SENTENCES)
    return sample
def _empty_comparison():
    """Return values that reset the UI to the pre-synthesis state.

    20-tuple, positionally matched to the Gradio outputs of the battle tab.
    """
    return (
        None, None,                    # audio_a, audio_b
        None, None,                    # model_a_id, model_b_id
        None, None,                    # audio_a_base64, audio_b_base64
        None, None,                    # latency_a, latency_b
        gr.update(visible=False),      # audio_row
        gr.update(visible=False),      # vote_row
        gr.update(visible=False),      # result_display
        gr.update(value="🔊 Synthesize", interactive=True),  # synth_btn
        gr.update(value="", visible=False),                  # status_display
        "🔒 Hidden",                   # model_a_label
        "🔒 Hidden",                   # model_b_label
        gr.update(visible=False),      # next_round_btn
        gr.update(interactive=True),   # vote_a_btn
        gr.update(interactive=True),   # vote_b_btn
        gr.update(interactive=True),   # vote_both_good_btn
        gr.update(interactive=True),   # vote_both_bad_btn
    )
def _pick_replacement(exclude: set[str]) -> str | None:
    """Pick a model not in *exclude*, or None if none left."""
    pool = [model for model in _get_available_models() if model not in exclude]
    if not pool:
        return None
    return random.choice(pool)
def _synth_one(text: str, model_id: str, used: set[str]) -> tuple[dict | None, str]:
    """Try to synthesize with *model_id*; on failure swap in a replacement once.

    Returns (result_dict_or_None, final_model_id). *used* is mutated with any
    replacement model that gets drafted in.
    """
    primary = synthesize_audio(text, model_id)
    if primary.get("success"):
        return primary, model_id
    # Primary model failed after its retries — draft one replacement.
    fallback_id = _pick_replacement(used)
    if fallback_id:
        used.add(fallback_id)
        fallback = synthesize_audio(text, fallback_id)
        if fallback.get("success"):
            return fallback, fallback_id
    return None, model_id  # give up
def generate_comparison(text: str):
    """Generate audio from two random TTS models for comparison.

    Both models are synthesized in parallel using threads to halve wait time.
    Uses a generator to yield status updates so the user sees progress.

    Yields 20-tuples, positionally matched to the Gradio outputs:
    (audio_a, audio_b, model_a_id, model_b_id, audio_a_base64, audio_b_base64,
     latency_a, latency_b, audio_row, vote_row, result_display, synth_btn,
     status_display, model_a_label, model_b_label, next_round_btn,
     vote_a_btn, vote_b_btn, vote_both_good_btn, vote_both_bad_btn)
    """
    from concurrent.futures import ThreadPoolExecutor

    # Guard: empty/whitespace-only input — reset the UI and bail out.
    if not text or not text.strip():
        gr.Warning("Please enter some Arabic text first.")
        yield _empty_comparison()
        return
    text = text.strip()
    model_a_id, model_b_id = get_random_model_pair()
    # — Show "synthesizing" status: clear state, disable the button, show spinner text —
    yield (
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,  # latency_a, latency_b
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(value="⏳ Synthesizing…", interactive=False),
        gr.update(value="⏳ Generating audio from both models…", visible=True),
        "🔒 Hidden",
        "🔒 Hidden",
        gr.update(visible=False),
        gr.update(interactive=True),
        gr.update(interactive=True),
        gr.update(interactive=True),
        gr.update(interactive=True),
    )
    # — Synthesize both models in parallel —
    # Each thread gets its own used set for the replacement fallback logic.
    def synth_a():
        used = {model_a_id, model_b_id}
        return _synth_one(text, model_a_id, used)

    def synth_b():
        used = {model_a_id, model_b_id}
        return _synth_one(text, model_b_id, used)

    try:
        with ThreadPoolExecutor(max_workers=2) as pool:
            future_a = pool.submit(synth_a)
            future_b = pool.submit(synth_b)
            # NOTE: _synth_one may swap in a replacement model, so the ids are
            # rebound here to whichever model actually produced the audio.
            result_a, model_a_id = future_a.result()
            result_b, model_b_id = future_b.result()
    except Exception as e:
        gr.Warning(f"Connection error — is the backend deployed? ({e})")
        yield _empty_comparison()
        return
    if result_a is None:
        gr.Warning("Model A synthesis failed after retries. Please try again.")
        yield _empty_comparison()
        return
    if result_b is None:
        gr.Warning("Model B synthesis failed after retries. Please try again.")
        yield _empty_comparison()
        return
    # — Decode audio to temp files for Gradio —
    audio_a_path = decode_audio_to_file(result_a["audio_base64"])
    audio_b_path = decode_audio_to_file(result_b["audio_base64"])
    if not audio_a_path or not audio_b_path:
        gr.Warning("Failed to decode audio from backend.")
        yield _empty_comparison()
        return
    # Extract latency (None for cache hits / legacy responses)
    latency_a = result_a.get("latency_seconds")
    latency_b = result_b.get("latency_seconds")
    # Final state: reveal players and voting controls, keep model names hidden.
    yield (
        audio_a_path,
        audio_b_path,
        model_a_id,
        model_b_id,
        result_a["audio_base64"],
        result_b["audio_base64"],
        latency_a,
        latency_b,
        gr.update(visible=True),  # audio_row
        gr.update(visible=True),  # vote_row
        gr.update(visible=False),  # result_display
        gr.update(value="🔊 Synthesize", interactive=True),
        gr.update(value="", visible=False),  # hide status
        "🔒 Hidden",  # model_a_label
        "🔒 Hidden",  # model_b_label
        gr.update(visible=False),  # next_round_btn
        gr.update(interactive=True),  # vote_a_btn
        gr.update(interactive=True),  # vote_b_btn
        gr.update(interactive=True),  # vote_both_good_btn
        gr.update(interactive=True),  # vote_both_bad_btn
    )
def submit_vote(
    vote: str,
    text_prompt: str,
    model_a_id: str,
    model_b_id: str,
    audio_a_b64: str,
    audio_b_b64: str,
    latency_a: float | None,
    latency_b: float | None,
):
    """Submit a vote for the comparison.

    Records the vote on the Modal backend, then reveals the model names and
    swaps the voting buttons for a result card.

    Returns a 9-tuple of Gradio updates:
    (vote_row, result_display, next_round_btn, model_a_label, model_b_label,
     vote_a_btn, vote_b_btn, vote_both_good_btn, vote_both_bad_btn)
    """

    def _keep_voting_ui():
        # Shared error state: keep the vote row visible and the buttons
        # untouched so the user can retry. (Previously this 9-tuple was
        # duplicated verbatim in three places.)
        return (
            gr.update(visible=True),   # vote_row stays
            gr.update(visible=False),  # result_display
            gr.update(visible=False),  # next_round_btn
            "🔒 Hidden",
            "🔒 Hidden",
            gr.update(),  # vote_a_btn unchanged
            gr.update(),  # vote_b_btn unchanged
            gr.update(),  # vote_both_good_btn unchanged
            gr.update(),  # vote_both_bad_btn unchanged
        )

    if not model_a_id or not model_b_id:
        gr.Warning("Please synthesize audio first.")
        return _keep_voting_ui()
    session_id = uuid.uuid4().hex
    try:
        service = get_arena_service()
        result = service().record_vote.remote(
            session_id=session_id,
            text=text_prompt,
            model_a=model_a_id,
            model_b=model_b_id,
            winner=vote,
            audio_a_base64=audio_a_b64,
            audio_b_base64=audio_b_b64,
            latency_a=latency_a,
            latency_b=latency_b,
        )
    except Exception as e:
        gr.Warning(f"Vote failed: {e}")
        return _keep_voting_ui()
    if not result.get("success"):
        gr.Warning(f"Error: {result.get('error', 'Unknown')}")
        return _keep_voting_ui()
    # Vote recorded — build the reveal card.
    vote_emoji = {
        "model_a": "🅰️ Model A",
        "model_b": "🅱️ Model B",
        "both_good": "👍 Both Good",
        "both_bad": "👎 Both Bad",
    }
    name_a = get_display_name(model_a_id)
    name_b = get_display_name(model_b_id)
    result_md = f"""
<div style="text-align:center; padding: 1.2em 1em; border-radius: 12px;
background: var(--block-background-fill); border: 1px solid var(--border-color-primary);">
<div style="font-size: 1.6em; margin-bottom: 0.3em;">✅ Vote Recorded!</div>
<div style="font-size: 1.05em; margin-bottom: 0.8em;">
You chose: <strong>{vote_emoji.get(vote, vote)}</strong>
</div>
<div style="display: flex; justify-content: center; gap: 2em; font-size: 1em;">
<div>🅰️ <strong>{name_a}</strong></div>
<div style="color: #aaa;">vs</div>
<div>🅱️ <strong>{name_b}</strong></div>
</div>
<div style="margin-top: 0.8em; color: #888; font-size: 0.85em;">
Thanks for voting! The leaderboard updates daily.
</div>
</div>
"""
    return (
        gr.update(visible=False),                  # hide vote_row
        gr.update(value=result_md, visible=True),  # show result
        gr.update(visible=True),                   # show next_round_btn
        f"**{name_a}**",                           # reveal model A
        f"**{name_b}**",                           # reveal model B
        gr.update(interactive=False),              # disable vote_a_btn
        gr.update(interactive=False),              # disable vote_b_btn
        gr.update(interactive=False),              # disable vote_both_good_btn
        gr.update(interactive=False),              # disable vote_both_bad_btn
    )
def refresh_leaderboard():
    """Read and display leaderboard from local JSON file.

    Returns an HTML string: a <style> block plus a meta line, column header,
    and one grid row per qualifying model. Any failure is reported inline as
    an error string rather than raised.
    """
    try:
        if not LEADERBOARD_FILE.exists():
            return _empty_leaderboard_md()
        with open(LEADERBOARD_FILE, "r") as f:
            data = json.load(f)
        models = data.get("models", [])
        last_updated = data.get("last_updated", "")
        if not models:
            return _empty_leaderboard_md()
        # Hide models with fewer than MIN_BATTLES (currently 45) battles
        models = [m for m in models if m.get("battles", 0) >= MIN_BATTLES]
        if not models:
            return _empty_leaderboard_md()
        # Re-assign ranks after filtering
        for i, m in enumerate(models, start=1):
            m["rank"] = i
        # Format timestamp (ISO-8601 with optional trailing Z)
        try:
            from datetime import datetime
            dt = datetime.fromisoformat(last_updated.replace("Z", "+00:00"))
            updated_str = dt.strftime("%b %d, %Y")
        except Exception:
            updated_str = last_updated or "—"
        # --- Styles ---
        style_block = """
<style>
.lb-container { max-width: 660px; margin: 0 auto; }
.lb-meta {
display: flex; justify-content: center; align-items: center;
gap: 0.6em;
font-size: 0.85em;
color: var(--body-text-color-subdued, #999);
padding: 0 0 0.8em 0;
}
.lb-meta strong {
color: var(--body-text-color);
font-weight: 700;
}
.lb-meta-sep {
color: var(--body-text-color-subdued, #666);
opacity: 0.5;
}
.lb-list { display: flex; flex-direction: column; gap: 0; }
/* Each model row */
.lb-item {
display: grid;
grid-template-columns: 48px 1fr 120px 90px 72px;
align-items: center;
gap: 0 12px;
padding: 14px 20px;
border-bottom: 1px solid var(--border-color-primary);
}
.lb-item:first-child { border-top: 1px solid var(--border-color-primary); }
.lb-item:hover { background: rgba(255,255,255,0.03); }
/* Top-3 subtle left accent */
.lb-item.gold { background: rgba(255,195,0,0.04); }
.lb-item.silver { background: rgba(180,180,195,0.04); }
.lb-item.bronze { background: rgba(210,140,70,0.03); }
.lb-item.gold:hover { background: rgba(255,195,0,0.08); }
.lb-item.silver:hover { background: rgba(180,180,195,0.08); }
.lb-item.bronze:hover { background: rgba(210,140,70,0.07); }
.lb-rank {
font-size: 1.1em; font-weight: 600;
text-align: center;
color: var(--body-text-color);
}
.lb-rank.r-gold { color: #E8C33A; }
.lb-rank.r-silver { color: #C0C0C8; }
.lb-rank.r-bronze { color: #D4944A; }
.lb-name {
font-weight: 600; font-size: 1.05em;
color: var(--body-text-color) !important;
text-decoration: none !important;
white-space: nowrap; overflow: hidden; text-overflow: ellipsis;
cursor: default;
}
a.lb-name { cursor: pointer; }
a.lb-name::after {
content: '';
display: inline-block;
width: 0.95em; height: 0.95em;
margin-left: 5px;
vertical-align: middle;
opacity: 0;
transition: opacity 0.15s;
background: currentColor;
-webkit-mask: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='none' stroke='currentColor' stroke-width='2.5' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpath d='M7 17L17 7'/%3E%3Cpath d='M7 7h10v10'/%3E%3C/svg%3E") no-repeat center/contain;
mask: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='none' stroke='currentColor' stroke-width='2.5' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpath d='M7 17L17 7'/%3E%3Cpath d='M7 7h10v10'/%3E%3C/svg%3E") no-repeat center/contain;
}
a.lb-name:hover { color: #10b981 !important; text-decoration: none !important; }
a.lb-name:hover::after { opacity: 1; }
.lb-score {
text-align: center;
font-weight: 700; font-size: 1.05em;
font-variant-numeric: tabular-nums;
color: var(--body-text-color);
}
.lb-votes {
text-align: center;
font-size: 0.92em;
font-variant-numeric: tabular-nums;
color: var(--body-text-color);
}
.lb-latency {
text-align: center;
font-size: 0.92em;
font-variant-numeric: tabular-nums;
color: var(--body-text-color);
position: relative;
cursor: default;
overflow: hidden;
}
.lb-latency[data-gpu] {
cursor: pointer;
}
/* The latency value and GPU label live in spans inside the cell */
.lb-latency .lb-lat-val {
display: block;
transition: transform 0.25s ease, opacity 0.25s ease;
}
.lb-latency .lb-lat-gpu {
display: block;
position: absolute;
inset: 0;
display: flex;
align-items: center;
justify-content: center;
font-size: 0.85em;
font-weight: 600;
color: #10b981;
letter-spacing: 0.01em;
transform: translateY(100%);
opacity: 0;
transition: transform 0.25s ease, opacity 0.25s ease;
}
.lb-latency[data-gpu]:hover .lb-lat-val {
transform: translateY(-100%);
opacity: 0;
}
.lb-latency[data-gpu]:hover .lb-lat-gpu {
transform: translateY(0);
opacity: 1;
}
.lb-ci {
font-size: 0.78em;
font-weight: 400;
color: var(--body-text-color-subdued, #888);
margin-left: 3px;
font-variant-numeric: tabular-nums;
}
/* Column labels */
.lb-colheader {
display: grid;
grid-template-columns: 48px 1fr 120px 90px 72px;
align-items: center;
gap: 0 12px;
padding: 6px 20px 8px 20px;
font-size: 0.75em;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.05em;
color: var(--body-text-color-subdued, #999);
}
.lb-colheader span:nth-child(3),
.lb-colheader span:nth-child(4),
.lb-colheader span:nth-child(5) { text-align: center; }
@media (max-width: 600px) {
.lb-container { margin: 0 4px; }
.lb-colheader {
grid-template-columns: 36px 1fr 72px;
padding: 6px 10px 8px 10px;
font-size: 0.7em;
}
.lb-colheader span:nth-child(4),
.lb-colheader span:nth-child(5) { display: none; }
.lb-item {
grid-template-columns: 36px 1fr 72px;
gap: 0 6px;
padding: 10px 10px;
}
.lb-rank { font-size: 0.95em; }
.lb-name { font-size: 0.92em; }
.lb-score { font-size: 0.92em; }
.lb-ci { font-size: 0.7em; }
.lb-votes { display: none; }
.lb-latency { display: none; }
}
@media (max-width: 380px) {
.lb-colheader {
grid-template-columns: 30px 1fr 64px;
padding: 5px 6px 7px 6px;
}
.lb-item {
grid-template-columns: 30px 1fr 64px;
gap: 0 4px;
padding: 9px 6px;
}
.lb-rank { font-size: 0.88em; }
.lb-name { font-size: 0.85em; }
.lb-score { font-size: 0.85em; }
.lb-ci { display: none; }
}
</style>
"""
        # Metadata line (total battles double-counts each pairwise battle once
        # per participant, if "battles" is a per-model count — TODO confirm)
        total_battles = sum(m.get("battles", 0) for m in models)
        meta_html = (
            f'<div class="lb-meta">'
            f'<span>⚔️ <strong>{total_battles:,}</strong> battles</span>'
            f'<span class="lb-meta-sep">·</span>'
            f'<span>Updated <strong>{updated_str}</strong></span>'
            f'</div>'
        )
        # Column labels
        col_header = (
            '<div class="lb-colheader">'
            "<span>Rank</span>"
            "<span>Model</span>"
            "<span>Score</span>"
            "<span>Latency</span>"
            "<span>Battles</span>"
            "</div>"
        )
        # Build rows; ranks 1-3 get gold/silver/bronze accents
        tier_row = {1: "gold", 2: "silver", 3: "bronze"}
        tier_rank = {1: "r-gold", 2: "r-silver", 3: "r-bronze"}
        items_html = ""
        for entry in models:
            rank = entry["rank"]
            name = entry["name"]
            model_url = entry.get("model_url", "")
            elo = entry["elo"]
            ci = entry.get("ci", 0)
            battles = entry.get("battles", 0)
            avg_latency = entry.get("avg_latency")
            gpu = entry.get("gpu", "")
            row_cls = tier_row.get(rank, "")
            rank_cls = tier_rank.get(rank, "")
            # Models with no GPU label are assumed to be API-backed.
            is_api = not gpu
            if model_url:
                name_el = (
                    f'<a class="lb-name" href="{model_url}" target="_blank">{name}</a>'
                )
            else:
                name_el = f'<span class="lb-name">{name}</span>'
            votes_text = f"{battles:,}" if battles else "—"
            ci_html = f'<span class="lb-ci">±{ci:.0f}</span>' if ci else ""
            latency_text = f"{avg_latency:.1f}s" if avg_latency is not None else "—"
            if gpu:
                gpu_attr = f' data-gpu="{gpu}"'
                gpu_label = f'<span class="lb-lat-gpu">⚡ {gpu}</span>'
            elif is_api:
                gpu_attr = ' data-gpu="API"'
                gpu_label = '<span class="lb-lat-gpu">☁️ API</span>'
            else:
                # NOTE(review): unreachable — is_api is exactly `not gpu`, so
                # one of the two branches above always matches.
                gpu_attr = ""
                gpu_label = ""
            items_html += (
                f'<div class="lb-item {row_cls}">'
                f'<div class="lb-rank {rank_cls}">{rank}</div>'
                f"<div>{name_el}</div>"
                f'<div class="lb-score">{elo:.0f}{ci_html}</div>'
                f'<div class="lb-latency"{gpu_attr}><span class="lb-lat-val">{latency_text}</span>{gpu_label}</div>'
                f'<div class="lb-votes">{votes_text}</div>'
                f"</div>"
            )
        return (
            f'<div class="lb-container">'
            f"{style_block}{meta_html}{col_header}"
            f'<div class="lb-list">{items_html}</div>'
            f"</div>"
        )
    except Exception as e:
        return f"❌ Error loading leaderboard: {e}"
| def _empty_leaderboard_md() -> str: | |
| return ( | |
| '<div style="text-align:center; padding:3em 1em; color:var(--body-text-color-subdued,#888);">' | |
| "<h3>No data yet!</h3>" | |
| "<p>Be the first to vote — head to the <strong>⚔️ Battle</strong> tab.</p>" | |
| "</div>" | |
| ) | |
def clear_for_next_round():
    """Reset the UI for a new comparison.

    Returns one value per output component wired to next_round_btn, in order:
    both audio players cleared, the audio/vote/result/next-round sections
    hidden, model labels re-masked, a fresh random sentence in the textbox,
    and all four vote buttons re-enabled.
    """
    # Fresh gr.update dict per output — never share one update object
    # across components.
    def _hide():
        return gr.update(visible=False)

    def _enable():
        return gr.update(interactive=True)

    return (
        None,                    # audio_a
        None,                    # audio_b
        _hide(),                 # audio_row
        _hide(),                 # vote_row
        _hide(),                 # result_display
        _hide(),                 # next_round_btn
        "🔒 Hidden",             # model_a_label
        "🔒 Hidden",             # model_b_label
        get_random_sentence(),   # text_input — new random sentence
        _enable(),               # vote_a_btn
        _enable(),               # vote_b_btn
        _enable(),               # vote_both_good_btn
        _enable(),               # vote_both_bad_btn
    )
# Global stylesheet injected into the Gradio app (passed as css= to gr.Blocks
# in create_demo). Covers: RTL styling for the Arabic text box, the A/B
# comparison column headers and audio players, vote/next/synthesize button
# sizing, the pulsing status line shown during synthesis, leaderboard wrapper
# width, and centered tab navigation. Leaderboard row styles are emitted
# separately alongside the leaderboard HTML, not here.
CUSTOM_CSS = """
/* Hide Gradio footer */
footer { display: none !important; }
/* RTL text input */
.text-input textarea {
    font-size: 1.15em !important;
    direction: rtl;
    line-height: 1.6;
}
/* Center helpers */
.center-text { text-align: center !important; }
/* Model column labels */
.model-label {
    text-align: center;
    font-weight: 600;
    font-size: 1em;
    padding: 0.4em 0 0.1em 0;
    min-height: 28px;
    letter-spacing: 0.02em;
}
/* Column header badges (A / B) */
.column-header {
    text-align: center;
    font-size: 1.15em;
    font-weight: 700;
    padding: 0.3em 0;
    margin-bottom: 0.15em;
}
/* Audio players */
.audio-player { min-height: 70px; }
/* Voting buttons — consistent sizing */
.vote-btn {
    min-height: 48px !important;
    font-size: 1em !important;
    font-weight: 600 !important;
}
.tie-btn {
    min-height: 44px !important;
    font-weight: 500 !important;
}
/* Next round button */
.next-btn {
    min-height: 48px !important;
    font-size: 1.05em !important;
    font-weight: 600 !important;
    margin-top: 0.5em !important;
}
/* Synth button */
.synth-btn {
    min-height: 48px !important;
    font-size: 1.05em !important;
}
/* Status message during synthesis */
.status-msg {
    text-align: center;
    font-size: 1.05em;
    color: #888;
    padding: 0.6em 0;
    animation: pulse 1.5s ease-in-out infinite;
}
@keyframes pulse {
    0%, 100% { opacity: 1; }
    50% { opacity: 0.5; }
}
/* Leaderboard tab spacing */
.leaderboard-wrap { max-width: 680px; margin: 0 auto; }
/* Center the tab buttons */
.tabs > .tab-nav,
div[role="tablist"],
.tab-nav {
    justify-content: center !important;
}
"""
# Keyboard shortcuts injected into the page <head> (passed as head= to
# gr.Blocks in create_demo): A / B cast a vote for the matching model,
# N starts the next round. Uses 'keydown' instead of the deprecated
# 'keypress' event, and additionally skips events that carry a modifier
# (so browser shortcuts like Ctrl+A / Cmd+N are not hijacked) or that
# target an editable element.
SHORTCUT_JS = """
<script>
document.addEventListener('keydown', function(e) {
    // Don't hijack browser/OS shortcuts (Ctrl+A, Cmd+N, ...)
    if (e.ctrlKey || e.metaKey || e.altKey) return;
    // Don't fire when user is typing in an input
    const tag = e.target.tagName.toLowerCase();
    if (tag === 'input' || tag === 'textarea' || e.target.isContentEditable) return;
    switch (e.key.toLowerCase()) {
        case 'a': document.getElementById('vote-a-btn')?.click(); break;
        case 'b': document.getElementById('vote-b-btn')?.click(); break;
        case 'n': document.getElementById('next-round-btn')?.click(); break;
    }
}, false);
</script>
"""
def create_demo():
    """Build and return the Gradio Blocks app (Battle / Leaderboard / Story tabs).

    Wiring overview:
      * synth_btn -> generate_comparison fills both audio players, the hidden
        per-round state (model ids, base64 audio, latencies) and toggles the
        visibility of the audio/vote sections; a chained .then snapshots the
        textbox into current_text.
      * The four vote buttons all funnel into submit_vote with a different
        vote_type string and share one inputs/outputs list.
      * next_round_btn -> clear_for_next_round resets everything for a new round.

    NOTE(review): the order of every `outputs=[...]` list below must match the
    positional order of the corresponding handler's return tuple — keep them in
    sync when editing.
    """
    with gr.Blocks(
        title="Arabic TTS Arena",
        theme=gr.themes.Soft(
            primary_hue="emerald",
            secondary_hue="slate",
            neutral_hue="slate",
        ),
        css=CUSTOM_CSS,
        head=SHORTCUT_JS,  # page-level keyboard shortcuts: A / B / N
    ) as demo:
        # Header
        gr.HTML(HEADER_MD)
        with gr.Tabs():
            # Voting Tab
            with gr.TabItem("⚔️ Battle", id="battle"):
                gr.HTML(HOW_IT_WORKS_MD)
                # Hidden state — carried from the synthesize step to the vote
                # step so submit_vote knows which anonymized models competed.
                model_a_id = gr.State(value=None)
                model_b_id = gr.State(value=None)
                audio_a_base64 = gr.State(value=None)
                audio_b_base64 = gr.State(value=None)
                latency_a_state = gr.State(value=None)
                latency_b_state = gr.State(value=None)
                current_text = gr.State(value="")  # text as of last synthesis, not the live textbox
                # — Text input —
                with gr.Group():
                    with gr.Row():
                        text_input = gr.Textbox(
                            container=False,
                            show_label=False,
                            placeholder="اكتب نصاً عربياً هنا...",
                            lines=1,
                            max_lines=3,
                            scale=20,
                            elem_classes=["text-input"],  # RTL styling from CUSTOM_CSS
                        )
                        random_btn = gr.Button(
                            "🎲",
                            scale=0,
                            min_width=50,
                            variant="secondary",
                        )
                    synth_btn = gr.Button(
                        "🔊 Synthesize",
                        variant="primary",
                        size="lg",
                        elem_classes=["synth-btn"],
                    )
                # — Status indicator (shown during synthesis) —
                status_display = gr.HTML(
                    value="", visible=False, elem_classes=["status-msg"]
                )
                # — Audio players (hidden until synthesis) —
                with gr.Row(visible=False, equal_height=True) as audio_row:
                    with gr.Column():
                        gr.Markdown(
                            "### 🅰️ Model A",
                            elem_classes=["column-header", "center-text"],
                        )
                        audio_a = gr.Audio(
                            show_label=False,
                            interactive=False,
                            elem_classes=["audio-player"],
                        )
                        # Masked until a vote is cast, then revealed by submit_vote.
                        model_a_label = gr.Markdown(
                            "🔒 Hidden",
                            elem_classes=["model-label", "center-text"],
                        )
                        vote_a_btn = gr.Button(
                            "👆 A is Better",
                            variant="primary",
                            elem_id="vote-a-btn",  # targeted by the 'a' keyboard shortcut
                            elem_classes=["vote-btn"],
                        )
                    with gr.Column():
                        gr.Markdown(
                            "### 🅱️ Model B",
                            elem_classes=["column-header", "center-text"],
                        )
                        audio_b = gr.Audio(
                            show_label=False,
                            interactive=False,
                            elem_classes=["audio-player"],
                        )
                        model_b_label = gr.Markdown(
                            "🔒 Hidden",
                            elem_classes=["model-label", "center-text"],
                        )
                        vote_b_btn = gr.Button(
                            "👆 B is Better",
                            variant="primary",
                            elem_id="vote-b-btn",  # targeted by the 'b' keyboard shortcut
                            elem_classes=["vote-btn"],
                        )
                # — Tie buttons —
                with gr.Row(visible=False) as vote_row:
                    vote_both_good_btn = gr.Button(
                        "👍 Both Good",
                        variant="secondary",
                        elem_classes=["tie-btn"],
                    )
                    vote_both_bad_btn = gr.Button(
                        "👎 Both Bad",
                        variant="secondary",
                        elem_classes=["tie-btn"],
                    )
                # — Result card + next round —
                result_display = gr.HTML(visible=False)
                next_round_btn = gr.Button(
                    "⚡ Next Round (N)",
                    visible=False,
                    variant="primary",
                    elem_id="next-round-btn",  # targeted by the 'n' keyboard shortcut
                    elem_classes=["next-btn"],
                )
                random_btn.click(fn=get_random_sentence, outputs=[text_input])
                synth_btn.click(
                    fn=generate_comparison,
                    inputs=[text_input],
                    # Positional — must match generate_comparison's return tuple.
                    outputs=[
                        audio_a,
                        audio_b,
                        model_a_id,
                        model_b_id,
                        audio_a_base64,
                        audio_b_base64,
                        latency_a_state,
                        latency_b_state,
                        audio_row,
                        vote_row,
                        result_display,
                        synth_btn,
                        status_display,
                        model_a_label,
                        model_b_label,
                        next_round_btn,
                        vote_a_btn,
                        vote_b_btn,
                        vote_both_good_btn,
                        vote_both_bad_btn,
                    ],
                ).then(
                    # Snapshot the prompt into state so later textbox edits
                    # cannot change what text the vote is recorded against.
                    fn=lambda t: t,
                    inputs=[text_input],
                    outputs=[current_text],
                )
                # Vote handlers (all four buttons share the same signature)
                def make_vote_handler(vote_type: str):
                    # Factory closure: binds vote_type per button, avoiding the
                    # late-binding pitfall of defining lambdas in the loop below.
                    def handler(text, m_a, m_b, a_b64, b_b64, lat_a, lat_b):
                        return submit_vote(vote_type, text, m_a, m_b, a_b64, b_b64, lat_a, lat_b)
                    return handler
                # Positional — must match submit_vote's return tuple.
                vote_outputs = [
                    vote_row,
                    result_display,
                    next_round_btn,
                    model_a_label,
                    model_b_label,
                    vote_a_btn,
                    vote_b_btn,
                    vote_both_good_btn,
                    vote_both_bad_btn,
                ]
                vote_inputs = [
                    current_text,
                    model_a_id,
                    model_b_id,
                    audio_a_base64,
                    audio_b_base64,
                    latency_a_state,
                    latency_b_state,
                ]
                for btn, vtype in [
                    (vote_a_btn, "model_a"),
                    (vote_b_btn, "model_b"),
                    (vote_both_good_btn, "both_good"),
                    (vote_both_bad_btn, "both_bad"),
                ]:
                    btn.click(
                        fn=make_vote_handler(vtype),
                        inputs=vote_inputs,
                        outputs=vote_outputs,
                    )
                next_round_btn.click(
                    fn=clear_for_next_round,
                    # Positional — must match clear_for_next_round's return tuple.
                    outputs=[
                        audio_a,
                        audio_b,
                        audio_row,
                        vote_row,
                        result_display,
                        next_round_btn,
                        model_a_label,
                        model_b_label,
                        text_input,
                        vote_a_btn,
                        vote_b_btn,
                        vote_both_good_btn,
                        vote_both_bad_btn,
                    ],
                )
            # Leaderboard Tab
            with gr.TabItem("🏆 Leaderboard", id="leaderboard"):
                with gr.Column(elem_classes=["leaderboard-wrap"]):
                    leaderboard_display = gr.HTML(
                        "<p style='text-align:center; color:var(--body-text-color-subdued,#888);'>Loading…</p>"
                    )
                # Populated on page load rather than at build time.
                demo.load(fn=refresh_leaderboard, outputs=[leaderboard_display])
            # About Tab
            with gr.TabItem("📖 Story", id="story"):
                gr.HTML(ABOUT_MD)
    return demo
if __name__ == "__main__":
    # Local entry point: serve on all interfaces at port 7860 (the HF Spaces
    # convention), allowing up to 4 queued jobs to run concurrently.
    # Blocks.queue() returns the Blocks instance, so the calls chain.
    create_demo().queue(default_concurrency_limit=4).launch(
        server_name="0.0.0.0", server_port=7860
    )