Spaces:
Running
Running
| import gradio as gr | |
| import modal | |
| import base64 | |
| import random | |
| import json | |
| import uuid | |
| from pathlib import Path | |
| from typing import Optional | |
| from examples import SAMPLE_SENTENCES | |
# Name of the deployed Modal app hosting the TTS model classes and ArenaService.
APP_NAME = "arabic-tts-arena"
# Local JSON snapshot of the leaderboard; read by refresh_leaderboard() and
# _get_model_ratings().
LEADERBOARD_FILE = Path(__file__).parent / "leaderboard.json"
MAX_SYNTHESIS_RETRIES = 2  # per-model retry cap before giving up
MIN_BATTLES = 45  # minimum battles for a model to appear on the leaderboard (to avoid unjust rankings for new models with few votes)
# Lazily-populated process-wide cache of the backend model registry
# (see _get_available_models()).
_AVAILABLE_MODELS_CACHE: dict[str, dict[str, str]] | None = None
def _fetch_model_registry() -> dict[str, dict[str, str]]:
    """Fetch the model registry from the Modal backend.

    Returns dict like:
    {"chatterbox": {"class_name": "ChatterboxModel", "display_name": "Chatterbox"}, ...}

    Raises RuntimeError if the backend returns an empty/falsy registry.
    """
    arena_cls = modal.Cls.from_name(APP_NAME, "ArenaService")
    registry = arena_cls().get_model_registry.remote()
    if not registry:
        raise RuntimeError("Failed to fetch model registry from Modal backend")
    print(f"✅ Fetched {len(registry)} models from Modal backend")
    return registry
def _get_available_models() -> dict[str, dict[str, str]]:
    """Lazy-load the model registry on first use (avoids crashing at import time)."""
    global _AVAILABLE_MODELS_CACHE
    # Fast path: registry already fetched for this process.
    if _AVAILABLE_MODELS_CACHE is not None:
        return _AVAILABLE_MODELS_CACHE
    print("⏳ Fetching model registry from Modal backend...")
    _AVAILABLE_MODELS_CACHE = _fetch_model_registry()
    print(f"✅ Available models: {', '.join(_AVAILABLE_MODELS_CACHE.keys())}")
    return _AVAILABLE_MODELS_CACHE
def get_model_cls(model_id: str):
    """Get a Modal class by model_id using the registered class name."""
    registry = _get_available_models()
    if model_id not in registry:
        raise ValueError(f"Model not available: {model_id}")
    return modal.Cls.from_name(APP_NAME, registry[model_id]["class_name"])
def get_display_name(model_id: str) -> str:
    """Get the human-readable display name for a model.

    Falls back to the raw model_id when the model (or its display_name)
    is unknown.
    """
    entry = _get_available_models().get(model_id)
    if entry is None:
        return model_id
    return entry.get("display_name", model_id)
def get_arena_service():
    """Get ArenaService class for voting operations."""
    arena_service = modal.Cls.from_name(APP_NAME, "ArenaService")
    return arena_service
# Centered page header (title + blog/GitHub links) rendered at the top of the app.
HEADER_MD = """
<div style="text-align: center; max-width: 700px; margin: 0 auto;">
<h1 style="font-size: 2.2em; margin-bottom: 0.2em;"> Arabic TTS Arena</h1>
<p style="font-size: 1.1em; color: #666; margin-top: 0;">
Compare Arabic text‑to‑speech models side by side.<br>
Listen, vote, and help build the community leaderboard.
</p>
<p style="font-size: 0.85em; margin-top: 0.3em;">
<a href="https://huggingface.co/blog/Navid-AI/introducing-arabic-tts-arena" target="_blank" style="color: #10b981; text-decoration: none;">Blog post</a>
·
<a href="https://github.com/Navid-Gen-AI/arabic-tts-arena" target="_blank" style="color: #10b981; text-decoration: none;">GitHub</a>
</p>
</div>
"""
# One-line instructions banner shown above the battle controls.
HOW_IT_WORKS_MD = """
<div style="text-align: center; color: #888; font-size: 0.9em; margin-bottom: 0.5em;">
<strong>How it works:</strong>
Enter Arabic text → Listen to two anonymous models → Vote for the better one
</div>
"""
# Leaderboard header removed — metadata is now rendered inline by refresh_leaderboard()
# Long-form "About" tab content: a scoped <style> block followed by the page
# markup (updates feed, project rationale, voting/shortcut tables).
ABOUT_MD = """
<style>
.about-wrap {
max-width: 680px; margin: 0 auto; padding: 0.5em 0 2em 0;
font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
line-height: 1.7; color: var(--body-text-color);
}
.about-wrap h2 {
font-size: 1.6em; font-weight: 700; margin: 0 0 0.3em 0;
letter-spacing: -0.01em;
}
.about-wrap h3 {
font-size: 1.15em; font-weight: 700; margin: 1.6em 0 0.5em 0;
letter-spacing: -0.01em;
}
.about-wrap p, .about-wrap li {
font-size: 0.95em; color: #ccc;
}
.about-wrap ol { padding-left: 1.4em; }
.about-wrap ol li { margin-bottom: 0.35em; }
.about-wrap a {
color: #10b981; text-decoration: none;
}
.about-wrap a:hover { text-decoration: underline; }
.about-wrap strong { color: var(--body-text-color); }
/* News / Updates section */
.news-section {
border: 1px solid var(--border-color-primary);
border-radius: 10px;
padding: 1em 1.3em;
margin-bottom: 1.8em;
background: rgba(16,185,129,0.04);
}
.news-section h3 {
margin: 0 0 0.6em 0 !important; font-size: 1.05em;
}
.news-item {
display: flex; gap: 0.8em; align-items: baseline;
margin-bottom: 0.4em; font-size: 0.9em;
}
.news-date {
flex-shrink: 0; font-size: 0.82em; font-weight: 600;
color: #10b981; white-space: nowrap;
font-variant-numeric: tabular-nums;
}
.news-text { color: #ccc; }
/* Shortcuts / Voting tables */
.about-table {
width: 100%; border-collapse: collapse; margin: 0.5em 0 0.8em 0;
font-size: 0.9em;
}
.about-table th {
text-align: left; padding: 0.5em 0.8em;
border-bottom: 1px solid var(--border-color-primary);
font-weight: 600; font-size: 0.85em;
text-transform: uppercase; letter-spacing: 0.04em;
color: var(--body-text-color-subdued, #888);
}
.about-table td {
padding: 0.5em 0.8em;
border-bottom: 1px solid rgba(255,255,255,0.05);
color: #ccc;
}
.about-table td:first-child { font-weight: 600; color: var(--body-text-color); }
.about-table kbd {
display: inline-block; padding: 0.15em 0.5em;
border-radius: 4px; font-size: 0.9em; font-family: monospace;
background: rgba(255,255,255,0.08);
border: 1px solid rgba(255,255,255,0.12);
color: var(--body-text-color);
}
/* Contribute card */
.contribute-card {
border: 1px solid var(--border-color-primary);
border-radius: 10px;
padding: 1.2em 1.4em;
margin-top: 0.5em;
background: rgba(255,255,255,0.02);
}
.contribute-card p { margin-bottom: 0.6em; }
.contribute-card code {
padding: 0.15em 0.45em; border-radius: 4px;
font-size: 0.88em;
background: rgba(255,255,255,0.08);
color: #10b981;
}
.contribute-steps { display: flex; flex-direction: column; gap: 10px; }
.contribute-step {
display: flex; align-items: center; gap: 12px;
font-size: 0.93em; color: #ccc;
}
.step-num {
flex-shrink: 0;
width: 28px; height: 28px;
display: flex; align-items: center; justify-content: center;
border-radius: 50%;
font-weight: 700; font-size: 0.85em;
background: rgba(16,185,129,0.15);
color: #10b981;
}
</style>
<div class="about-wrap">
<div class="news-section">
<h3>📢 Latest Updates</h3>
<div class="news-item"><span class="news-date">Mar 17, 2026</span><span class="news-text">✨ Added Latency to Leaderboard based on feedback from <a href="https://www.linkedin.com/in/hazem-abdelazim-95153b72/" target="_blank">Dr. Hazem Abdelazim</a></span></div>
<div class="news-item"><span class="news-date">Mar 12, 2026</span><span class="news-text">🎉 Arena launched with 12 Arabic TTS models — <a href="https://huggingface.co/blog/Navid-AI/introducing-arabic-tts-arena" target="_blank">read the blog post</a></span></div>
<div class="news-item"><span class="news-date" style="opacity:0;">—</span><span class="news-text">🤝 Have a model that should be here? <a href="https://github.com/Navid-Gen-AI/arabic-tts-arena" target="_blank">Open a PR</a> — we'd love to welcome it in.</span></div>
</div>
<h2>Why We Built This</h2>
<p>Arabic is spoken by over 400 million people. It's the language of poetry, prayer, storytelling, and everyday life. Yet when it comes to text-to-speech, Arabic has been an afterthought — tested in labs, benchmarked on charts, but rarely <em>listened to</em> by the people it's meant to serve.</p>
<p>We wanted to change that. Not with another paper or another metric — but by putting the microphone in <strong>your</strong> hands. You listen. You choose. Your ear is the benchmark.</p>
<h3>How the Arena Works</h3>
<ol>
<li>You type (or pick) an Arabic sentence</li>
<li>Two anonymous models read it aloud</li>
<li>You vote for the one that sounds more natural, more <em>human</em></li>
<li>Rankings update — and the best voices rise to the top</li>
</ol>
<p>No model names are shown until after you vote, so every judgement is pure. Over time, thousands of these small choices build a leaderboard that reflects what people actually prefer — not what a loss function thinks is best.</p>
<h3>Your Moves</h3>
<table class="about-table">
<thead><tr><th>Choice</th><th>What it means</th></tr></thead>
<tbody>
<tr><td>A is Better</td><td>Voice A sounded more natural to you</td></tr>
<tr><td>B is Better</td><td>Voice B sounded more natural to you</td></tr>
<tr><td>Both Good</td><td>Honestly, both sounded great</td></tr>
<tr><td>Both Bad</td><td>Neither felt right</td></tr>
</tbody>
</table>
<h3>Quick Keys</h3>
<table class="about-table">
<thead><tr><th>Key</th><th>Action</th></tr></thead>
<tbody>
<tr><td><kbd>A</kbd></td><td>Vote for A</td></tr>
<tr><td><kbd>B</kbd></td><td>Vote for B</td></tr>
<tr><td><kbd>N</kbd></td><td>Next round</td></tr>
</tbody>
</table>
<p style="text-align: center; color: #888; font-size: 0.88em; margin-top: 2em;">
Built with ❤️ for the Arabic-speaking world by <a href="https://github.com/Navid-Gen-AI" target="_blank">Navid</a>
</p>
</div>
"""
def decode_audio_to_file(audio_base64: str) -> Optional[str]:
    """Decode base64 WAV audio and write to a temp file.

    Returns the file path (Gradio gr.Audio accepts file paths), or None if
    the payload cannot be decoded or the file cannot be written.
    """
    import binascii
    import tempfile

    try:
        wav_bytes = base64.b64decode(audio_base64)
        # delete=False: the file must outlive this function so Gradio can
        # serve it; the context manager still guarantees close on all paths
        # (the original flushed/closed by hand and leaked on write errors).
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp.write(wav_bytes)
            return tmp.name
    except (binascii.Error, ValueError, TypeError, OSError):
        # Narrowed from a bare `except Exception`: bad/short base64 raises
        # binascii.Error (a ValueError), non-str/bytes input raises TypeError,
        # and disk/permission problems raise OSError. Anything else is a real
        # bug and should surface instead of silently returning None.
        return None
def synthesize_audio(text: str, model_id: str) -> dict:
    """Call the ArenaService to synthesize (or return cached) audio.

    The backend checks its audio cache first and only calls the GPU model
    on a cache miss, saving compute and reducing latency.
    Retries up to MAX_SYNTHESIS_RETRIES times on failure before giving up.
    """
    last_error = None
    attempt = 0
    while attempt < MAX_SYNTHESIS_RETRIES:
        attempt += 1
        try:
            backend = get_arena_service()
            response = backend().synthesize_or_cache.remote(text, model_id)
            if response.get("success"):
                return response
            last_error = response.get("error", "Unknown synthesis error")
            print(f"⚠️ {model_id} attempt {attempt} failed: {last_error}")
        except Exception as e:
            last_error = str(e)
            print(f"⚠️ {model_id} attempt {attempt} exception: {last_error}")
    # All retries exhausted — hand back a failure dict the callers expect.
    return {
        "success": False,
        "error": f"{model_id} failed after {MAX_SYNTHESIS_RETRIES} attempts: {last_error}",
        "model_id": model_id,
    }
def _get_model_ratings() -> dict[str, dict]:
    """Read per-model elo and ci from the local leaderboard file.

    Best-effort: any read/parse problem yields an empty mapping.
    """
    try:
        if not LEADERBOARD_FILE.exists():
            return {}
        with open(LEADERBOARD_FILE, "r") as f:
            payload = json.load(f)
        ratings: dict[str, dict] = {}
        for entry in payload.get("models", []):
            ratings[entry["model_id"]] = {
                "elo": entry.get("elo", 1000),
                "ci": entry.get("ci", 0),
                "battles": entry.get("battles", 0),
            }
        return ratings
    except Exception:
        return {}
def get_random_model_pair() -> tuple[str, str]:
    """Select two models using adaptive pairing for maximum information gain.

    Combines two signals to score every possible pair:
    1. **CI overlap** — pairs whose confidence intervals overlap are the
       most uncertain (we don't know which is better), so a vote between
       them is maximally informative. Measured as the fraction of overlap
       relative to the smaller CI. Pairs with no CI data yet get the
       maximum overlap score (1.0) so new models are explored.
    2. **Under-sampling** — pairs where either model has few battles get
       a boost via inverse-sqrt weighting.

    The two signals are multiplied together and used as sampling weights
    over all possible pairs, so the selection is stochastic (not greedy)
    and every pair retains a non-zero chance of appearing.
    """
    import math
    from itertools import combinations

    model_ids = list(_get_available_models().keys())
    if len(model_ids) < 2:
        raise ValueError("Not enough models available for comparison")
    ratings = _get_model_ratings()

    def pair_weight(m1: str, m2: str) -> float:
        # Score one candidate pair: CI-overlap signal × exploration signal.
        r1, r2 = ratings.get(m1, {}), ratings.get(m2, {})
        elo1, elo2 = r1.get("elo", 1000), r2.get("elo", 1000)
        ci1, ci2 = r1.get("ci", 0), r2.get("ci", 0)
        battles1, battles2 = r1.get("battles", 0), r2.get("battles", 0)
        # Signal 1: CI overlap score in [0.05, 1]; missing CI → fully uncertain.
        if ci1 <= 0 or ci2 <= 0:
            overlap_score = 1.0
        else:
            # Intervals are [elo - ci, elo + ci].
            overlap = max(0.0, min(elo1 + ci1, elo2 + ci2) - max(elo1 - ci1, elo2 - ci2))
            smaller_width = 2 * min(ci1, ci2)
            overlap_score = min(overlap / smaller_width, 1.0) if smaller_width > 0 else 1.0
            # Floor so well-separated pairs still occasionally appear.
            overlap_score = max(overlap_score, 0.05)
        # Signal 2: inverse-sqrt boost for under-sampled models.
        exploration = 0.5 * (1.0 / math.sqrt(battles1 + 1) + 1.0 / math.sqrt(battles2 + 1))
        return overlap_score * exploration

    candidate_pairs = list(combinations(model_ids, 2))
    weights = [pair_weight(a, b) for a, b in candidate_pairs]
    chosen = random.choices(candidate_pairs, weights=weights, k=1)[0]
    # Randomise A/B assignment so there's no positional bias.
    if random.random() < 0.5:
        return chosen
    return (chosen[1], chosen[0])
def get_random_sentence():
    """Return a random Arabic sample sentence."""
    sample = random.choice(SAMPLE_SENTENCES)
    return sample
def _empty_comparison():
    """Return values that reset the UI to the pre-synthesis state.

    20-tuple, positionally matched to the Gradio outputs of the battle tab.
    """
    return (
        None, None,                    # audio_a, audio_b
        None, None,                    # model_a_id, model_b_id
        None, None,                    # audio_a_base64, audio_b_base64
        None, None,                    # latency_a, latency_b
        gr.update(visible=False),      # audio_row
        gr.update(visible=False),      # vote_row
        gr.update(visible=False),      # result_display
        gr.update(value="🔊 Synthesize", interactive=True),  # synth_btn
        gr.update(value="", visible=False),                  # status_display
        "🔒 Hidden",                   # model_a_label
        "🔒 Hidden",                   # model_b_label
        gr.update(visible=False),      # next_round_btn
        gr.update(interactive=True),   # vote_a_btn
        gr.update(interactive=True),   # vote_b_btn
        gr.update(interactive=True),   # vote_both_good_btn
        gr.update(interactive=True),   # vote_both_bad_btn
    )
def _pick_replacement(exclude: set[str]) -> str | None:
    """Pick a model not in *exclude*, or None if none left."""
    pool = [model for model in _get_available_models() if model not in exclude]
    if not pool:
        return None
    return random.choice(pool)
def _synth_one(text: str, model_id: str, used: set[str]) -> tuple[dict | None, str]:
    """Try to synthesize with *model_id*; on failure swap in a replacement once.

    Returns (result_dict_or_None, final_model_id). *used* is mutated with any
    replacement model that gets drafted in.
    """
    primary = synthesize_audio(text, model_id)
    if primary.get("success"):
        return primary, model_id
    # Primary model failed after its retries — draft one replacement.
    fallback_id = _pick_replacement(used)
    if fallback_id:
        used.add(fallback_id)
        fallback = synthesize_audio(text, fallback_id)
        if fallback.get("success"):
            return fallback, fallback_id
    return None, model_id  # give up
def generate_comparison(text: str):
    """Generate audio from two random TTS models for comparison.

    Both models are synthesized in parallel using threads to halve wait time.
    Uses a generator to yield status updates so the user sees progress.

    Yields 20-tuples, positionally matched to the Gradio outputs:
    (audio_a, audio_b, model_a_id, model_b_id, audio_a_base64, audio_b_base64,
     latency_a, latency_b, audio_row, vote_row, result_display, synth_btn,
     status_display, model_a_label, model_b_label, next_round_btn,
     vote_a_btn, vote_b_btn, vote_both_good_btn, vote_both_bad_btn)
    """
    from concurrent.futures import ThreadPoolExecutor

    # Guard: empty/whitespace-only input — reset the UI and bail out.
    if not text or not text.strip():
        gr.Warning("Please enter some Arabic text first.")
        yield _empty_comparison()
        return
    text = text.strip()
    model_a_id, model_b_id = get_random_model_pair()
    # — Show "synthesizing" status: clear state, disable the button, show spinner text —
    yield (
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,  # latency_a, latency_b
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(value="⏳ Synthesizing…", interactive=False),
        gr.update(value="⏳ Generating audio from both models…", visible=True),
        "🔒 Hidden",
        "🔒 Hidden",
        gr.update(visible=False),
        gr.update(interactive=True),
        gr.update(interactive=True),
        gr.update(interactive=True),
        gr.update(interactive=True),
    )
    # — Synthesize both models in parallel —
    # Each thread gets its own used set for the replacement fallback logic.
    def synth_a():
        used = {model_a_id, model_b_id}
        return _synth_one(text, model_a_id, used)

    def synth_b():
        used = {model_a_id, model_b_id}
        return _synth_one(text, model_b_id, used)

    try:
        with ThreadPoolExecutor(max_workers=2) as pool:
            future_a = pool.submit(synth_a)
            future_b = pool.submit(synth_b)
            # NOTE: _synth_one may swap in a replacement model, so the ids are
            # rebound here to whichever model actually produced the audio.
            result_a, model_a_id = future_a.result()
            result_b, model_b_id = future_b.result()
    except Exception as e:
        gr.Warning(f"Connection error — is the backend deployed? ({e})")
        yield _empty_comparison()
        return
    if result_a is None:
        gr.Warning("Model A synthesis failed after retries. Please try again.")
        yield _empty_comparison()
        return
    if result_b is None:
        gr.Warning("Model B synthesis failed after retries. Please try again.")
        yield _empty_comparison()
        return
    # — Decode audio to temp files for Gradio —
    audio_a_path = decode_audio_to_file(result_a["audio_base64"])
    audio_b_path = decode_audio_to_file(result_b["audio_base64"])
    if not audio_a_path or not audio_b_path:
        gr.Warning("Failed to decode audio from backend.")
        yield _empty_comparison()
        return
    # Extract latency (None for cache hits / legacy responses)
    latency_a = result_a.get("latency_seconds")
    latency_b = result_b.get("latency_seconds")
    # Final state: reveal players and voting controls, keep model names hidden.
    yield (
        audio_a_path,
        audio_b_path,
        model_a_id,
        model_b_id,
        result_a["audio_base64"],
        result_b["audio_base64"],
        latency_a,
        latency_b,
        gr.update(visible=True),  # audio_row
        gr.update(visible=True),  # vote_row
        gr.update(visible=False),  # result_display
        gr.update(value="🔊 Synthesize", interactive=True),
        gr.update(value="", visible=False),  # hide status
        "🔒 Hidden",  # model_a_label
        "🔒 Hidden",  # model_b_label
        gr.update(visible=False),  # next_round_btn
        gr.update(interactive=True),  # vote_a_btn
        gr.update(interactive=True),  # vote_b_btn
        gr.update(interactive=True),  # vote_both_good_btn
        gr.update(interactive=True),  # vote_both_bad_btn
    )
def submit_vote(
    vote: str,
    text_prompt: str,
    model_a_id: str,
    model_b_id: str,
    audio_a_b64: str,
    audio_b_b64: str,
    latency_a: float | None,
    latency_b: float | None,
):
    """Submit a vote for the comparison.

    Records the vote on the Modal backend, then reveals the model names and
    swaps the voting buttons for a result card.

    Returns a 9-tuple of Gradio updates:
    (vote_row, result_display, next_round_btn, model_a_label, model_b_label,
     vote_a_btn, vote_b_btn, vote_both_good_btn, vote_both_bad_btn)
    """

    def _keep_voting_ui():
        # Shared error state: keep the vote row visible and the buttons
        # untouched so the user can retry. (Previously this 9-tuple was
        # duplicated verbatim in three places.)
        return (
            gr.update(visible=True),   # vote_row stays
            gr.update(visible=False),  # result_display
            gr.update(visible=False),  # next_round_btn
            "🔒 Hidden",
            "🔒 Hidden",
            gr.update(),  # vote_a_btn unchanged
            gr.update(),  # vote_b_btn unchanged
            gr.update(),  # vote_both_good_btn unchanged
            gr.update(),  # vote_both_bad_btn unchanged
        )

    if not model_a_id or not model_b_id:
        gr.Warning("Please synthesize audio first.")
        return _keep_voting_ui()
    session_id = uuid.uuid4().hex
    try:
        service = get_arena_service()
        result = service().record_vote.remote(
            session_id=session_id,
            text=text_prompt,
            model_a=model_a_id,
            model_b=model_b_id,
            winner=vote,
            audio_a_base64=audio_a_b64,
            audio_b_base64=audio_b_b64,
            latency_a=latency_a,
            latency_b=latency_b,
        )
    except Exception as e:
        gr.Warning(f"Vote failed: {e}")
        return _keep_voting_ui()
    if not result.get("success"):
        gr.Warning(f"Error: {result.get('error', 'Unknown')}")
        return _keep_voting_ui()
    # Vote recorded — build the reveal card.
    vote_emoji = {
        "model_a": "🅰️ Model A",
        "model_b": "🅱️ Model B",
        "both_good": "👍 Both Good",
        "both_bad": "👎 Both Bad",
    }
    name_a = get_display_name(model_a_id)
    name_b = get_display_name(model_b_id)
    result_md = f"""
<div style="text-align:center; padding: 1.2em 1em; border-radius: 12px;
background: var(--block-background-fill); border: 1px solid var(--border-color-primary);">
<div style="font-size: 1.6em; margin-bottom: 0.3em;">✅ Vote Recorded!</div>
<div style="font-size: 1.05em; margin-bottom: 0.8em;">
You chose: <strong>{vote_emoji.get(vote, vote)}</strong>
</div>
<div style="display: flex; justify-content: center; gap: 2em; font-size: 1em;">
<div>🅰️ <strong>{name_a}</strong></div>
<div style="color: #aaa;">vs</div>
<div>🅱️ <strong>{name_b}</strong></div>
</div>
<div style="margin-top: 0.8em; color: #888; font-size: 0.85em;">
Thanks for voting! The leaderboard updates daily.
</div>
</div>
"""
    return (
        gr.update(visible=False),                  # hide vote_row
        gr.update(value=result_md, visible=True),  # show result
        gr.update(visible=True),                   # show next_round_btn
        f"**{name_a}**",                           # reveal model A
        f"**{name_b}**",                           # reveal model B
        gr.update(interactive=False),              # disable vote_a_btn
        gr.update(interactive=False),              # disable vote_b_btn
        gr.update(interactive=False),              # disable vote_both_good_btn
        gr.update(interactive=False),              # disable vote_both_bad_btn
    )
def refresh_leaderboard():
    """Read and display leaderboard from local JSON file.

    Returns an HTML string: a <style> block plus a meta line, column header,
    and one grid row per qualifying model. Any failure is reported inline as
    an error string rather than raised.
    """
    try:
        if not LEADERBOARD_FILE.exists():
            return _empty_leaderboard_md()
        with open(LEADERBOARD_FILE, "r") as f:
            data = json.load(f)
        models = data.get("models", [])
        last_updated = data.get("last_updated", "")
        if not models:
            return _empty_leaderboard_md()
        # Hide models with fewer than MIN_BATTLES (currently 45) battles
        models = [m for m in models if m.get("battles", 0) >= MIN_BATTLES]
        if not models:
            return _empty_leaderboard_md()
        # Re-assign ranks after filtering
        for i, m in enumerate(models, start=1):
            m["rank"] = i
        # Format timestamp (ISO-8601 with optional trailing Z)
        try:
            from datetime import datetime
            dt = datetime.fromisoformat(last_updated.replace("Z", "+00:00"))
            updated_str = dt.strftime("%b %d, %Y")
        except Exception:
            updated_str = last_updated or "—"
        # --- Styles ---
        style_block = """
<style>
.lb-container { max-width: 660px; margin: 0 auto; }
.lb-meta {
display: flex; justify-content: center; align-items: center;
gap: 0.6em;
font-size: 0.85em;
color: var(--body-text-color-subdued, #999);
padding: 0 0 0.8em 0;
}
.lb-meta strong {
color: var(--body-text-color);
font-weight: 700;
}
.lb-meta-sep {
color: var(--body-text-color-subdued, #666);
opacity: 0.5;
}
.lb-list { display: flex; flex-direction: column; gap: 0; }
/* Each model row */
.lb-item {
display: grid;
grid-template-columns: 48px 1fr 120px 90px 72px;
align-items: center;
gap: 0 12px;
padding: 14px 20px;
border-bottom: 1px solid var(--border-color-primary);
}
.lb-item:first-child { border-top: 1px solid var(--border-color-primary); }
.lb-item:hover { background: rgba(255,255,255,0.03); }
/* Top-3 subtle left accent */
.lb-item.gold { background: rgba(255,195,0,0.04); }
.lb-item.silver { background: rgba(180,180,195,0.04); }
.lb-item.bronze { background: rgba(210,140,70,0.03); }
.lb-item.gold:hover { background: rgba(255,195,0,0.08); }
.lb-item.silver:hover { background: rgba(180,180,195,0.08); }
.lb-item.bronze:hover { background: rgba(210,140,70,0.07); }
.lb-rank {
font-size: 1.1em; font-weight: 600;
text-align: center;
color: var(--body-text-color);
}
.lb-rank.r-gold { color: #E8C33A; }
.lb-rank.r-silver { color: #C0C0C8; }
.lb-rank.r-bronze { color: #D4944A; }
.lb-name {
font-weight: 600; font-size: 1.05em;
color: var(--body-text-color) !important;
text-decoration: none !important;
white-space: nowrap; overflow: hidden; text-overflow: ellipsis;
cursor: default;
}
a.lb-name { cursor: pointer; }
a.lb-name::after {
content: '';
display: inline-block;
width: 0.95em; height: 0.95em;
margin-left: 5px;
vertical-align: middle;
opacity: 0;
transition: opacity 0.15s;
background: currentColor;
-webkit-mask: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='none' stroke='currentColor' stroke-width='2.5' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpath d='M7 17L17 7'/%3E%3Cpath d='M7 7h10v10'/%3E%3C/svg%3E") no-repeat center/contain;
mask: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='none' stroke='currentColor' stroke-width='2.5' stroke-linecap='round' stroke-linejoin='round'%3E%3Cpath d='M7 17L17 7'/%3E%3Cpath d='M7 7h10v10'/%3E%3C/svg%3E") no-repeat center/contain;
}
a.lb-name:hover { color: #10b981 !important; text-decoration: none !important; }
a.lb-name:hover::after { opacity: 1; }
.lb-score {
text-align: center;
font-weight: 700; font-size: 1.05em;
font-variant-numeric: tabular-nums;
color: var(--body-text-color);
}
.lb-votes {
text-align: center;
font-size: 0.92em;
font-variant-numeric: tabular-nums;
color: var(--body-text-color);
}
.lb-latency {
text-align: center;
font-size: 0.92em;
font-variant-numeric: tabular-nums;
color: var(--body-text-color);
position: relative;
cursor: default;
overflow: hidden;
}
.lb-latency[data-gpu] {
cursor: pointer;
}
/* The latency value and GPU label live in spans inside the cell */
.lb-latency .lb-lat-val {
display: block;
transition: transform 0.25s ease, opacity 0.25s ease;
}
.lb-latency .lb-lat-gpu {
display: block;
position: absolute;
inset: 0;
display: flex;
align-items: center;
justify-content: center;
font-size: 0.85em;
font-weight: 600;
color: #10b981;
letter-spacing: 0.01em;
transform: translateY(100%);
opacity: 0;
transition: transform 0.25s ease, opacity 0.25s ease;
}
.lb-latency[data-gpu]:hover .lb-lat-val {
transform: translateY(-100%);
opacity: 0;
}
.lb-latency[data-gpu]:hover .lb-lat-gpu {
transform: translateY(0);
opacity: 1;
}
.lb-ci {
font-size: 0.78em;
font-weight: 400;
color: var(--body-text-color-subdued, #888);
margin-left: 3px;
font-variant-numeric: tabular-nums;
}
/* Column labels */
.lb-colheader {
display: grid;
grid-template-columns: 48px 1fr 120px 90px 72px;
align-items: center;
gap: 0 12px;
padding: 6px 20px 8px 20px;
font-size: 0.75em;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.05em;
color: var(--body-text-color-subdued, #999);
}
.lb-colheader span:nth-child(3),
.lb-colheader span:nth-child(4),
.lb-colheader span:nth-child(5) { text-align: center; }
@media (max-width: 600px) {
.lb-container { margin: 0 4px; }
.lb-colheader {
grid-template-columns: 36px 1fr 72px;
padding: 6px 10px 8px 10px;
font-size: 0.7em;
}
.lb-colheader span:nth-child(4),
.lb-colheader span:nth-child(5) { display: none; }
.lb-item {
grid-template-columns: 36px 1fr 72px;
gap: 0 6px;
padding: 10px 10px;
}
.lb-rank { font-size: 0.95em; }
.lb-name { font-size: 0.92em; }
.lb-score { font-size: 0.92em; }
.lb-ci { font-size: 0.7em; }
.lb-votes { display: none; }
.lb-latency { display: none; }
}
@media (max-width: 380px) {
.lb-colheader {
grid-template-columns: 30px 1fr 64px;
padding: 5px 6px 7px 6px;
}
.lb-item {
grid-template-columns: 30px 1fr 64px;
gap: 0 4px;
padding: 9px 6px;
}
.lb-rank { font-size: 0.88em; }
.lb-name { font-size: 0.85em; }
.lb-score { font-size: 0.85em; }
.lb-ci { display: none; }
}
</style>
"""
        # Metadata line (total battles double-counts each pairwise battle once
        # per participant, if "battles" is a per-model count — TODO confirm)
        total_battles = sum(m.get("battles", 0) for m in models)
        meta_html = (
            f'<div class="lb-meta">'
            f'<span>⚔️ <strong>{total_battles:,}</strong> battles</span>'
            f'<span class="lb-meta-sep">·</span>'
            f'<span>Updated <strong>{updated_str}</strong></span>'
            f'</div>'
        )
        # Column labels
        col_header = (
            '<div class="lb-colheader">'
            "<span>Rank</span>"
            "<span>Model</span>"
            "<span>Score</span>"
            "<span>Latency</span>"
            "<span>Battles</span>"
            "</div>"
        )
        # Build rows; ranks 1-3 get gold/silver/bronze accents
        tier_row = {1: "gold", 2: "silver", 3: "bronze"}
        tier_rank = {1: "r-gold", 2: "r-silver", 3: "r-bronze"}
        items_html = ""
        for entry in models:
            rank = entry["rank"]
            name = entry["name"]
            model_url = entry.get("model_url", "")
            elo = entry["elo"]
            ci = entry.get("ci", 0)
            battles = entry.get("battles", 0)
            avg_latency = entry.get("avg_latency")
            gpu = entry.get("gpu", "")
            row_cls = tier_row.get(rank, "")
            rank_cls = tier_rank.get(rank, "")
            # Models with no GPU label are assumed to be API-backed.
            is_api = not gpu
            if model_url:
                name_el = (
                    f'<a class="lb-name" href="{model_url}" target="_blank">{name}</a>'
                )
            else:
                name_el = f'<span class="lb-name">{name}</span>'
            votes_text = f"{battles:,}" if battles else "—"
            ci_html = f'<span class="lb-ci">±{ci:.0f}</span>' if ci else ""
            latency_text = f"{avg_latency:.1f}s" if avg_latency is not None else "—"
            if gpu:
                gpu_attr = f' data-gpu="{gpu}"'
                gpu_label = f'<span class="lb-lat-gpu">⚡ {gpu}</span>'
            elif is_api:
                gpu_attr = ' data-gpu="API"'
                gpu_label = '<span class="lb-lat-gpu">☁️ API</span>'
            else:
                # NOTE(review): unreachable — is_api is exactly `not gpu`, so
                # one of the two branches above always matches.
                gpu_attr = ""
                gpu_label = ""
            items_html += (
                f'<div class="lb-item {row_cls}">'
                f'<div class="lb-rank {rank_cls}">{rank}</div>'
                f"<div>{name_el}</div>"
                f'<div class="lb-score">{elo:.0f}{ci_html}</div>'
                f'<div class="lb-latency"{gpu_attr}><span class="lb-lat-val">{latency_text}</span>{gpu_label}</div>'
                f'<div class="lb-votes">{votes_text}</div>'
                f"</div>"
            )
        return (
            f'<div class="lb-container">'
            f"{style_block}{meta_html}{col_header}"
            f'<div class="lb-list">{items_html}</div>'
            f"</div>"
        )
    except Exception as e:
        return f"❌ Error loading leaderboard: {e}"
| def _empty_leaderboard_md() -> str: | |
| return ( | |
| '<div style="text-align:center; padding:3em 1em; color:var(--body-text-color-subdued,#888);">' | |
| "<h3>No data yet!</h3>" | |
| "<p>Be the first to vote — head to the <strong>⚔️ Battle</strong> tab.</p>" | |
| "</div>" | |
| ) | |
def clear_for_next_round():
    """Reset the UI for a new comparison.

    Returns one value per output component wired to next_round_btn, in order:
    both audio players cleared, the audio/vote/result/next-round sections
    hidden, model labels re-masked, a fresh random sentence in the textbox,
    and all four vote buttons re-enabled.
    """
    # Fresh gr.update dict per output — never share one update object
    # across components.
    def _hide():
        return gr.update(visible=False)

    def _enable():
        return gr.update(interactive=True)

    return (
        None,                    # audio_a
        None,                    # audio_b
        _hide(),                 # audio_row
        _hide(),                 # vote_row
        _hide(),                 # result_display
        _hide(),                 # next_round_btn
        "🔒 Hidden",             # model_a_label
        "🔒 Hidden",             # model_b_label
        get_random_sentence(),   # text_input — new random sentence
        _enable(),               # vote_a_btn
        _enable(),               # vote_b_btn
        _enable(),               # vote_both_good_btn
        _enable(),               # vote_both_bad_btn
    )
# Global stylesheet injected into the Gradio app (passed as css= to gr.Blocks
# in create_demo). Covers: RTL styling for the Arabic text box, the A/B
# comparison column headers and audio players, vote/next/synthesize button
# sizing, the pulsing status line shown during synthesis, leaderboard wrapper
# width, and centered tab navigation. Leaderboard row styles are emitted
# separately alongside the leaderboard HTML, not here.
CUSTOM_CSS = """
/* Hide Gradio footer */
footer { display: none !important; }
/* RTL text input */
.text-input textarea {
    font-size: 1.15em !important;
    direction: rtl;
    line-height: 1.6;
}
/* Center helpers */
.center-text { text-align: center !important; }
/* Model column labels */
.model-label {
    text-align: center;
    font-weight: 600;
    font-size: 1em;
    padding: 0.4em 0 0.1em 0;
    min-height: 28px;
    letter-spacing: 0.02em;
}
/* Column header badges (A / B) */
.column-header {
    text-align: center;
    font-size: 1.15em;
    font-weight: 700;
    padding: 0.3em 0;
    margin-bottom: 0.15em;
}
/* Audio players */
.audio-player { min-height: 70px; }
/* Voting buttons — consistent sizing */
.vote-btn {
    min-height: 48px !important;
    font-size: 1em !important;
    font-weight: 600 !important;
}
.tie-btn {
    min-height: 44px !important;
    font-weight: 500 !important;
}
/* Next round button */
.next-btn {
    min-height: 48px !important;
    font-size: 1.05em !important;
    font-weight: 600 !important;
    margin-top: 0.5em !important;
}
/* Synth button */
.synth-btn {
    min-height: 48px !important;
    font-size: 1.05em !important;
}
/* Status message during synthesis */
.status-msg {
    text-align: center;
    font-size: 1.05em;
    color: #888;
    padding: 0.6em 0;
    animation: pulse 1.5s ease-in-out infinite;
}
@keyframes pulse {
    0%, 100% { opacity: 1; }
    50% { opacity: 0.5; }
}
/* Leaderboard tab spacing */
.leaderboard-wrap { max-width: 680px; margin: 0 auto; }
/* Center the tab buttons */
.tabs > .tab-nav,
div[role="tablist"],
.tab-nav {
    justify-content: center !important;
}
"""
# Keyboard shortcuts injected into the page <head> (passed as head= to
# gr.Blocks in create_demo): A / B cast a vote for the matching model,
# N starts the next round. Uses 'keydown' instead of the deprecated
# 'keypress' event, and additionally skips events that carry a modifier
# (so browser shortcuts like Ctrl+A / Cmd+N are not hijacked) or that
# target an editable element.
SHORTCUT_JS = """
<script>
document.addEventListener('keydown', function(e) {
    // Don't hijack browser/OS shortcuts (Ctrl+A, Cmd+N, ...)
    if (e.ctrlKey || e.metaKey || e.altKey) return;
    // Don't fire when user is typing in an input
    const tag = e.target.tagName.toLowerCase();
    if (tag === 'input' || tag === 'textarea' || e.target.isContentEditable) return;
    switch (e.key.toLowerCase()) {
        case 'a': document.getElementById('vote-a-btn')?.click(); break;
        case 'b': document.getElementById('vote-b-btn')?.click(); break;
        case 'n': document.getElementById('next-round-btn')?.click(); break;
    }
}, false);
</script>
"""
def create_demo():
    """Build and return the Gradio Blocks app (Battle / Leaderboard / Story tabs).

    Wiring overview:
      * synth_btn -> generate_comparison fills both audio players, the hidden
        per-round state (model ids, base64 audio, latencies) and toggles the
        visibility of the audio/vote sections; a chained .then snapshots the
        textbox into current_text.
      * The four vote buttons all funnel into submit_vote with a different
        vote_type string and share one inputs/outputs list.
      * next_round_btn -> clear_for_next_round resets everything for a new round.

    NOTE(review): the order of every `outputs=[...]` list below must match the
    positional order of the corresponding handler's return tuple — keep them in
    sync when editing.
    """
    with gr.Blocks(
        title="Arabic TTS Arena",
        theme=gr.themes.Soft(
            primary_hue="emerald",
            secondary_hue="slate",
            neutral_hue="slate",
        ),
        css=CUSTOM_CSS,
        head=SHORTCUT_JS,  # page-level keyboard shortcuts: A / B / N
    ) as demo:
        # Header
        gr.HTML(HEADER_MD)
        with gr.Tabs():
            # Voting Tab
            with gr.TabItem("⚔️ Battle", id="battle"):
                gr.HTML(HOW_IT_WORKS_MD)
                # Hidden state — carried from the synthesize step to the vote
                # step so submit_vote knows which anonymized models competed.
                model_a_id = gr.State(value=None)
                model_b_id = gr.State(value=None)
                audio_a_base64 = gr.State(value=None)
                audio_b_base64 = gr.State(value=None)
                latency_a_state = gr.State(value=None)
                latency_b_state = gr.State(value=None)
                current_text = gr.State(value="")  # text as of last synthesis, not the live textbox
                # — Text input —
                with gr.Group():
                    with gr.Row():
                        text_input = gr.Textbox(
                            container=False,
                            show_label=False,
                            placeholder="اكتب نصاً عربياً هنا...",
                            lines=1,
                            max_lines=3,
                            scale=20,
                            elem_classes=["text-input"],  # RTL styling from CUSTOM_CSS
                        )
                        random_btn = gr.Button(
                            "🎲",
                            scale=0,
                            min_width=50,
                            variant="secondary",
                        )
                    synth_btn = gr.Button(
                        "🔊 Synthesize",
                        variant="primary",
                        size="lg",
                        elem_classes=["synth-btn"],
                    )
                # — Status indicator (shown during synthesis) —
                status_display = gr.HTML(
                    value="", visible=False, elem_classes=["status-msg"]
                )
                # — Audio players (hidden until synthesis) —
                with gr.Row(visible=False, equal_height=True) as audio_row:
                    with gr.Column():
                        gr.Markdown(
                            "### 🅰️ Model A",
                            elem_classes=["column-header", "center-text"],
                        )
                        audio_a = gr.Audio(
                            show_label=False,
                            interactive=False,
                            elem_classes=["audio-player"],
                        )
                        # Masked until a vote is cast, then revealed by submit_vote.
                        model_a_label = gr.Markdown(
                            "🔒 Hidden",
                            elem_classes=["model-label", "center-text"],
                        )
                        vote_a_btn = gr.Button(
                            "👆 A is Better",
                            variant="primary",
                            elem_id="vote-a-btn",  # targeted by the 'a' keyboard shortcut
                            elem_classes=["vote-btn"],
                        )
                    with gr.Column():
                        gr.Markdown(
                            "### 🅱️ Model B",
                            elem_classes=["column-header", "center-text"],
                        )
                        audio_b = gr.Audio(
                            show_label=False,
                            interactive=False,
                            elem_classes=["audio-player"],
                        )
                        model_b_label = gr.Markdown(
                            "🔒 Hidden",
                            elem_classes=["model-label", "center-text"],
                        )
                        vote_b_btn = gr.Button(
                            "👆 B is Better",
                            variant="primary",
                            elem_id="vote-b-btn",  # targeted by the 'b' keyboard shortcut
                            elem_classes=["vote-btn"],
                        )
                # — Tie buttons —
                with gr.Row(visible=False) as vote_row:
                    vote_both_good_btn = gr.Button(
                        "👍 Both Good",
                        variant="secondary",
                        elem_classes=["tie-btn"],
                    )
                    vote_both_bad_btn = gr.Button(
                        "👎 Both Bad",
                        variant="secondary",
                        elem_classes=["tie-btn"],
                    )
                # — Result card + next round —
                result_display = gr.HTML(visible=False)
                next_round_btn = gr.Button(
                    "⚡ Next Round (N)",
                    visible=False,
                    variant="primary",
                    elem_id="next-round-btn",  # targeted by the 'n' keyboard shortcut
                    elem_classes=["next-btn"],
                )
                random_btn.click(fn=get_random_sentence, outputs=[text_input])
                synth_btn.click(
                    fn=generate_comparison,
                    inputs=[text_input],
                    # Positional — must match generate_comparison's return tuple.
                    outputs=[
                        audio_a,
                        audio_b,
                        model_a_id,
                        model_b_id,
                        audio_a_base64,
                        audio_b_base64,
                        latency_a_state,
                        latency_b_state,
                        audio_row,
                        vote_row,
                        result_display,
                        synth_btn,
                        status_display,
                        model_a_label,
                        model_b_label,
                        next_round_btn,
                        vote_a_btn,
                        vote_b_btn,
                        vote_both_good_btn,
                        vote_both_bad_btn,
                    ],
                ).then(
                    # Snapshot the prompt into state so later textbox edits
                    # cannot change what text the vote is recorded against.
                    fn=lambda t: t,
                    inputs=[text_input],
                    outputs=[current_text],
                )
                # Vote handlers (all four buttons share the same signature)
                def make_vote_handler(vote_type: str):
                    # Factory closure: binds vote_type per button, avoiding the
                    # late-binding pitfall of defining lambdas in the loop below.
                    def handler(text, m_a, m_b, a_b64, b_b64, lat_a, lat_b):
                        return submit_vote(vote_type, text, m_a, m_b, a_b64, b_b64, lat_a, lat_b)
                    return handler
                # Positional — must match submit_vote's return tuple.
                vote_outputs = [
                    vote_row,
                    result_display,
                    next_round_btn,
                    model_a_label,
                    model_b_label,
                    vote_a_btn,
                    vote_b_btn,
                    vote_both_good_btn,
                    vote_both_bad_btn,
                ]
                vote_inputs = [
                    current_text,
                    model_a_id,
                    model_b_id,
                    audio_a_base64,
                    audio_b_base64,
                    latency_a_state,
                    latency_b_state,
                ]
                for btn, vtype in [
                    (vote_a_btn, "model_a"),
                    (vote_b_btn, "model_b"),
                    (vote_both_good_btn, "both_good"),
                    (vote_both_bad_btn, "both_bad"),
                ]:
                    btn.click(
                        fn=make_vote_handler(vtype),
                        inputs=vote_inputs,
                        outputs=vote_outputs,
                    )
                next_round_btn.click(
                    fn=clear_for_next_round,
                    # Positional — must match clear_for_next_round's return tuple.
                    outputs=[
                        audio_a,
                        audio_b,
                        audio_row,
                        vote_row,
                        result_display,
                        next_round_btn,
                        model_a_label,
                        model_b_label,
                        text_input,
                        vote_a_btn,
                        vote_b_btn,
                        vote_both_good_btn,
                        vote_both_bad_btn,
                    ],
                )
            # Leaderboard Tab
            with gr.TabItem("🏆 Leaderboard", id="leaderboard"):
                with gr.Column(elem_classes=["leaderboard-wrap"]):
                    leaderboard_display = gr.HTML(
                        "<p style='text-align:center; color:var(--body-text-color-subdued,#888);'>Loading…</p>"
                    )
                # Populated on page load rather than at build time.
                demo.load(fn=refresh_leaderboard, outputs=[leaderboard_display])
            # About Tab
            with gr.TabItem("📖 Story", id="story"):
                gr.HTML(ABOUT_MD)
    return demo
if __name__ == "__main__":
    # Local entry point: serve on all interfaces at port 7860 (the HF Spaces
    # convention), allowing up to 4 queued jobs to run concurrently.
    # Blocks.queue() returns the Blocks instance, so the calls chain.
    create_demo().queue(default_concurrency_limit=4).launch(
        server_name="0.0.0.0", server_port=7860
    )