Spaces:

Ytgetahun
/

visual-narrator-comparison

Sleeping

App Files Files Community

Ytgetahun committed on Jan 6

Commit

d6210b8

1 Parent(s): fbab538

Add Live API Race tab with January 2026 results

Browse files

Files changed (1) hide show

app.py +66 -0

app.py CHANGED Viewed

@@ -29,6 +29,29 @@ SPEED_DATA = {
     },
 }
 QUALITY_DATA = {
     "Visual Narrator 3B": {"adj_density": 2.0, "semantic_accuracy": 71.6},
     "Claude Sonnet 4.5": {"adj_density": 2.0, "semantic_accuracy": 64.2},
@@ -171,6 +194,46 @@ def create_sample_output():
 - Suitable for audio description / accessibility
 """
 # =============================================================================
 # GRADIO INTERFACE
 # =============================================================================
@@ -203,6 +266,9 @@ with gr.Blocks(
     """)
     with gr.Tabs():
         with gr.Tab("Speed Benchmark"):
             gr.Markdown(create_speed_comparison())

     },
 }
+# =============================================================================
+# LIVE API RACE DATA (January 2026 - Real API calls, not simulated)
+# =============================================================================
+# Maps each model's display name to its recorded latency figures.
+#   latency_ms: latency in milliseconds, per the key name.
+#   relative:   latency_ms divided by the "Visual Narrator" latency_ms (429),
+#               rounded to one decimal place (e.g. 4559 / 429 -> 10.6).
+# The "relative" values are stored literals, not computed, so they must be
+# updated by hand whenever any latency_ms value changes.
+LIVE_API_DATA = {
+    "Visual Narrator": {
+        "latency_ms": 429,
+        "relative": 1.0,
+    },
+    "Claude Sonnet 4": {
+        "latency_ms": 4559,
+        "relative": 10.6,
+    },
+    "Gemini 2.0 Flash": {
+        "latency_ms": 8048,
+        "relative": 18.8,
+    },
+    "GPT-4o": {
+        "latency_ms": 11873,
+        "relative": 27.7,
+    },
+}
 QUALITY_DATA = {
     "Visual Narrator 3B": {"adj_density": 2.0, "semantic_accuracy": 71.6},
     "Claude Sonnet 4.5": {"adj_density": 2.0, "semantic_accuracy": 64.2},
 - Suitable for audio description / accessibility
 """
+def create_live_api_race():
+    """Generate live API race results.
+
+    Looks up the four model entries in the module-level LIVE_API_DATA and
+    returns a Markdown string: a latency comparison table followed by fixed
+    explanatory sections ("Why This Matters", "Verification",
+    "Try It Yourself").
+
+    Only the table cells are interpolated from LIVE_API_DATA. The competitor
+    rows use the ``:,`` thousands-separator format; the Visual Narrator row
+    does not. The figures quoted in the prose ("429ms", "4-12 seconds",
+    "All 4 models") and the WebSocket URL are literal text, so they do not
+    follow changes to LIVE_API_DATA and must be edited by hand.
+
+    Raises:
+        KeyError: if one of the four expected model names, or a
+            "latency_ms"/"relative" field read from it, is missing.
+    """
+    vn = LIVE_API_DATA["Visual Narrator"]
+    claude = LIVE_API_DATA["Claude Sonnet 4"]
+    gemini = LIVE_API_DATA["Gemini 2.0 Flash"]
+    gpt4 = LIVE_API_DATA["GPT-4o"]
+    return f"""
+## Live API Race Results (January 2026)
+**What's measured:** Real API calls to OpenAI, Anthropic, and Google—executed in parallel at the exact same millisecond. No simulation.
+| Model | Live Latency | vs Visual Narrator |
+|-------|-------------|-------------------|
+| **Visual Narrator** | **{vn['latency_ms']}ms** | — |
+| Claude Sonnet 4 | {claude['latency_ms']:,}ms | {claude['relative']}x slower |
+| Gemini 2.0 Flash | {gemini['latency_ms']:,}ms | {gemini['relative']}x slower |
+| GPT-4o | {gpt4['latency_ms']:,}ms | {gpt4['relative']}x slower |
+### Why This Matters
+The **1-second threshold** is critical for real-time accessibility. Delays over 1 second break the connection between narration and on-screen action.
+- **Visual Narrator at 429ms:** Well under the threshold. Enables true real-time narration.
+- **Frontier models at 4-12 seconds:** Fundamentally incompatible with live video.
+### Verification
+These results come from a live WebSocket demo that makes actual API calls:
+- All 4 models receive the same input at the same moment
+- Latency is measured from request to response
+- No artificial delays or handicapping
+- Reproducible by anyone with API access
+### Try It Yourself
+The live demo WebSocket endpoint is available for verification:
+`wss://egqm8ecka4.execute-api.us-east-1.amazonaws.com/prod`
+"""
 # =============================================================================
 # GRADIO INTERFACE
 # =============================================================================
     """)
     with gr.Tabs():
+        with gr.Tab("Live API Race"):
+            gr.Markdown(create_live_api_race())
         with gr.Tab("Speed Benchmark"):
             gr.Markdown(create_speed_comparison())