Spaces:

Nanboy
/

RVCBench

Running

App Files Files Community

Nanboy committed 6 days ago

Commit

665fda4

verified ·

1 Parent(s): 31d8814

Fix RVCBench title color: add color: white !important to .hero h1

Browse files

Files changed (1) hide show

app.py +99 -1

app.py CHANGED Viewed

@@ -124,6 +124,41 @@ LEADERBOARD_ROWS = [
     dict(model="StyleTTS 2",   SIM=0.228, WER=0.049, MOS=4.30, MCD=6.81, RTF=0.11,  SVA=0.388, Emo=0.589),
 ]
 # Protection robustness — SIM under each method (LibriTTS, all 18 models)
 PROT_ROWS = [
     dict(model="Qwen3-TTS",    Clean=0.614, SafeSpeech=0.384, Enkidu=0.502, Spectral=0.363, GRNoise=0.408, AntiFake=0.582),
@@ -409,6 +444,59 @@ def make_prot_heatmap() -> go.Figure:
     return fig
 def make_waveform_figure(
     original: np.ndarray, protected: np.ndarray, sr: int
 ) -> go.Figure:
@@ -543,6 +631,7 @@ footer { display: none !important; }
     font-size: 2.35rem;
     line-height: 1.08;
     letter-spacing: 0;
 }
 .hero p {
     max-width: 760px;
@@ -635,7 +724,7 @@ INTRO_MD = """
 </div>
 <div class="stat-strip">
-  <div class="stat-card"><b>18</b><span>voice cloning models</span></div>
   <div class="stat-card"><b>5</b><span>protection methods</span></div>
   <div class="stat-card"><b>7</b><span>evaluation metrics</span></div>
   <div class="stat-card"><b>10</b><span>speech datasets</span></div>
@@ -778,6 +867,15 @@ def build_demo():
                                  outputs=[bar_chart])
                 demo.load(fn=lambda: make_results_bar("SIM"), outputs=[bar_chart])
                 gr.Markdown("---")
                 gr.Markdown(
                     "### Protection Robustness Heatmap\n"

     dict(model="StyleTTS 2",   SIM=0.228, WER=0.049, MOS=4.30, MCD=6.81, RTF=0.11,  SVA=0.388, Emo=0.589),
 ]
+# Cross-dataset generalisation — SIM on clean prompts: one dict per model, one key per dataset (10 datasets); None marks a model/dataset pair that was not evaluated and is drawn as "—" in the heatmap
+CROSS_DATASET_ROWS = [
+    dict(model="Qwen3-TTS",    LibriTTS=0.614, VCTK=0.618, MultiSpk=0.495, Long=0.561,  AISHELL=0.721, French=0.536, Bilingual=0.673, BGclean=0.689, BGnoise=0.572, Hallucin=0.515),
+    dict(model="IndexTTS",     LibriTTS=0.606, VCTK=0.567, MultiSpk=0.473, Long=0.775,  AISHELL=0.721, French=0.397, Bilingual=0.673, BGclean=0.589, BGnoise=0.528, Hallucin=0.529),
+    dict(model="CosyVoice 2",  LibriTTS=0.602, VCTK=0.582, MultiSpk=0.448, Long=0.530,  AISHELL=0.717, French=0.378, Bilingual=0.653, BGclean=0.626, BGnoise=0.515, Hallucin=0.518),
+    dict(model="ZipVoice",     LibriTTS=0.579, VCTK=0.554, MultiSpk=0.531, Long=0.729,  AISHELL=0.712, French=0.363, Bilingual=0.322, BGclean=0.625, BGnoise=0.462, Hallucin=0.509),
+    dict(model="MaskGCT",      LibriTTS=0.570, VCTK=0.555, MultiSpk=0.431, Long=0.194,  AISHELL=0.674, French=0.494, Bilingual=None,  BGclean=0.610, BGnoise=0.487, Hallucin=0.499),
+    dict(model="GLM-TTS",      LibriTTS=0.570, VCTK=0.573, MultiSpk=0.445, Long=0.757,  AISHELL=0.690, French=0.398, Bilingual=0.657, BGclean=0.622, BGnoise=0.528, Hallucin=0.533),
+    dict(model="F5-TTS",       LibriTTS=0.559, VCTK=0.537, MultiSpk=0.507, Long=0.607,  AISHELL=0.696, French=0.304, Bilingual=0.653, BGclean=0.582, BGnoise=0.414, Hallucin=0.455),
+    dict(model="Higgs Audio",  LibriTTS=0.559, VCTK=0.516, MultiSpk=0.418, Long=0.520,  AISHELL=0.581, French=0.349, Bilingual=0.543, BGclean=0.592, BGnoise=0.421, Hallucin=0.425),
+    dict(model="MGM-Omni",     LibriTTS=0.539, VCTK=0.447, MultiSpk=0.370, Long=0.442,  AISHELL=0.713, French=0.227, Bilingual=0.630, BGclean=0.523, BGnoise=0.332, Hallucin=0.396),
+    dict(model="PlayDiffusion",LibriTTS=0.506, VCTK=0.426, MultiSpk=0.360, Long=0.637,  AISHELL=0.441, French=0.283, Bilingual=0.465, BGclean=0.433, BGnoise=0.305, Hallucin=0.408),
+    dict(model="MOSS-TTSD",    LibriTTS=0.492, VCTK=0.440, MultiSpk=0.379, Long=0.644,  AISHELL=0.437, French=0.327, Bilingual=0.471, BGclean=0.494, BGnoise=0.488, Hallucin=0.416),
+    dict(model="VibeVoice",    LibriTTS=0.480, VCTK=0.436, MultiSpk=0.348, Long=0.625,  AISHELL=0.564, French=0.343, Bilingual=0.531, BGclean=0.513, BGnoise=0.364, Hallucin=0.408),
+    dict(model="FishSpeech",   LibriTTS=0.472, VCTK=0.430, MultiSpk=0.383, Long=0.572,  AISHELL=0.611, French=0.374, Bilingual=0.566, BGclean=0.495, BGnoise=0.387, Hallucin=0.351),
+    dict(model="XTTS-v2",      LibriTTS=0.454, VCTK=0.454, MultiSpk=0.328, Long=0.613,  AISHELL=0.569, French=0.445, Bilingual=0.506, BGclean=0.546, BGnoise=0.394, Hallucin=0.488),
+    dict(model="SparkTTS",     LibriTTS=0.408, VCTK=0.532, MultiSpk=0.228, Long=0.345,  AISHELL=0.569, French=0.164, Bilingual=0.480, BGclean=0.588, BGnoise=0.332, Hallucin=0.336),
+    dict(model="OZSpeech",     LibriTTS=0.388, VCTK=0.253, MultiSpk=0.271, Long=None,   AISHELL=None,  French=0.109, Bilingual=None,  BGclean=0.272, BGnoise=0.164, Hallucin=0.281),
+    dict(model="OpenVoice V2", LibriTTS=0.244, VCTK=0.392, MultiSpk=0.192, Long=0.278,  AISHELL=0.431, French=0.271, Bilingual=0.298, BGclean=0.484, BGnoise=0.358, Hallucin=0.365),
+    dict(model="StyleTTS 2",   LibriTTS=0.228, VCTK=0.236, MultiSpk=0.162, Long=None,   AISHELL=None,  French=None,  Bilingual=0.213, BGclean=0.196, BGnoise=0.166, Hallucin=0.184),
+]
+CROSS_DATASET_COLS = [  # (key into each CROSS_DATASET_ROWS dict, column label shown on the heatmap), in left-to-right column order
+    ("LibriTTS",  "LibriTTS"),
+    ("VCTK",      "VCTK"),
+    ("MultiSpk",  "Multi-spk"),
+    ("Long",      "Long"),
+    ("AISHELL",   "AISHELL"),
+    ("French",    "French"),
+    ("Bilingual", "Bilingual"),
+    ("BGclean",   "BG-clean"),
+    ("BGnoise",   "BG-noise"),
+    ("Hallucin",  "Hallucin."),
+]
 # Protection robustness — SIM under each method (LibriTTS, all 18 models)
 PROT_ROWS = [
     dict(model="Qwen3-TTS",    Clean=0.614, SafeSpeech=0.384, Enkidu=0.502, Spectral=0.363, GRNoise=0.408, AntiFake=0.582),
     return fig
+def make_cross_dataset_heatmap() -> go.Figure:
+    """Heatmap: SIM on clean prompts across all 10 datasets for all 18 models."""
+    col_keys   = [k for k, _ in CROSS_DATASET_COLS]
+    col_labels = [label for _, label in CROSS_DATASET_COLS]
+    rows = sorted(CROSS_DATASET_ROWS, key=lambda r: r["LibriTTS"], reverse=True)
+    model_names = [r["model"] for r in rows]
+    z: list[list] = []
+    text_vals: list[list[str]] = []
+    for r in rows:
+        row_z, row_t = [], []
+        for key in col_keys:
+            v = r.get(key)
+            row_z.append(v)
+            row_t.append(f"{v:.3f}" if v is not None else "—")
+        z.append(row_z)
+        text_vals.append(row_t)
+    fig = go.Figure(go.Heatmap(
+        z=z,
+        x=col_labels,
+        y=model_names,
+        text=text_vals,
+        texttemplate="%{text}",
+        textfont=dict(size=10),
+        colorscale=[
+            [0.0,  "#b71c1c"],
+            [0.25, "#ef9a9a"],
+            [0.5,  "#fff9c4"],
+            [0.75, "#a5d6a7"],
+            [1.0,  "#1b5e20"],
+        ],
+        zmin=0.0, zmax=0.75,
+        colorbar=dict(title="SIM", tickformat=".2f", len=0.8),
+        hoverongaps=False,
+    ))
+    fig.update_layout(
+        title=dict(
+            text="<b>Cross-Dataset Generalisation — Speaker Similarity (SIM) on Clean Prompts</b><br>"
+                 "<sup>Models sorted by LibriTTS SIM. — = not evaluated. "
+                 "Green = high SIM (faithful clone), red = low SIM.</sup>",
+            font=dict(size=13),
+        ),
+        yaxis=dict(autorange="reversed"),
+        xaxis=dict(side="top"),
+        paper_bgcolor="white", plot_bgcolor="white",
+        margin=dict(t=120, b=40, l=120, r=80),
+        height=600,
+    )
+    return fig
 def make_waveform_figure(
     original: np.ndarray, protected: np.ndarray, sr: int
 ) -> go.Figure:
     font-size: 2.35rem;
     line-height: 1.08;
     letter-spacing: 0;
+    color: white !important;
 }
 .hero p {
     max-width: 760px;
 </div>
 <div class="stat-strip">
+  <div class="stat-card"><b>26</b><span>voice cloning models</span></div>
   <div class="stat-card"><b>5</b><span>protection methods</span></div>
   <div class="stat-card"><b>7</b><span>evaluation metrics</span></div>
   <div class="stat-card"><b>10</b><span>speech datasets</span></div>
                                  outputs=[bar_chart])
                 demo.load(fn=lambda: make_results_bar("SIM"), outputs=[bar_chart])
+                gr.Markdown("---")
+                gr.Markdown(
+                    "### Cross-Dataset Generalisation\n"
+                    "SIM on clean prompts across all 10 benchmark datasets. "
+                    "Models sorted by LibriTTS SIM. — = not evaluated."
+                )
+                cross_heatmap = gr.Plot(label="", show_label=False)
+                demo.load(fn=make_cross_dataset_heatmap, outputs=[cross_heatmap])
                 gr.Markdown("---")
                 gr.Markdown(
                     "### Protection Robustness Heatmap\n"