Upload folder using huggingface_hub
Browse files
app.py
CHANGED
|
@@ -252,42 +252,62 @@ def make_sim_bar(model_name: str) -> go.Figure:
|
|
| 252 |
values = list(sims.values())
|
| 253 |
|
| 254 |
bar_colors = [
|
| 255 |
-
"#
|
| 256 |
-
"#
|
| 257 |
-
"#
|
| 258 |
-
"#
|
| 259 |
-
"#
|
| 260 |
-
"#
|
| 261 |
]
|
| 262 |
# annotate drop vs clean
|
| 263 |
clean_sim = sims["Clean"]
|
| 264 |
text = [f"{v:.3f}" if k == "Clean" else f"{v:.3f}<br>↓{clean_sim - v:.3f}"
|
| 265 |
for k, v in sims.items()]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
|
| 267 |
fig = go.Figure(go.Bar(
|
| 268 |
x=labels, y=values,
|
| 269 |
marker_color=bar_colors,
|
| 270 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
cliponaxis=False,
|
| 272 |
))
|
| 273 |
fig.update_layout(
|
| 274 |
-
title=dict(
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
showlegend=False,
|
|
|
|
|
|
|
| 282 |
)
|
| 283 |
fig.add_trace(go.Scatter(
|
| 284 |
x=labels,
|
| 285 |
y=[clean_sim] * len(labels),
|
| 286 |
mode="lines+text",
|
| 287 |
-
line=dict(color="#
|
| 288 |
text=[""] * (len(labels) - 1) + ["Clean baseline"],
|
| 289 |
textposition="top right",
|
| 290 |
-
textfont=dict(size=10, color="#
|
| 291 |
hoverinfo="skip",
|
| 292 |
showlegend=False,
|
| 293 |
))
|
|
@@ -506,31 +526,126 @@ def update_results_bar(metric: str) -> go.Figure:
|
|
| 506 |
# ── UI constants ──────────────────────────────────────────────────────────────
|
| 507 |
|
| 508 |
CSS = """
|
| 509 |
-
#title { text-align: center; }
|
| 510 |
footer { display: none !important; }
|
| 511 |
-
.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 512 |
"""
|
| 513 |
|
| 514 |
INTRO_MD = """
|
| 515 |
-
<div
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 525 |
</div>
|
| 526 |
"""
|
| 527 |
|
| 528 |
GALLERY_INTRO_MD = """
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
The bar chart below shows the SIM drop under **all 5 protection methods** for the selected model.
|
| 534 |
"""
|
| 535 |
|
| 536 |
PROT_INTRO_MD = """
|
|
@@ -564,39 +679,39 @@ def build_demo():
|
|
| 564 |
with gr.Tab("🎧 Voice Cloning Gallery"):
|
| 565 |
gr.Markdown(GALLERY_INTRO_MD)
|
| 566 |
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
|
|
|
|
|
|
| 573 |
|
| 574 |
sim_note = gr.Markdown("", elem_classes="note-box")
|
| 575 |
|
| 576 |
with gr.Row():
|
| 577 |
-
with gr.Column():
|
| 578 |
-
gr.Markdown(
|
| 579 |
gr.Markdown(f"*\"{REF_TEXT}\"*")
|
| 580 |
ref_out = gr.Audio(label="Reference (original)", interactive=False)
|
| 581 |
-
with gr.Column():
|
| 582 |
-
gr.Markdown(
|
| 583 |
gr.Markdown(f"*\"{TARGET_TEXT}\"*")
|
| 584 |
target_out = gr.Audio(label="Target utterance", interactive=False)
|
| 585 |
|
| 586 |
-
gr.Markdown("-
|
| 587 |
-
gr.Markdown("### 3 · Cloning Results — Clean vs. SafeSpeech-Protected")
|
| 588 |
|
| 589 |
with gr.Row():
|
| 590 |
-
with gr.Column():
|
| 591 |
-
gr.Markdown("####
|
| 592 |
clean_out = gr.Audio(label="Clean clone", interactive=False)
|
| 593 |
-
with gr.Column():
|
| 594 |
-
gr.Markdown("####
|
| 595 |
prot_ref_out = gr.Audio(label="Protected reference", interactive=False)
|
| 596 |
prot_clone_out = gr.Audio(label="Clone from protected (degraded)", interactive=False)
|
| 597 |
|
| 598 |
-
gr.Markdown("-
|
| 599 |
-
gr.Markdown("### 4 · Protection Effectiveness Across All Methods")
|
| 600 |
sim_chart = gr.Plot(label="", show_label=False)
|
| 601 |
|
| 602 |
gallery_outputs = [ref_out, target_out, clean_out, prot_ref_out,
|
|
|
|
| 252 |
values = list(sims.values())
|
| 253 |
|
| 254 |
bar_colors = [
|
| 255 |
+
"#2563eb", # Clean
|
| 256 |
+
"#7c3aed", # SafeSpeech
|
| 257 |
+
"#059669", # Enkidu
|
| 258 |
+
"#ea580c", # Spectral
|
| 259 |
+
"#475569", # GR-Noise
|
| 260 |
+
"#be123c", # AntiFake
|
| 261 |
]
|
| 262 |
# annotate drop vs clean
|
| 263 |
clean_sim = sims["Clean"]
|
| 264 |
text = [f"{v:.3f}" if k == "Clean" else f"{v:.3f}<br>↓{clean_sim - v:.3f}"
|
| 265 |
for k, v in sims.items()]
|
| 266 |
+
hover_text = [
|
| 267 |
+
f"{label}<br>SIM: {value:.3f}<br>Drop from clean: {clean_sim - value:.3f}"
|
| 268 |
+
for label, value in zip(labels, values)
|
| 269 |
+
]
|
| 270 |
|
| 271 |
fig = go.Figure(go.Bar(
|
| 272 |
x=labels, y=values,
|
| 273 |
marker_color=bar_colors,
|
| 274 |
+
marker_line_color="rgba(15, 23, 42, 0.25)",
|
| 275 |
+
marker_line_width=1,
|
| 276 |
+
text=text,
|
| 277 |
+
textposition="outside",
|
| 278 |
+
hovertext=hover_text,
|
| 279 |
+
hoverinfo="text",
|
| 280 |
cliponaxis=False,
|
| 281 |
))
|
| 282 |
fig.update_layout(
|
| 283 |
+
title=dict(
|
| 284 |
+
text=f"<b>{model_name}</b> speaker similarity after protection",
|
| 285 |
+
font=dict(size=16, color="#0f172a"),
|
| 286 |
+
x=0.02,
|
| 287 |
+
),
|
| 288 |
+
yaxis=dict(
|
| 289 |
+
title="SIM",
|
| 290 |
+
range=[0, min(0.75, max(values) * 1.28)],
|
| 291 |
+
gridcolor="#e2e8f0",
|
| 292 |
+
zeroline=False,
|
| 293 |
+
),
|
| 294 |
+
xaxis=dict(title="", tickfont=dict(size=12)),
|
| 295 |
+
paper_bgcolor="white",
|
| 296 |
+
plot_bgcolor="#f8fafc",
|
| 297 |
+
margin=dict(t=62, b=42, l=48, r=24),
|
| 298 |
+
height=350,
|
| 299 |
showlegend=False,
|
| 300 |
+
bargap=0.28,
|
| 301 |
+
font=dict(color="#334155"),
|
| 302 |
)
|
| 303 |
fig.add_trace(go.Scatter(
|
| 304 |
x=labels,
|
| 305 |
y=[clean_sim] * len(labels),
|
| 306 |
mode="lines+text",
|
| 307 |
+
line=dict(color="#2563eb", dash="dot", width=1.5),
|
| 308 |
text=[""] * (len(labels) - 1) + ["Clean baseline"],
|
| 309 |
textposition="top right",
|
| 310 |
+
textfont=dict(size=10, color="#2563eb"),
|
| 311 |
hoverinfo="skip",
|
| 312 |
showlegend=False,
|
| 313 |
))
|
|
|
|
| 526 |
# ── UI constants ──────────────────────────────────────────────────────────────
|
| 527 |
|
| 528 |
CSS = """
|
|
|
|
| 529 |
footer { display: none !important; }
|
| 530 |
+
.gradio-container {
|
| 531 |
+
max-width: 1180px !important;
|
| 532 |
+
margin: 0 auto !important;
|
| 533 |
+
}
|
| 534 |
+
.hero {
|
| 535 |
+
padding: 28px 28px 22px;
|
| 536 |
+
border-radius: 12px;
|
| 537 |
+
background: linear-gradient(135deg, #0f172a 0%, #164e63 54%, #065f46 100%);
|
| 538 |
+
color: white;
|
| 539 |
+
margin-bottom: 18px;
|
| 540 |
+
}
|
| 541 |
+
.hero h1 {
|
| 542 |
+
margin: 0 0 8px;
|
| 543 |
+
font-size: 2.35rem;
|
| 544 |
+
line-height: 1.08;
|
| 545 |
+
letter-spacing: 0;
|
| 546 |
+
}
|
| 547 |
+
.hero p {
|
| 548 |
+
max-width: 760px;
|
| 549 |
+
margin: 0;
|
| 550 |
+
color: #dbeafe;
|
| 551 |
+
font-size: 1.05rem;
|
| 552 |
+
}
|
| 553 |
+
.hero a {
|
| 554 |
+
color: white !important;
|
| 555 |
+
}
|
| 556 |
+
.hero-links {
|
| 557 |
+
display: flex;
|
| 558 |
+
flex-wrap: wrap;
|
| 559 |
+
gap: 8px;
|
| 560 |
+
margin-top: 16px;
|
| 561 |
+
}
|
| 562 |
+
.hero-links a {
|
| 563 |
+
text-decoration: none;
|
| 564 |
+
}
|
| 565 |
+
.stat-strip {
|
| 566 |
+
display: grid;
|
| 567 |
+
grid-template-columns: repeat(4, minmax(0, 1fr));
|
| 568 |
+
gap: 10px;
|
| 569 |
+
margin: 14px 0 18px;
|
| 570 |
+
}
|
| 571 |
+
.stat-card {
|
| 572 |
+
border: 1px solid #d8dee9;
|
| 573 |
+
border-radius: 8px;
|
| 574 |
+
padding: 12px 14px;
|
| 575 |
+
background: #ffffff;
|
| 576 |
+
}
|
| 577 |
+
.stat-card b {
|
| 578 |
+
display: block;
|
| 579 |
+
font-size: 1.35rem;
|
| 580 |
+
color: #0f172a;
|
| 581 |
+
line-height: 1.1;
|
| 582 |
+
}
|
| 583 |
+
.stat-card span {
|
| 584 |
+
color: #475569;
|
| 585 |
+
font-size: 0.9rem;
|
| 586 |
+
}
|
| 587 |
+
.section-head {
|
| 588 |
+
margin: 18px 0 8px;
|
| 589 |
+
color: #0f172a;
|
| 590 |
+
}
|
| 591 |
+
.note-box {
|
| 592 |
+
font-size: 1.02em;
|
| 593 |
+
background: #eef6ff;
|
| 594 |
+
border: 1px solid #bfdbfe;
|
| 595 |
+
border-left: 4px solid #2563eb;
|
| 596 |
+
border-radius: 8px;
|
| 597 |
+
padding: 10px 12px;
|
| 598 |
+
}
|
| 599 |
+
.audio-panel {
|
| 600 |
+
border: 1px solid #e2e8f0;
|
| 601 |
+
border-radius: 8px;
|
| 602 |
+
padding: 12px;
|
| 603 |
+
background: #ffffff;
|
| 604 |
+
}
|
| 605 |
+
.audio-panel h3,
|
| 606 |
+
.audio-panel h4 {
|
| 607 |
+
margin-top: 0;
|
| 608 |
+
}
|
| 609 |
+
.workflow-copy {
|
| 610 |
+
color: #475569;
|
| 611 |
+
margin-bottom: 12px;
|
| 612 |
+
}
|
| 613 |
+
@media (max-width: 760px) {
|
| 614 |
+
.hero {
|
| 615 |
+
padding: 22px 18px 18px;
|
| 616 |
+
}
|
| 617 |
+
.hero h1 {
|
| 618 |
+
font-size: 1.75rem;
|
| 619 |
+
}
|
| 620 |
+
.stat-strip {
|
| 621 |
+
grid-template-columns: repeat(2, minmax(0, 1fr));
|
| 622 |
+
}
|
| 623 |
+
}
|
| 624 |
"""
|
| 625 |
|
| 626 |
INTRO_MD = """
|
| 627 |
+
<div class="hero">
|
| 628 |
+
<h1>RVCBench</h1>
|
| 629 |
+
<p>Voice cloning attacks and audio protection methods, compared through paired listening examples and speaker-similarity results.</p>
|
| 630 |
+
<div class="hero-links">
|
| 631 |
+
<a href="https://arxiv.org/abs/2602.00443"><img alt="Paper" src="https://img.shields.io/badge/arXiv-2602.00443-b31b1b.svg"></a>
|
| 632 |
+
<a href="https://huggingface.co/datasets/Nanboy/RVCBench"><img alt="Dataset" src="https://img.shields.io/badge/HuggingFace-Dataset-ffcc00.svg"></a>
|
| 633 |
+
<a href="https://github.com/Nanboy-Ronan/RVCBench"><img alt="GitHub" src="https://img.shields.io/badge/GitHub-RVCBench-181717.svg"></a>
|
| 634 |
+
</div>
|
| 635 |
+
</div>
|
| 636 |
|
| 637 |
+
<div class="stat-strip">
|
| 638 |
+
<div class="stat-card"><b>18</b><span>voice cloning models</span></div>
|
| 639 |
+
<div class="stat-card"><b>5</b><span>protection methods</span></div>
|
| 640 |
+
<div class="stat-card"><b>7</b><span>evaluation metrics</span></div>
|
| 641 |
+
<div class="stat-card"><b>10</b><span>speech datasets</span></div>
|
| 642 |
</div>
|
| 643 |
"""
|
| 644 |
|
| 645 |
GALLERY_INTRO_MD = """
|
| 646 |
+
<div class="workflow-copy">
|
| 647 |
+
Select a cloning model, compare clean and protected audio, then inspect how much each protection method lowers speaker similarity.
|
| 648 |
+
</div>
|
|
|
|
|
|
|
| 649 |
"""
|
| 650 |
|
| 651 |
PROT_INTRO_MD = """
|
|
|
|
| 679 |
with gr.Tab("🎧 Voice Cloning Gallery"):
|
| 680 |
gr.Markdown(GALLERY_INTRO_MD)
|
| 681 |
|
| 682 |
+
with gr.Row():
|
| 683 |
+
model_dd = gr.Dropdown(
|
| 684 |
+
choices=list(GALLERY_MODELS.keys()),
|
| 685 |
+
value="ZipVoice",
|
| 686 |
+
label="Voice Cloning Model",
|
| 687 |
+
scale=3,
|
| 688 |
+
)
|
| 689 |
+
load_btn = gr.Button("Load Example", variant="primary", scale=1)
|
| 690 |
|
| 691 |
sim_note = gr.Markdown("", elem_classes="note-box")
|
| 692 |
|
| 693 |
with gr.Row():
|
| 694 |
+
with gr.Column(elem_classes="audio-panel"):
|
| 695 |
+
gr.Markdown('<h3 class="section-head">1. Reference Voice</h3>')
|
| 696 |
gr.Markdown(f"*\"{REF_TEXT}\"*")
|
| 697 |
ref_out = gr.Audio(label="Reference (original)", interactive=False)
|
| 698 |
+
with gr.Column(elem_classes="audio-panel"):
|
| 699 |
+
gr.Markdown('<h3 class="section-head">2. Target Speech</h3>')
|
| 700 |
gr.Markdown(f"*\"{TARGET_TEXT}\"*")
|
| 701 |
target_out = gr.Audio(label="Target utterance", interactive=False)
|
| 702 |
|
| 703 |
+
gr.Markdown('<h3 class="section-head">3. Cloning Results</h3>')
|
|
|
|
| 704 |
|
| 705 |
with gr.Row():
|
| 706 |
+
with gr.Column(elem_classes="audio-panel"):
|
| 707 |
+
gr.Markdown("#### Clean Reference")
|
| 708 |
clean_out = gr.Audio(label="Clean clone", interactive=False)
|
| 709 |
+
with gr.Column(elem_classes="audio-panel"):
|
| 710 |
+
gr.Markdown("#### SafeSpeech-Protected Reference")
|
| 711 |
prot_ref_out = gr.Audio(label="Protected reference", interactive=False)
|
| 712 |
prot_clone_out = gr.Audio(label="Clone from protected (degraded)", interactive=False)
|
| 713 |
|
| 714 |
+
gr.Markdown('<h3 class="section-head">4. Protection Effectiveness Across Methods</h3>')
|
|
|
|
| 715 |
sim_chart = gr.Plot(label="", show_label=False)
|
| 716 |
|
| 717 |
gallery_outputs = [ref_out, target_out, clean_out, prot_ref_out,
|