|
|
import gradio as gr |
|
|
import json |
|
|
import os |
|
|
import pandas as pd |
|
|
import plotly.express as px |
|
|
import plotly.graph_objects as go |
|
|
from datetime import datetime |
|
|
from plotly.subplots import make_subplots |
|
|
|
|
|
|
|
|
def load_leaderboard_data():
    """Load the approved leaderboard entries from ``leaderboard.json``.

    Returns:
        The parsed JSON content of the file, or an empty list when the file
        is missing, unreadable, or does not contain valid JSON.
    """
    try:
        with open('leaderboard.json', 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers
        # malformed JSON (json.JSONDecodeError) and undecodable bytes
        # (UnicodeDecodeError). Loading is best-effort, so fall back to
        # "no entries" instead of propagating.
        return []
|
|
|
|
|
|
|
|
def filter_data(data, model, metric):
    """Select one model's entries for an evaluation setting, best rate first.

    Args:
        data: Iterable of leaderboard entry dicts.
        model: Only entries whose ``'model'`` equals this value are kept.
        metric: "Attack-free", "Watermark Removal" or "Stealing Attack".
            Any other value yields an empty list.

    Returns:
        A list of dicts holding ``'name'``, ``'model'`` and the two value
        keys of the chosen setting, sorted by the setting's detection-rate
        key in descending order. Entries missing either value (or holding
        ``None``) are skipped. ``'paperLink'`` is carried over when the
        entry has one, so the table builder can render its Reference column.
    """
    # (utility-like key, detection-rate key) per setting. The spelling
    # 'absoluteUtilityDegregation' is the key used in the stored data.
    metric_fields = {
        "Attack-free": ('normalizedUtility', 'detectionRate'),
        "Watermark Removal": ('absoluteUtilityDegregation', 'removal_detectionRate'),
        "Stealing Attack": ('adversaryBERTscore', 'adversaryDetectionRate'),
    }
    fields = metric_fields.get(metric)

    filtered = []
    for item in data:
        if item.get('model') != model or fields is None:
            continue
        utility_key, rate_key = fields
        if item.get(utility_key) is None or item.get(rate_key) is None:
            continue

        row = {
            'name': item.get('name', ''),
            'model': item.get('model', ''),
            utility_key: item[utility_key],
            rate_key: item[rate_key],
        }
        # Without this the link never reaches create_table_data, which
        # looks it up on the filtered rows.
        if item.get('paperLink'):
            row['paperLink'] = item['paperLink']
        filtered.append(row)

    if fields is not None:
        filtered.sort(key=lambda row: row[fields[1]], reverse=True)
    return filtered
|
|
|
|
|
|
|
|
def create_scatter_plot(data, metric):
    """Build a labelled Plotly scatter plot for one evaluation setting.

    Args:
        data: Entries for ``metric``; each must contain ``'name'`` plus the
            two value keys of that setting.
        metric: "Attack-free", "Watermark Removal" or "Stealing Attack".
            Any other value is plotted with the Stealing Attack keys and
            labels.

    Returns:
        A ``go.Figure`` with one marker+text trace, or an empty figure when
        ``data`` is empty.
    """
    if not data:
        return go.Figure()

    # Per setting: x key, y key, x hover label, y hover label,
    # x axis title, y axis title.
    plot_specs = {
        "Attack-free": (
            'normalizedUtility', 'detectionRate',
            'Normalized Utility', 'Detection Rate',
            "Normalized Utility", "Detection Rate (%)",
        ),
        "Watermark Removal": (
            'absoluteUtilityDegregation', 'removal_detectionRate',
            'Abs Utility Degradation', 'Detection Rate',
            "Absolute Utility Degradation", "Removal Detection Rate (%)",
        ),
        "Stealing Attack": (
            'adversaryBERTscore', 'adversaryDetectionRate',
            'Adversary BERT Score', 'Adversary Detection Rate',
            "Adversary BERT Score", "Adversary Detection Rate (%)",
        ),
    }
    x_key, y_key, x_hover, y_hover, x_title, y_title = plot_specs.get(
        metric, plot_specs["Stealing Attack"]
    )

    names = [item['name'] for item in data]
    x_data = [item[x_key] for item in data]
    y_data = [item[y_key] for item in data]

    # Built by concatenation so the literal %{...} placeholders reach Plotly
    # untouched. Every line now ends in a complete <br> tag.
    hovertemplate = (
        '<b>%{text}</b><br>'
        + x_hover + ': %{x:.3f}<br>'
        + y_hover + ': %{y:.3f}%<br>'
        + '<extra></extra>'
    )

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=x_data,
        y=y_data,
        mode='markers+text',
        marker=dict(
            size=12,
            color='#3B82F6',
            line=dict(width=2, color='white'),
        ),
        text=names,
        textposition='top center',
        textfont=dict(size=10, color='#374151'),
        hovertemplate=hovertemplate,
    ))

    fig.update_layout(
        title=f"{metric} Performance Scatter Plot",
        xaxis_title=x_title,
        yaxis_title=y_title,
        font=dict(size=12, color='#374151'),
        plot_bgcolor='white',
        paper_bgcolor='white',
        xaxis=dict(
            gridcolor='lightgray',
            showgrid=True,
            zeroline=False,
        ),
        yaxis=dict(
            gridcolor='lightgray',
            showgrid=True,
            zeroline=False,
        ),
        margin=dict(l=60, r=60, t=80, b=60),
    )

    return fig
|
|
|
|
|
|
|
|
def create_table_data(data, metric):
    """Build the leaderboard table for one evaluation setting.

    This is the single effective definition: an earlier duplicate that also
    emitted a ``'Rank'`` column was shadowed by a later redefinition and
    never ran, so it has been removed.

    Args:
        data: Entries for ``metric``; each must contain ``'name'`` plus the
            two value keys of that setting, and may contain ``'paperLink'``.
        metric: "Attack-free", "Watermark Removal" or "Stealing Attack".
            Any other value uses the Stealing Attack columns.

    Returns:
        A ``pd.DataFrame`` with a ``'Watermark'`` column, the two metric
        columns formatted to three decimals, and a ``'Reference'`` column
        holding an HTML link or ``'-'``. An empty DataFrame when ``data`` is
        empty.
    """
    # Function-scope import: the visible top-of-file imports do not include
    # ``html``.
    from html import escape

    if not data:
        return pd.DataFrame()

    # NOTE(review): the trailing 'β' in the column labels and the 'π' in the
    # link text look like mis-decoded glyphs (probably arrows / an icon);
    # confirm the intended characters before changing them.
    metric_columns = {
        "Attack-free": (
            ('Normalized Utility β', 'normalizedUtility'),
            ('Detection Rate (%) β', 'detectionRate'),
        ),
        "Watermark Removal": (
            ('Abs Utility Degradation β', 'absoluteUtilityDegregation'),
            ('Removal Detection Rate (%) β', 'removal_detectionRate'),
        ),
        "Stealing Attack": (
            ('Adversary BERT Score β', 'adversaryBERTscore'),
            ('Adversary Detection Rate (%) β', 'adversaryDetectionRate'),
        ),
    }
    columns = metric_columns.get(metric, metric_columns["Stealing Attack"])

    table_data = []
    for item in data:
        row = {'Watermark': item['name']}
        for label, key in columns:
            row[label] = f"{item[key]:.3f}"

        paper_link = item.get('paperLink')
        if paper_link:
            # The URL is user-submitted; escape it so quotes or angle
            # brackets cannot break out of the href attribute.
            safe_link = escape(str(paper_link), quote=True)
            row['Reference'] = f'<a href="{safe_link}" target="_blank" style="color: #3B82F6; text-decoration: underline; font-size: 0.8em;">π Paper</a>'
        else:
            row['Reference'] = '-'

        table_data.append(row)

    return pd.DataFrame(table_data)
|
|
|
|
|
|
|
|
def update_interface(model, metric):
    """Rebuild the plot and table for the selected model and setting.

    Reloads the leaderboard on every call, filters it for ``model`` and
    ``metric``, and returns ``(scatter_plot, table_data)`` in that order.
    """
    entries = filter_data(load_leaderboard_data(), model, metric)
    figure = create_scatter_plot(entries, metric)
    frame = create_table_data(entries, metric)
    return figure, frame
|
|
|
|
|
|
|
|
def submit_watermark_data(name, model, paper_link, normalized_utility, detection_rate,
                          absolute_utility_degradation, removal_detection_rate,
                          adversary_bert_score, adversary_detection_rate):
    """Validate a submission and queue it in ``pending_submissions.json``.

    The entry is not added to the leaderboard here: it is appended to the
    pending file with ``status`` set to ``'pending'``.

    Args:
        name: Watermark name; required, surrounding whitespace is stripped.
        model: Model identifier; required.
        paper_link: Optional URL; when given it must start with ``http://``
            or ``https://``.
        normalized_utility, detection_rate: Attack-free metric pair.
        absolute_utility_degradation, removal_detection_rate: Watermark
            Removal metric pair.
        adversary_bert_score, adversary_detection_rate: Stealing Attack
            metric pair.

    Returns:
        On success ``(message, scatter_plot, table_data)``, where the plot
        and table are rebuilt from the approved entries for ``model`` in the
        Attack-free setting. On any failure ``(message, gr.update())``.
    """
    # NOTE(review): success returns three values while failures return two;
    # confirm this matches the outputs wired to the submit button.
    if not name or not name.strip():
        return "❌ Error: Watermark name is required", gr.update()
    name = name.strip()

    if not model:
        return "❌ Error: Model selection is required", gr.update()

    if paper_link and paper_link.strip():
        paper_link = paper_link.strip()
        if not paper_link.startswith(('http://', 'https://')):
            return "❌ Error: Paper link must start with http:// or https://", gr.update()
    else:
        paper_link = None

    has_attack_free_data = normalized_utility is not None and detection_rate is not None
    has_removal_data = absolute_utility_degradation is not None and removal_detection_rate is not None
    has_stealing_data = adversary_bert_score is not None and adversary_detection_rate is not None

    if not has_attack_free_data and not has_removal_data and not has_stealing_data:
        return "❌ Error: Please provide at least one complete set of metrics:\n• Attack-free: Normalized Utility + Detection Rate\n• Watermark Removal: Absolute Utility Degradation + Removal Detection Rate\n• Stealing Attack: Adversary BERT Score + Adversary Detection Rate", gr.update()

    # Range checks run only for complete pairs, in the same order as the
    # form sections.
    # NOTE(review): a score of exactly 0 is rejected although the message
    # advertises 0.000 as the lower bound - confirm the intended bound.
    metric_sets = (
        (has_attack_free_data, normalized_utility, "Normalized Utility",
         detection_rate, "Detection Rate"),
        (has_removal_data, absolute_utility_degradation, "Absolute Utility Degradation",
         removal_detection_rate, "Removal Detection Rate"),
        (has_stealing_data, adversary_bert_score, "Adversary BERT Score",
         adversary_detection_rate, "Adversary Detection Rate"),
    )
    for is_complete, score, score_label, rate, rate_label in metric_sets:
        if not is_complete:
            continue
        if score <= 0 or score > 1.0:
            return f"❌ Error: {score_label} must be between 0.000 and 1.000", gr.update()
        if rate < 0.0 or rate > 100.0:
            return f"❌ Error: {rate_label} must be between 0.000 and 100.000", gr.update()

    has_partial_adversary = (
        (adversary_bert_score is not None and adversary_bert_score > 0)
        or (adversary_detection_rate is not None and adversary_detection_rate > 0)
    )
    if has_partial_adversary and not has_stealing_data:
        return "❌ Error: If you provide one adversary metric, you must provide both Adversary BERT Score and Adversary Detection Rate", gr.update()

    new_entry = {
        "name": name,
        "model": model,
        "normalizedUtility": normalized_utility,
        "detectionRate": detection_rate,
    }
    if paper_link:
        new_entry["paperLink"] = paper_link

    optional_metrics = (
        ("absoluteUtilityDegregation", absolute_utility_degradation),
        ("removal_detectionRate", removal_detection_rate),
        ("adversaryBERTscore", adversary_bert_score),
        ("adversaryDetectionRate", adversary_detection_rate),
    )
    for key, value in optional_metrics:
        if value is not None:
            new_entry[key] = value

    # The leaderboard is only read here, so an unreadable file is treated
    # as "no approved entries".
    try:
        with open('leaderboard.json', 'r', encoding='utf-8') as f:
            approved_data = json.load(f)
    except (OSError, ValueError):
        approved_data = []

    if any(entry.get('name') == name and entry.get('model') == model
           for entry in approved_data):
        return f"❌ Error: A watermark named '{name}' already exists for {model}", gr.update()

    # The pending file is rewritten below, so only a missing file may be
    # treated as empty. Anything else (corrupt JSON, permission error) must
    # abort, or the rewrite would discard the queued submissions.
    try:
        with open('pending_submissions.json', 'r', encoding='utf-8') as f:
            pending_data = json.load(f)
    except FileNotFoundError:
        pending_data = []
    except (OSError, ValueError) as e:
        return f"❌ Error saving submission: {str(e)}", gr.update()

    if any(entry.get('name') == name and entry.get('model') == model
           for entry in pending_data):
        return f"❌ Error: A watermark named '{name}' is already pending approval for {model}", gr.update()

    # One timestamp for both fields so they cannot disagree.
    submitted = datetime.now()
    new_entry['submitted_at'] = submitted.isoformat()
    new_entry['status'] = 'pending'
    new_entry['submission_id'] = f"{name}_{model}_{int(submitted.timestamp())}"

    pending_data.append(new_entry)

    try:
        with open('pending_submissions.json', 'w', encoding='utf-8') as f:
            json.dump(pending_data, f, indent=2)

        # The new entry is still pending, so the refreshed view is built
        # from the approved data only.
        filtered_data = filter_data(approved_data, model, "Attack-free")
        scatter_plot = create_scatter_plot(filtered_data, "Attack-free")
        table_data = create_table_data(filtered_data, "Attack-free")

        success_msg = f"✅ Successfully submitted '{name}' for {model} for approval! Your submission will be reviewed by the administrator before appearing on the leaderboard."
        return success_msg, scatter_plot, table_data

    except Exception as e:
        return f"❌ Error saving submission: {str(e)}", gr.update()
|
|
|
|
|
|
|
|
def clear_form():
    """Return nine ``None`` values, one per submission form field."""
    field_count = 9
    return (None,) * field_count
|
|
|
|
|
|
|
|
def load_pending_submissions():
    """Read ``pending_submissions.json`` into a display-ready DataFrame.

    Returns:
        One row per pending entry, with metric values formatted to three
        decimals (``'-'`` when absent) and the timestamp cut to its first
        19 characters. When the file is empty, or when anything goes wrong
        while reading or formatting it, an empty DataFrame with the same
        column headers is returned and the error is printed.
    """
    columns = ["ID", "Name", "Model", "Paper Link", "Attack-free Utility", "Attack-free Detection",
               "Removal Degradation", "Removal Detection", "Adversary BERT", "Adversary Detection", "Submitted At"]

    def three_decimals(record, key):
        # '-' marks a metric that was not submitted.
        value = record.get(key)
        return '-' if value is None else f"{value:.3f}"

    try:
        with open('pending_submissions.json', 'r') as f:
            pending = json.load(f)

        if not pending:
            return pd.DataFrame(columns=columns)

        rows = []
        for record in pending:
            stamp = record.get('submitted_at')
            rows.append({
                "ID": record.get('submission_id', 'N/A'),
                "Name": record.get('name', 'N/A'),
                "Model": record.get('model', 'N/A'),
                "Paper Link": record.get('paperLink', '-'),
                "Attack-free Utility": three_decimals(record, 'normalizedUtility'),
                "Attack-free Detection": three_decimals(record, 'detectionRate'),
                "Removal Degradation": three_decimals(record, 'absoluteUtilityDegregation'),
                "Removal Detection": three_decimals(record, 'removal_detectionRate'),
                "Adversary BERT": three_decimals(record, 'adversaryBERTscore'),
                "Adversary Detection": three_decimals(record, 'adversaryDetectionRate'),
                "Submitted At": stamp[:19] if stamp else 'N/A',
            })

        return pd.DataFrame(rows)

    except Exception as e:
        print(f"Error loading pending submissions: {e}")
        return pd.DataFrame(columns=columns)
|
|
|
|
|
def approve_submission(submission_id, admin_password):
    """Move a pending submission onto the leaderboard.

    The matching entry is removed from ``pending_submissions.json``, stripped
    of its bookkeeping fields (``submitted_at``, ``status``,
    ``submission_id``) and appended to ``leaderboard.json``.

    Args:
        submission_id: The ``submission_id`` of the pending entry;
            surrounding whitespace is ignored.
        admin_password: Must equal the ``ADMIN_PASSWORD`` environment
            variable, or the built-in default when that is unset.

    Returns:
        ``(message, refreshed_pending_table)`` on success, otherwise
        ``(message, gr.update())``.
    """
    # The default keeps existing deployments working; setting ADMIN_PASSWORD
    # lets operators avoid relying on a credential stored in source.
    if admin_password != os.environ.get("ADMIN_PASSWORD", "admin123"):
        return "❌ Access denied: Invalid admin password", gr.update()

    wanted_id = submission_id.strip() if isinstance(submission_id, str) else submission_id
    if not wanted_id:
        return "❌ Submission not found", gr.update()

    try:
        # Only a missing file means "nothing pending"; read or parse errors
        # fall through to the outer handler and are reported.
        try:
            with open('pending_submissions.json', 'r', encoding='utf-8') as f:
                pending_data = json.load(f)
        except FileNotFoundError:
            pending_data = []

        match_index = next(
            (i for i, entry in enumerate(pending_data)
             if entry.get('submission_id') == wanted_id),
            None,
        )
        if match_index is None:
            return "❌ Submission not found", gr.update()
        approved_entry = pending_data.pop(match_index)

        for bookkeeping_key in ('submitted_at', 'status', 'submission_id'):
            approved_entry.pop(bookkeeping_key, None)

        # Same rule for the leaderboard: treating a corrupt file as empty
        # would overwrite every approved entry with just this one.
        try:
            with open('leaderboard.json', 'r', encoding='utf-8') as f:
                approved_data = json.load(f)
        except FileNotFoundError:
            approved_data = []

        approved_data.append(approved_entry)

        with open('leaderboard.json', 'w', encoding='utf-8') as f:
            json.dump(approved_data, f, indent=2)

        with open('pending_submissions.json', 'w', encoding='utf-8') as f:
            json.dump(pending_data, f, indent=2)

        return f"✅ Approved submission: {approved_entry.get('name', 'Unknown')}", load_pending_submissions()

    except Exception as e:
        return f"❌ Error approving submission: {str(e)}", gr.update()
|
|
|
|
|
def reject_submission(submission_id, admin_password):
    """Discard a pending submission.

    The matching entry is removed from ``pending_submissions.json``; the
    leaderboard file is not touched.

    Args:
        submission_id: The ``submission_id`` of the pending entry;
            surrounding whitespace is ignored.
        admin_password: Must equal the ``ADMIN_PASSWORD`` environment
            variable, or the built-in default when that is unset.

    Returns:
        ``(message, refreshed_pending_table)`` on success, otherwise
        ``(message, gr.update())``.
    """
    # The default keeps existing deployments working; setting ADMIN_PASSWORD
    # lets operators avoid relying on a credential stored in source.
    if admin_password != os.environ.get("ADMIN_PASSWORD", "admin123"):
        return "❌ Access denied: Invalid admin password", gr.update()

    wanted_id = submission_id.strip() if isinstance(submission_id, str) else submission_id
    if not wanted_id:
        return "❌ Submission not found", gr.update()

    try:
        # Only a missing file means "nothing pending"; read or parse errors
        # fall through to the outer handler instead of looking like
        # "not found".
        try:
            with open('pending_submissions.json', 'r', encoding='utf-8') as f:
                pending_data = json.load(f)
        except FileNotFoundError:
            pending_data = []

        match_index = next(
            (i for i, entry in enumerate(pending_data)
             if entry.get('submission_id') == wanted_id),
            None,
        )
        if match_index is None:
            return "❌ Submission not found", gr.update()
        rejected_entry = pending_data.pop(match_index)

        with open('pending_submissions.json', 'w', encoding='utf-8') as f:
            json.dump(pending_data, f, indent=2)

        return f"❌ Rejected submission: {rejected_entry.get('name', 'Unknown')}", load_pending_submissions()

    except Exception as e:
        return f"❌ Error rejecting submission: {str(e)}", gr.update()
|
|
|
|
|
|
|
|
def toggle_add_data_section(section):
    """Return a Gradio update that flips ``section``'s ``visible`` flag."""
    currently_visible = section.visible
    return gr.update(visible=not currently_visible)
|
|
|
|
|
|
|
|
def create_interface(): |
|
|
|
|
|
css = """ |
|
|
.gradio-container { |
|
|
max-width: 1200px !important; |
|
|
margin: 0 auto !important; |
|
|
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); |
|
|
min-height: 100vh; |
|
|
} |
|
|
.title { |
|
|
text-align: center; |
|
|
margin: 20px 0; |
|
|
font-size: 3rem; |
|
|
font-weight: bold; |
|
|
background: linear-gradient(45deg, #667eea 0%, #764ba2 100%); |
|
|
-webkit-background-clip: text; |
|
|
-webkit-text-fill-color: transparent; |
|
|
background-clip: text; |
|
|
text-shadow: 2px 2px 4px rgba(0,0,0,0.1); |
|
|
} |
|
|
.subtitle { |
|
|
text-align: center; |
|
|
margin-bottom: 30px; |
|
|
font-size: 1.3rem; |
|
|
color: #4a5568; |
|
|
font-weight: 500; |
|
|
} |
|
|
.controls { |
|
|
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); |
|
|
padding: 30px; |
|
|
border-radius: 15px; |
|
|
margin-bottom: 25px; |
|
|
box-shadow: 0 8px 32px rgba(0,0,0,0.1); |
|
|
border: 1px solid rgba(255,255,255,0.2); |
|
|
} |
|
|
.controls label { |
|
|
color: white !important; |
|
|
font-weight: bold !important; |
|
|
font-size: 1.2rem !important; |
|
|
} |
|
|
.controls .gr-radio { |
|
|
background: rgba(255,255,255,0.1) !important; |
|
|
border-radius: 10px !important; |
|
|
padding: 12px !important; |
|
|
} |
|
|
.controls .gr-radio label { |
|
|
color: white !important; |
|
|
font-size: 1.1rem !important; |
|
|
} |
|
|
.controls h3 { |
|
|
font-size: 1.4rem !important; |
|
|
margin-bottom: 15px !important; |
|
|
} |
|
|
#highlighted-add-data { |
|
|
background: linear-gradient(135deg, #E0F2FE 0%, #B3E5FC 100%) !important; |
|
|
border: 2px solid #81D4FA !important; |
|
|
border-radius: 15px !important; |
|
|
box-shadow: 0 10px 40px rgba(129, 212, 250, 0.3) !important; |
|
|
margin: 20px 0 !important; |
|
|
} |
|
|
#highlighted-add-data .gr-accordion-header { |
|
|
background: linear-gradient(135deg, #81D4FA 0%, #4FC3F7 100%) !important; |
|
|
color: white !important; |
|
|
font-weight: bold !important; |
|
|
font-size: 1.2rem !important; |
|
|
padding: 15px 20px !important; |
|
|
border-radius: 15px 15px 0 0 !important; |
|
|
} |
|
|
#highlighted-add-data .gr-accordion-content { |
|
|
background: rgba(255,255,255,0.95) !important; |
|
|
border-radius: 0 0 15px 15px !important; |
|
|
padding: 25px !important; |
|
|
} |
|
|
.gr-button { |
|
|
border-radius: 10px !important; |
|
|
font-weight: bold !important; |
|
|
transition: all 0.3s ease !important; |
|
|
} |
|
|
.gr-button:hover { |
|
|
transform: translateY(-2px) !important; |
|
|
box-shadow: 0 5px 15px rgba(0,0,0,0.2) !important; |
|
|
} |
|
|
.gr-plot { |
|
|
border-radius: 15px !important; |
|
|
box-shadow: 0 8px 32px rgba(0,0,0,0.1) !important; |
|
|
background: white !important; |
|
|
padding: 20px !important; |
|
|
} |
|
|
.gr-dataframe { |
|
|
border-radius: 15px !important; |
|
|
box-shadow: 0 8px 32px rgba(0,0,0,0.1) !important; |
|
|
background: white !important; |
|
|
overflow: hidden !important; |
|
|
} |
|
|
.gr-accordion { |
|
|
border-radius: 15px !important; |
|
|
box-shadow: 0 8px 32px rgba(0,0,0,0.1) !important; |
|
|
background: white !important; |
|
|
margin: 15px 0 !important; |
|
|
} |
|
|
.gr-accordion-header { |
|
|
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important; |
|
|
color: white !important; |
|
|
font-weight: bold !important; |
|
|
padding: 15px 20px !important; |
|
|
border-radius: 15px 15px 0 0 !important; |
|
|
} |
|
|
.gr-accordion-content { |
|
|
background: rgba(255,255,255,0.95) !important; |
|
|
border-radius: 0 0 15px 15px !important; |
|
|
padding: 20px !important; |
|
|
} |
|
|
#submit-btn { |
|
|
background: linear-gradient(135deg, #29B6F6 0%, #0288D1 100%) !important; |
|
|
border: 2px solid #0277BD !important; |
|
|
color: white !important; |
|
|
font-weight: bold !important; |
|
|
font-size: 1.1rem !important; |
|
|
padding: 15px 30px !important; |
|
|
border-radius: 12px !important; |
|
|
box-shadow: 0 8px 25px rgba(41, 182, 246, 0.4) !important; |
|
|
transition: all 0.3s ease !important; |
|
|
} |
|
|
#submit-btn:hover { |
|
|
background: linear-gradient(135deg, #0288D1 0%, #0277BD 100%) !important; |
|
|
transform: translateY(-3px) !important; |
|
|
box-shadow: 0 12px 35px rgba(41, 182, 246, 0.6) !important; |
|
|
} |
|
|
#owner-controls { |
|
|
background: linear-gradient(135deg, #FFE0E0 0%, #FFCDD2 100%) !important; |
|
|
border: 2px solid #FF5722 !important; |
|
|
border-radius: 15px !important; |
|
|
box-shadow: 0 10px 40px rgba(255, 87, 34, 0.3) !important; |
|
|
margin: 20px 0 !important; |
|
|
} |
|
|
#owner-controls .gr-accordion-header { |
|
|
background: linear-gradient(135deg, #FF5722 0%, #D32F2F 100%) !important; |
|
|
color: white !important; |
|
|
font-weight: bold !important; |
|
|
font-size: 1.2rem !important; |
|
|
padding: 15px 20px !important; |
|
|
border-radius: 15px 15px 0 0 !important; |
|
|
} |
|
|
#owner-controls .gr-accordion-content { |
|
|
background: rgba(255,255,255,0.95) !important; |
|
|
border-radius: 0 0 15px 15px !important; |
|
|
padding: 25px !important; |
|
|
} |
|
|
#approve-btn { |
|
|
background: linear-gradient(135deg, #4CAF50 0%, #2E7D32 100%) !important; |
|
|
border: 2px solid #388E3C !important; |
|
|
color: white !important; |
|
|
font-weight: bold !important; |
|
|
font-size: 1.1rem !important; |
|
|
padding: 15px 30px !important; |
|
|
border-radius: 12px !important; |
|
|
box-shadow: 0 8px 25px rgba(76, 175, 80, 0.4) !important; |
|
|
transition: all 0.3s ease !important; |
|
|
} |
|
|
#approve-btn:hover { |
|
|
background: linear-gradient(135deg, #2E7D32 0%, #1B5E20 100%) !important; |
|
|
transform: translateY(-3px) !important; |
|
|
box-shadow: 0 12px 35px rgba(76, 175, 80, 0.6) !important; |
|
|
} |
|
|
#reject-btn { |
|
|
background: linear-gradient(135deg, #F44336 0%, #C62828 100%) !important; |
|
|
border: 2px solid #D32F2F !important; |
|
|
color: white !important; |
|
|
font-weight: bold !important; |
|
|
font-size: 1.1rem !important; |
|
|
padding: 15px 30px !important; |
|
|
border-radius: 12px !important; |
|
|
box-shadow: 0 8px 25px rgba(244, 67, 54, 0.4) !important; |
|
|
transition: all 0.3s ease !important; |
|
|
} |
|
|
#reject-btn:hover { |
|
|
background: linear-gradient(135deg, #C62828 0%, #B71C1C 100%) !important; |
|
|
transform: translateY(-3px) !important; |
|
|
box-shadow: 0 12px 35px rgba(244, 67, 54, 0.6) !important; |
|
|
} |
|
|
#guideline-section { |
|
|
background: linear-gradient(135deg, #E8F5E8 0%, #C8E6C9 100%) !important; |
|
|
border: 2px solid #4CAF50 !important; |
|
|
border-radius: 15px !important; |
|
|
box-shadow: 0 10px 40px rgba(76, 175, 80, 0.3) !important; |
|
|
margin: 20px 0 !important; |
|
|
} |
|
|
#guideline-section .gr-accordion-header { |
|
|
background: linear-gradient(135deg, #4CAF50 0%, #2E7D32 100%) !important; |
|
|
color: white !important; |
|
|
font-weight: bold !important; |
|
|
font-size: 1.2rem !important; |
|
|
padding: 15px 20px !important; |
|
|
border-radius: 15px 15px 0 0 !important; |
|
|
} |
|
|
#guideline-section .gr-accordion-content { |
|
|
background: rgba(255,255,255,0.95) !important; |
|
|
border-radius: 0 0 15px 15px !important; |
|
|
padding: 25px !important; |
|
|
} |
|
|
""" |
|
|
|
|
|
with gr.Blocks(css=css, title="Watermark Leaderboard for LLMs") as demo: |
|
|
|
|
|
gr.HTML(""" |
|
|
<div class="title"> |
|
|
π Watermark Leaderboard for LLMs π |
|
|
</div> |
|
|
<div class="subtitle"> |
|
|
π Interactive leaderboard for comparing watermark performance across different models and evaluation settings |
|
|
</div> |
|
|
""") |
|
|
|
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(scale=1): |
|
|
gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><h3 style='color: #667eea; margin: 0; font-weight: bold;'>π€ Model Selection</h3></div>") |
|
|
model_selector = gr.Radio( |
|
|
choices=["LLaMA3", "DeepSeek"], |
|
|
value="LLaMA3", |
|
|
label="Model", |
|
|
info="Select the model to display" |
|
|
) |
|
|
with gr.Column(scale=1): |
|
|
gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><h3 style='color: #667eea; margin: 0; font-weight: bold;'>βοΈ Evaluation Setting</h3></div>") |
|
|
metric_selector = gr.Radio( |
|
|
choices=["Attack-free", "Watermark Removal", "Stealing Attack"], |
|
|
value="Attack-free", |
|
|
label="Setting", |
|
|
info="Select the evaluation setting" |
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
with gr.Accordion("π Add Your Data to the Leaderboard", open=False, elem_id="highlighted-add-data"): |
|
|
gr.HTML(""" |
|
|
<div style='text-align: center; margin-bottom: 20px;'> |
|
|
<h2 style='color: #0277BD; margin: 0; font-size: 1.5rem;'>π Submit Your Watermark Performance Results</h2> |
|
|
<p style='color: #374151; margin: 10px 0 0 0;'>Contribute to the community by sharing your watermark evaluation results</p> |
|
|
</div> |
|
|
<div style='background: #E3F2FD; border: 1px solid #2196F3; border-radius: 8px; padding: 15px; margin-bottom: 20px;'> |
|
|
<h4 style='color: #1976D2; margin: 0 0 10px 0;'>π Submission Requirements</h4> |
|
|
<p style='color: #374151; margin: 0 0 8px 0;'>Provide at least one complete set of metrics:</p> |
|
|
<ul style='color: #374151; margin: 0; padding-left: 20px;'> |
|
|
<li><strong>Attack-free:</strong> Normalized Utility + Detection Rate</li> |
|
|
<li><strong>Watermark Removal:</strong> Absolute Utility Degradation + Removal Detection Rate</li> |
|
|
<li><strong>Stealing Attack:</strong> Adversary BERT Score + Adversary Detection Rate</li> |
|
|
</ul> |
|
|
</div> |
|
|
""") |
|
|
with gr.Row(): |
|
|
with gr.Column(scale=1): |
|
|
|
|
|
gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><h3 style='color: #0277BD; margin: 0;'>π Basic Information</h3></div>") |
|
|
watermark_name = gr.Textbox( |
|
|
label="Watermark Name", |
|
|
placeholder="e.g., MyWatermark, Watermark-X", |
|
|
info="Unique identifier for your watermark" |
|
|
) |
|
|
paper_link = gr.Textbox( |
|
|
label="Paper Link (Optional)", |
|
|
placeholder="https://arxiv.org/abs/xxxx.xxxxx or https://...", |
|
|
info="Link to the paper describing this watermark method" |
|
|
) |
|
|
submission_model = gr.Radio( |
|
|
choices=["LLaMA3", "DeepSeek"], |
|
|
label="Model", |
|
|
value="LLaMA3", |
|
|
info="Select the model used" |
|
|
) |
|
|
|
|
|
with gr.Column(scale=1): |
|
|
|
|
|
gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><h3 style='color: #0277BD; margin: 0;'>β‘ Attack-free Metrics (Optional - Both Required if One is Provided)</h3></div>") |
|
|
normalized_utility = gr.Number( |
|
|
label="Normalized Utility", |
|
|
value=None, |
|
|
minimum=0.0, |
|
|
maximum=1.0, |
|
|
step=0.001, |
|
|
info="Text quality metric (0.000 - 1.000)" |
|
|
) |
|
|
detection_rate = gr.Number( |
|
|
label="Detection Rate (%)", |
|
|
value=None, |
|
|
minimum=0.0, |
|
|
maximum=100.0, |
|
|
step=0.001, |
|
|
info="Watermark detection accuracy (0.000 - 100.000%)" |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(scale=1): |
|
|
|
|
|
gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><h3 style='color: #0277BD; margin: 0;'>π‘οΈ Watermark Removal (Optional)</h3></div>") |
|
|
absolute_utility_degradation = gr.Number( |
|
|
label="Absolute Utility Degradation", |
|
|
value=None, |
|
|
minimum=0.0, |
|
|
maximum=1.0, |
|
|
step=0.001, |
|
|
info="Resistance to removal attacks (0.000 - 1.000)" |
|
|
) |
|
|
removal_detection_rate = gr.Number( |
|
|
label="Removal Detection Rate (%)", |
|
|
value=None, |
|
|
minimum=0.0, |
|
|
maximum=100.0, |
|
|
step=0.001, |
|
|
info="Detection rate under removal attacks (0.000 - 100.000%)" |
|
|
) |
|
|
|
|
|
with gr.Column(scale=1): |
|
|
|
|
|
gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><h3 style='color: #0277BD; margin: 0;'>π― Stealing Attack (Optional)</h3></div>") |
|
|
adversary_bert_score = gr.Number( |
|
|
label="Adversary BERT Score", |
|
|
value=None, |
|
|
minimum=0.0, |
|
|
maximum=1.0, |
|
|
step=0.001, |
|
|
info="Performance under adversarial conditions (0.000 - 1.000)" |
|
|
) |
|
|
adversary_detection_rate = gr.Number( |
|
|
label="Adversary Detection Rate (%)", |
|
|
value=None, |
|
|
minimum=0.0, |
|
|
maximum=100.0, |
|
|
step=0.001, |
|
|
info="Detection rate under adversarial attacks (0.000 - 100.000%)" |
|
|
) |
|
|
|
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(scale=1): |
|
|
submit_btn = gr.Button( |
|
|
"π Submit Data to Leaderboard", |
|
|
variant="primary", |
|
|
size="lg", |
|
|
elem_id="submit-btn" |
|
|
) |
|
|
with gr.Column(scale=1): |
|
|
clear_btn = gr.Button( |
|
|
"ποΈ Clear Form", |
|
|
variant="secondary", |
|
|
size="lg" |
|
|
) |
|
|
|
|
|
|
|
|
status_message = gr.Markdown("", visible=True) |
|
|
|
|
|
|
|
|
|
|
|
scatter_plot = gr.Plot( |
|
|
label="Performance Scatter Plot", |
|
|
show_label=True |
|
|
) |
|
|
|
|
|
|
|
|
table = gr.DataFrame( |
|
|
label="Performance Table", |
|
|
show_label=True, |
|
|
interactive=False, |
|
|
wrap=True |
|
|
) |
|
|
|
|
|
|
|
|
with gr.Accordion("π Guideline for Submitting Watermark Performance Results", open=False, elem_id="guideline-section"): |
|
|
gr.HTML(""" |
|
|
<div style="padding: 20px;"> |
|
|
<h3>Guideline for Submitting Watermark Performance Results</h3> |
|
|
<h4>1. Datasets</h4> |
|
|
<ul> |
|
|
<li><strong>Text Generation (C4 dataset)</strong> |
|
|
<ul> |
|
|
<li>Training: first 20,000 samples</li> |
|
|
<li>Testing: 13,860 samples</li> |
|
|
<li>Reference script: <code>Files/Reproducibility/C4_dataset_download.py</code></li> |
|
|
</ul> |
|
|
</li> |
|
|
<li><strong>Text Summarization (CNN/Daily Mail dataset)</strong> |
|
|
<ul> |
|
|
<li>Training: first 10,000β20,000 samples</li> |
|
|
<li>Testing: 1,000 samples</li> |
|
|
<li>Reference script: <code>Files/Reproducibility/CNN_dataset_download.py</code></li> |
|
|
</ul> |
|
|
</li> |
|
|
</ul> |
|
|
<h4>2. Models</h4> |
|
|
<ul> |
|
|
<li>Use open-source models available on Hugging Face: |
|
|
<ul> |
|
|
<li>DeepSeek: "deepseek-ai/deepseek-llm-7b-base"</li> |
|
|
<li>LLaMA-3: "meta-llama/Meta-Llama-3-8B"</li> |
|
|
</ul> |
|
|
</li> |
|
|
</ul> |
|
|
<h4>3. Evaluation Settings</h4> |
|
|
<ul> |
|
|
<li><strong>(a) Attack-Free Setting</strong> |
|
|
<ul> |
|
|
<li>Generate 13,860 watermarked outputs on the C4 test set.</li> |
|
|
<li>Report: Detection Rate and Normalized Utility (see Metrics).</li> |
|
|
</ul> |
|
|
</li> |
|
|
<li><strong>(b) Watermark Removal Setting</strong> |
|
|
<ul> |
|
|
<li>Apply Dipper to paraphrase watermarked outputs.</li> |
|
|
<li>Report: |
|
|
<ul> |
|
|
<li>Detection Rate after attack</li> |
|
|
<li>Normalized Utility after attack</li> |
|
|
<li>Absolute Utility Degradation (difference before vs. after attack)</li> |
|
|
</ul> |
|
|
</li> |
|
|
<li>Reference scripts: <code>Files/Reproducibility/Attack_dipper.py</code></li> |
|
|
</ul> |
|
|
</li> |
|
|
<li><strong>(c) Stealing Attack Setting</strong> |
|
|
<ul> |
|
|
<li>Generate 20,000 watermarked samples for training a surrogate model using LoRA.</li> |
|
|
<li>Use the surrogate model for summarization on 1,000 test samples.</li> |
|
|
<li>Report: Detection Rate and Normalized Utility on the surrogate's outputs.</li> |
|
|
<li>Reference scripts: <code>Files/Reproducibility/Finetune_sum.py</code>, <code>Files/Reproducibility/Inference_sum.py</code></li> |
|
|
</ul> |
|
|
</li> |
|
|
</ul> |
|
|
<h4>4. Metrics</h4> |
|
|
<ul> |
|
|
<li><strong>Detection Rate</strong> |
|
|
<ul> |
|
|
<li>Average accuracy across the test set (e.g., 13,860 examples for text generation).</li> |
|
|
<li>Use your own detector implementation.</li> |
|
|
</ul> |
|
|
</li> |
|
|
<li><strong>Normalized Utility</strong> |
|
|
<ul> |
|
|
<li>Defined as the mean of:</li> |
|
|
<li>BERTScore (<code>Files/Reproducibility/BERT_score.py</code>)</li> |
|
|
<li>Entity Similarity Score (<code>Files/Reproducibility/Entity_similarity_score.py</code>)</li> |
|
|
</ul> |
|
|
</li> |
|
|
<li><strong>Absolute Utility Degradation</strong> |
|
|
<ul> |
|
|
<li>The absolute change in Normalized Utility between attack-free and attacked outputs.</li> |
|
|
</ul> |
|
|
</li> |
|
|
</ul> |
|
|
<h4>5. Submission</h4> |
|
|
<ul> |
|
|
<li>You may submit results for one or more evaluation settings (Attack-Free, Removal, Stealing).</li> |
|
|
<li>Please include: |
|
|
<ul> |
|
|
<li>Model(s) evaluated</li> |
|
|
<li>Dataset(s) used</li> |
|
|
<li>Scripts/configuration details if modified</li> |
|
|
<li>Reported metrics in the required format</li> |
|
|
</ul> |
|
|
</li> |
|
|
</ul> |
|
|
<p><strong>Reproducibility scripts are available in the Files tab of this Space.</strong></p> |
|
|
</div> |
|
|
""") |
|
|
|
|
|
|
|
|
with gr.Accordion("π Owner Controls - Pending Submissions", open=False, elem_id="owner-controls"): |
|
|
gr.HTML(""" |
|
|
<div style='text-align: center; margin-bottom: 20px;'> |
|
|
<h2 style='color: #D32F2F; margin: 0; font-size: 1.5rem;'>🛡️ Administrator Approval Panel</h2> |
|
|
<p style='color: #374151; margin: 10px 0 0 0;'>Review and approve pending submissions before they appear on the leaderboard</p> |
|
|
</div> |
|
|
""") |
|
|
|
|
|
|
|
|
pending_table = gr.DataFrame( |
|
|
label="π Pending Submissions", |
|
|
show_label=True, |
|
|
interactive=False, |
|
|
wrap=True, |
|
|
headers=["ID", "Name", "Model", "Paper Link", "Attack-free Utility", "Attack-free Detection", |
|
|
"Removal Degradation", "Removal Detection", "Adversary BERT", "Adversary Detection", "Submitted At"] |
|
|
) |
|
|
|
|
|
|
|
|
admin_password_input = gr.Textbox( |
|
|
label="π Admin Password", |
|
|
placeholder="Enter admin password to access controls", |
|
|
type="password", |
|
|
info="Required for approval/rejection actions" |
|
|
) |
|
|
|
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(scale=1): |
|
|
submission_id_input = gr.Textbox( |
|
|
label="Submission ID", |
|
|
placeholder="Enter submission ID to approve/reject", |
|
|
info="Copy from the pending submissions table" |
|
|
) |
|
|
approve_btn = gr.Button( |
|
|
"β
Approve Submission", |
|
|
variant="primary", |
|
|
size="lg", |
|
|
elem_id="approve-btn" |
|
|
) |
|
|
with gr.Column(scale=1): |
|
|
reject_btn = gr.Button( |
|
|
"β Reject Submission", |
|
|
variant="stop", |
|
|
size="lg", |
|
|
elem_id="reject-btn" |
|
|
) |
|
|
refresh_pending_btn = gr.Button( |
|
|
"π Refresh Pending", |
|
|
variant="secondary", |
|
|
size="lg" |
|
|
) |
|
|
|
|
|
approval_status = gr.Markdown("", visible=True) |
|
|
|
|
|
|
|
|
model_selector.change( |
|
|
fn=update_interface, |
|
|
inputs=[model_selector, metric_selector], |
|
|
outputs=[scatter_plot, table] |
|
|
) |
|
|
|
|
|
metric_selector.change( |
|
|
fn=update_interface, |
|
|
inputs=[model_selector, metric_selector], |
|
|
outputs=[scatter_plot, table] |
|
|
) |
|
|
|
|
|
|
|
|
submit_btn.click( |
|
|
fn=submit_watermark_data, |
|
|
inputs=[ |
|
|
watermark_name, |
|
|
submission_model, |
|
|
paper_link, |
|
|
normalized_utility, |
|
|
detection_rate, |
|
|
absolute_utility_degradation, |
|
|
removal_detection_rate, |
|
|
adversary_bert_score, |
|
|
adversary_detection_rate |
|
|
], |
|
|
outputs=[status_message, scatter_plot, table] |
|
|
) |
|
|
|
|
|
|
|
|
clear_btn.click( |
|
|
fn=clear_form, |
|
|
outputs=[ |
|
|
watermark_name, |
|
|
paper_link, |
|
|
submission_model, |
|
|
normalized_utility, |
|
|
detection_rate, |
|
|
absolute_utility_degradation, |
|
|
removal_detection_rate, |
|
|
adversary_bert_score, |
|
|
adversary_detection_rate |
|
|
] |
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
approve_btn.click( |
|
|
fn=approve_submission, |
|
|
inputs=[submission_id_input, admin_password_input], |
|
|
outputs=[approval_status, pending_table] |
|
|
) |
|
|
|
|
|
reject_btn.click( |
|
|
fn=reject_submission, |
|
|
inputs=[submission_id_input, admin_password_input], |
|
|
outputs=[approval_status, pending_table] |
|
|
) |
|
|
|
|
|
refresh_pending_btn.click( |
|
|
fn=load_pending_submissions, |
|
|
outputs=[pending_table] |
|
|
) |
|
|
|
|
|
|
|
|
demo.load( |
|
|
fn=lambda: update_interface("LLaMA3", "Attack-free"), |
|
|
outputs=[scatter_plot, table] |
|
|
) |
|
|
|
|
|
|
|
|
demo.load( |
|
|
fn=load_pending_submissions, |
|
|
outputs=[pending_table] |
|
|
) |
|
|
|
|
|
|
|
|
def clear_admin_password():
    """Return a Gradio update that sets the bound component's value to "".

    Registered as an extra click handler on the approve and reject
    buttons with ``admin_password_input`` as the only output, so the
    password textbox is emptied after either action is triggered.
    """
    blank_value = ""
    return gr.update(value=blank_value)
|
|
|
|
|
|
|
|
approve_btn.click( |
|
|
fn=clear_admin_password, |
|
|
outputs=[admin_password_input] |
|
|
) |
|
|
|
|
|
reject_btn.click( |
|
|
fn=clear_admin_password, |
|
|
outputs=[admin_password_input] |
|
|
) |
|
|
|
|
|
return demo |
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the interface via create_interface() and
    # launch it. The result stays bound to the module-level name `demo`.
    demo = create_interface()
    demo.launch()