Spaces:

AnonymousResearch
/

WatermarkLeaderboard

Sleeping

App Files Files Community

WatermarkLeaderboard / app.py

kirudang

Copy files from original watermark leaderboard

40b3335 about 1 month ago

raw

history blame

46.7 kB

	import gradio as gr
	import json
	import os
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	from datetime import datetime
	from plotly.subplots import make_subplots

	# Load leaderboard data
	def load_leaderboard_data():
	    """Load the approved leaderboard entries from ``leaderboard.json``.

	    Returns:
	        The list stored in the file. An empty list is returned when the file
	        is missing, unreadable, not valid JSON, or does not contain a JSON
	        array, so callers can always iterate over the result.
	    """
	    try:
	        with open('leaderboard.json', 'r', encoding='utf-8') as f:
	            entries = json.load(f)
	    except (OSError, ValueError):
	        # OSError: missing/unreadable file. ValueError covers both
	        # json.JSONDecodeError and UnicodeDecodeError.
	        return []

	    # Every consumer iterates over a list of dicts; any other top-level JSON
	    # value is treated like "no data" instead of failing further down.
	    return entries if isinstance(entries, list) else []

	# Filter data based on model and metric
	def filter_data(data, model, metric):
	    """Select and rank the leaderboard entries for one model and setting.

	    Args:
	        data: Iterable of leaderboard entry dicts.
	        model: Only entries whose ``'model'`` equals this value are kept.
	        metric: "Attack-free", "Watermark Removal" or "Stealing Attack".
	            Any other value yields an empty list.

	    Returns:
	        A new list of dicts holding ``name``, ``model`` and the two metric
	        fields of the chosen setting, sorted by the setting's detection-rate
	        field in descending order. Entries missing either metric field are
	        skipped. ``paperLink`` is carried over when the entry has one, so the
	        table builder can render the reference link.
	    """
	    # (utility field, detection field) per setting. The misspelling
	    # 'absoluteUtilityDegregation' is the key used in the stored JSON.
	    fields_by_metric = {
	        "Attack-free": ('normalizedUtility', 'detectionRate'),
	        "Watermark Removal": ('absoluteUtilityDegregation', 'removal_detectionRate'),
	        "Stealing Attack": ('adversaryBERTscore', 'adversaryDetectionRate'),
	    }
	    if metric not in fields_by_metric:
	        return []
	    utility_key, detection_key = fields_by_metric[metric]

	    filtered = []
	    for item in data:
	        if item.get('model') != model:
	            continue
	        utility = item.get(utility_key)
	        detection = item.get(detection_key)
	        if utility is None or detection is None:
	            continue

	        row = {
	            'name': item.get('name', ''),
	            'model': item.get('model', ''),
	            utility_key: utility,
	            detection_key: detection,
	        }
	        # Previously dropped here, which left the Reference column empty.
	        if item.get('paperLink'):
	            row['paperLink'] = item['paperLink']
	        filtered.append(row)

	    # Sort by detection rate (descending)
	    filtered.sort(key=lambda row: row[detection_key], reverse=True)
	    return filtered

	# Create scatter plot
	def create_scatter_plot(data, metric):
	    """Build the utility-vs-detection scatter plot for one evaluation setting.

	    Args:
	        data: Rows as produced by ``filter_data``: dicts with ``'name'`` plus
	            the two metric fields of the selected setting.
	        metric: "Attack-free" or "Watermark Removal"; any other value is
	            handled as "Stealing Attack".

	    Returns:
	        A ``go.Figure`` with one labelled marker per row, or an empty
	        ``go.Figure`` when ``data`` is empty.
	    """
	    if not data:
	        return go.Figure()

	    # Per-setting field names, hover labels and axis titles.
	    if metric == "Attack-free":
	        x_key, y_key = 'normalizedUtility', 'detectionRate'
	        x_hover, y_hover = 'Normalized Utility', 'Detection Rate'
	        x_title, y_title = "Normalized Utility", "Detection Rate (%)"
	    elif metric == "Watermark Removal":
	        x_key, y_key = 'absoluteUtilityDegregation', 'removal_detectionRate'
	        x_hover, y_hover = 'Abs Utility Degradation', 'Detection Rate'
	        x_title, y_title = "Absolute Utility Degradation", "Removal Detection Rate (%)"
	    else:  # Stealing Attack
	        x_key, y_key = 'adversaryBERTscore', 'adversaryDetectionRate'
	        x_hover, y_hover = 'Adversary BERT Score', 'Adversary Detection Rate'
	        x_title, y_title = "Adversary BERT Score", "Adversary Detection Rate (%)"

	    names = [item['name'] for item in data]
	    x_data = [item[x_key] for item in data]
	    y_data = [item[y_key] for item in data]

	    # Every hover line ends in a complete <br> tag; the "Watermark Removal"
	    # line used to end in the malformed "<br".
	    hover_template = (
	        '<b>%{text}</b><br>'
	        + x_hover + ': %{x:.3f}<br>'
	        + y_hover + ': %{y:.3f}%<br>'
	        + '<extra></extra>'
	    )

	    fig = go.Figure()
	    fig.add_trace(go.Scatter(
	        x=x_data,
	        y=y_data,
	        mode='markers+text',
	        marker=dict(
	            size=12,
	            color='#3B82F6',
	            line=dict(width=2, color='white')
	        ),
	        text=names,
	        textposition='top center',
	        textfont=dict(size=10, color='#374151'),
	        hovertemplate=hover_template
	    ))

	    fig.update_layout(
	        title=f"{metric} Performance Scatter Plot",
	        xaxis_title=x_title,
	        yaxis_title=y_title,
	        font=dict(size=12, color='#374151'),
	        plot_bgcolor='white',
	        paper_bgcolor='white',
	        xaxis=dict(
	            gridcolor='lightgray',
	            showgrid=True,
	            zeroline=False
	        ),
	        yaxis=dict(
	            gridcolor='lightgray',
	            showgrid=True,
	            zeroline=False
	        ),
	        margin=dict(l=60, r=60, t=80, b=60)
	    )

	    return fig

	# Create ranked table data (no styling is applied; superseded by the second create_table_data definition below)
	def create_table_data(data, metric):
	    """Build a ranked leaderboard table for one evaluation setting.

	    NOTE: a second ``create_table_data`` is defined further down in this file;
	    being defined later, it replaces this one when the module is loaded.

	    Args:
	        data: Rows with ``'name'`` and the two metric fields of the setting.
	        metric: "Attack-free" or "Watermark Removal"; anything else is
	            handled as "Stealing Attack".

	    Returns:
	        A DataFrame with ``Rank`` (1-based position in ``data``),
	        ``Watermark`` and two metric columns formatted to three decimals;
	        an empty DataFrame when ``data`` is empty.
	    """
	    if not data:
	        return pd.DataFrame()

	    # (column header, source field) pairs for the selected setting.
	    if metric == "Attack-free":
	        columns = (
	            ('Normalized Utility ↑', 'normalizedUtility'),
	            ('Detection Rate (%) ↑', 'detectionRate'),
	        )
	    elif metric == "Watermark Removal":
	        columns = (
	            ('Abs Utility Degradation ↑', 'absoluteUtilityDegregation'),
	            ('Removal Detection Rate (%) ↑', 'removal_detectionRate'),
	        )
	    else:  # Stealing Attack
	        columns = (
	            ('Adversary BERT Score ↑', 'adversaryBERTscore'),
	            ('Adversary Detection Rate (%) ↑', 'adversaryDetectionRate'),
	        )

	    records = []
	    for rank, entry in enumerate(data, start=1):
	        record = {'Rank': rank, 'Watermark': entry['name']}
	        for header, field in columns:
	            record[header] = f"{entry[field]:.3f}"
	        records.append(record)

	    return pd.DataFrame(records)

	# Create table data with ↑ markers in the metric headers and a paper reference link column
	def create_table_data(data, metric):
	    """Build the leaderboard table, including a paper reference column.

	    Args:
	        data: Rows with ``'name'``, the two metric fields of the selected
	            setting and optionally ``'paperLink'``.
	        metric: "Attack-free" or "Watermark Removal"; anything else is
	            handled as "Stealing Attack".

	    Returns:
	        A DataFrame with ``Watermark``, two metric columns formatted to three
	        decimals, and ``Reference``. ``Reference`` holds an HTML anchor when
	        the row has a paper link and ``'-'`` otherwise. An empty DataFrame is
	        returned when ``data`` is empty.
	    """
	    import html  # local import: only needed for escaping the link below

	    if not data:
	        return pd.DataFrame()

	    # (column header, source field) pairs for the selected setting.
	    if metric == "Attack-free":
	        columns = (
	            ('Normalized Utility ↑', 'normalizedUtility'),
	            ('Detection Rate (%) ↑', 'detectionRate'),
	        )
	    elif metric == "Watermark Removal":
	        columns = (
	            ('Abs Utility Degradation ↑', 'absoluteUtilityDegregation'),
	            ('Removal Detection Rate (%) ↑', 'removal_detectionRate'),
	        )
	    else:  # Stealing Attack
	        columns = (
	            ('Adversary BERT Score ↑', 'adversaryBERTscore'),
	            ('Adversary Detection Rate (%) ↑', 'adversaryDetectionRate'),
	        )

	    table_data = []
	    for item in data:
	        row = {'Watermark': item['name']}
	        paper_link = item.get('paperLink')

	        for header, field in columns:
	            row[header] = f"{item[field]:.3f}"

	        # The link originates from user submissions, so it is escaped before
	        # being placed inside the href attribute (a stray quote could
	        # otherwise break out of the attribute).
	        if paper_link:
	            safe_link = html.escape(str(paper_link), quote=True)
	            row['Reference'] = f'<a href="{safe_link}" target="_blank" style="color: #3B82F6; text-decoration: underline; font-size: 0.8em;">📄 Paper</a>'
	        else:
	            row['Reference'] = '-'

	        table_data.append(row)

	    return pd.DataFrame(table_data)

	# Update interface based on selections
	def update_interface(model, metric):
	    """Rebuild the plot and table for the selected model and setting.

	    Reloads the leaderboard data on every call, filters it for ``model`` and
	    ``metric``, and returns ``(scatter_plot, table_data)``.
	    """
	    rows = filter_data(load_leaderboard_data(), model, metric)
	    figure = create_scatter_plot(rows, metric)
	    table = create_table_data(rows, metric)
	    return figure, table

	# Handle form submission
	def submit_watermark_data(name, model, paper_link, normalized_utility, detection_rate,
	                          absolute_utility_degradation, removal_detection_rate,
	                          adversary_bert_score, adversary_detection_rate):
	    """Validate a submission form and queue it for administrator approval.

	    The entry is appended to ``pending_submissions.json``; it is not added to
	    ``leaderboard.json`` here.

	    Args:
	        name: Watermark name (required, surrounding whitespace is ignored).
	        model: Model the results belong to (required).
	        paper_link: Optional URL; must start with ``http://`` or ``https://``.
	        normalized_utility, detection_rate: Attack-free pair.
	        absolute_utility_degradation, removal_detection_rate: Removal pair.
	        adversary_bert_score, adversary_detection_rate: Stealing-attack pair.
	            Each pair is optional, but a pair must be given completely or not
	            at all, and at least one complete pair is required. Utility-type
	            values must lie in [0, 1], detection rates in [0, 100].

	    Returns:
	        Always a 3-tuple ``(status_message, scatter_plot, table)`` matching
	        the three outputs this handler is wired to. On failure the last two
	        items are ``gr.update()`` placeholders; on success they show the
	        approved Attack-free data for ``model``.
	    """

	    def _fail(message):
	        # Error paths must also produce three values: the click handler has
	        # three outputs, and the original two-value returns did not match.
	        return message, gr.update(), gr.update()

	    def _read_entries(path):
	        # Best effort: a missing or corrupt file counts as "no entries".
	        try:
	            with open(path, 'r', encoding='utf-8') as f:
	                loaded = json.load(f)
	        except (OSError, ValueError):
	            return []
	        return loaded if isinstance(loaded, list) else []

	    # Validation
	    clean_name = name.strip() if name else ""
	    if not clean_name:
	        return _fail("❌ Error: Watermark name is required")

	    if not model:
	        return _fail("❌ Error: Model selection is required")

	    # Validate paper link if provided
	    paper_link = paper_link.strip() if paper_link else ""
	    if paper_link:
	        if not paper_link.startswith(('http://', 'https://')):
	            return _fail("❌ Error: Paper link must start with http:// or https://")
	    else:
	        paper_link = None

	    # One row per evaluation setting: (setting, utility field, detection field),
	    # each field being (display label, stored JSON key, submitted value).
	    metric_groups = [
	        ("Attack-free",
	         ("Normalized Utility", "normalizedUtility", normalized_utility),
	         ("Detection Rate", "detectionRate", detection_rate)),
	        ("Watermark Removal",
	         ("Absolute Utility Degradation", "absoluteUtilityDegregation", absolute_utility_degradation),
	         ("Removal Detection Rate", "removal_detectionRate", removal_detection_rate)),
	        ("Stealing Attack",
	         ("Adversary BERT Score", "adversaryBERTscore", adversary_bert_score),
	         ("Adversary Detection Rate", "adversaryDetectionRate", adversary_detection_rate)),
	    ]

	    # At least one complete set of metrics must be provided
	    has_complete_set = any(
	        utility[2] is not None and detection[2] is not None
	        for _, utility, detection in metric_groups
	    )
	    if not has_complete_set:
	        return _fail("❌ Error: Please provide at least one complete set of metrics:\n• Attack-free: Normalized Utility + Detection Rate\n• Watermark Removal: Absolute Utility Degradation + Removal Detection Rate\n• Stealing Attack: Adversary BERT Score + Adversary Detection Rate")

	    for setting, (u_label, _, u_value), (d_label, _, d_value) in metric_groups:
	        if u_value is None and d_value is None:
	            continue
	        # A half-filled pair is rejected for every setting (previously only
	        # checked for the adversary pair, and only for values > 0).
	        if u_value is None or d_value is None:
	            return _fail(f"❌ Error: {setting} metrics are incomplete: please provide both {u_label} and {d_label}")
	        # Bounds are inclusive, matching the error text and the form's
	        # minimum of 0.0 (a value of exactly 0 used to be rejected).
	        if not 0.0 <= u_value <= 1.0:
	            return _fail(f"❌ Error: {u_label} must be between 0.000 and 1.000")
	        if not 0.0 <= d_value <= 100.0:
	            return _fail(f"❌ Error: {d_label} must be between 0.000 and 100.000")

	    # Create new entry - only include provided values, don't set missing ones
	    new_entry = {
	        "name": clean_name,
	        "model": model,
	    }
	    if paper_link:
	        new_entry["paperLink"] = paper_link
	    for _, utility, detection in metric_groups:
	        for _, key, value in (utility, detection):
	            if value is not None:
	                new_entry[key] = value

	    # Reject names already approved or already waiting for approval
	    approved_data = _read_entries('leaderboard.json')
	    for entry in approved_data:
	        if entry.get('name') == clean_name and entry.get('model') == model:
	            return _fail(f"❌ Error: A watermark named '{clean_name}' already exists for {model}")

	    pending_data = _read_entries('pending_submissions.json')
	    for entry in pending_data:
	        if entry.get('name') == clean_name and entry.get('model') == model:
	            return _fail(f"❌ Error: A watermark named '{clean_name}' is already pending approval for {model}")

	    # Add submission timestamp and status (one clock read for both fields)
	    now = datetime.now()
	    new_entry['submitted_at'] = now.isoformat()
	    new_entry['status'] = 'pending'
	    new_entry['submission_id'] = f"{clean_name}_{model}_{int(now.timestamp())}"

	    # Add to pending submissions instead of approved data
	    pending_data.append(new_entry)
	    try:
	        with open('pending_submissions.json', 'w', encoding='utf-8') as f:
	            json.dump(pending_data, f, indent=2)
	    except (OSError, TypeError, ValueError) as e:
	        return _fail(f"❌ Error saving submission: {str(e)}")

	    # Update the interface with current approved data only
	    filtered_data = filter_data(approved_data, model, "Attack-free")
	    scatter_plot = create_scatter_plot(filtered_data, "Attack-free")
	    table_data = create_table_data(filtered_data, "Attack-free")

	    success_msg = f"✅ Successfully submitted '{clean_name}' for {model} for approval! Your submission will be reviewed by the administrator before appearing on the leaderboard."
	    return success_msg, scatter_plot, table_data

	# Clear form function
	def clear_form():
	    """Return nine ``None`` values, one for each form field being reset."""
	    field_count = 9
	    return (None,) * field_count

	# Owner approval functions
	def load_pending_submissions():
	    """Load pending submissions for owner review.

	    Reads ``pending_submissions.json`` and returns one display row per entry.
	    Metric values are formatted to three decimals, missing ones are shown as
	    ``'-'``, and the timestamp is cut to its first 19 characters. If the file
	    holds no entries, or anything goes wrong while reading or formatting, an
	    empty DataFrame with the same column headers is returned (failures are
	    also printed).
	    """
	    columns = ["ID", "Name", "Model", "Paper Link", "Attack-free Utility", "Attack-free Detection",
	               "Removal Degradation", "Removal Detection", "Adversary BERT", "Adversary Detection", "Submitted At"]

	    def three_decimals(entry, key):
	        # '-' marks a metric that was not part of the submission.
	        value = entry.get(key)
	        return '-' if value is None else f"{value:.3f}"

	    try:
	        with open('pending_submissions.json', 'r') as f:
	            pending = json.load(f)

	        if not pending:
	            return pd.DataFrame(columns=columns)

	        rows = []
	        for entry in pending:
	            submitted_at = entry.get('submitted_at')
	            rows.append({
	                "ID": entry.get('submission_id', 'N/A'),
	                "Name": entry.get('name', 'N/A'),
	                "Model": entry.get('model', 'N/A'),
	                "Paper Link": entry.get('paperLink', '-'),
	                "Attack-free Utility": three_decimals(entry, 'normalizedUtility'),
	                "Attack-free Detection": three_decimals(entry, 'detectionRate'),
	                "Removal Degradation": three_decimals(entry, 'absoluteUtilityDegregation'),
	                "Removal Detection": three_decimals(entry, 'removal_detectionRate'),
	                "Adversary BERT": three_decimals(entry, 'adversaryBERTscore'),
	                "Adversary Detection": three_decimals(entry, 'adversaryDetectionRate'),
	                # Show only date and time
	                "Submitted At": submitted_at[:19] if submitted_at else 'N/A',
	            })

	        return pd.DataFrame(rows)

	    except Exception as e:
	        print(f"Error loading pending submissions: {e}")
	        return pd.DataFrame(columns=columns)

	def approve_submission(submission_id, admin_password):
	    """Approve a pending submission and move it onto the leaderboard.

	    Args:
	        submission_id: ``submission_id`` of the pending entry to approve
	            (surrounding whitespace from copy/paste is ignored).
	        admin_password: Password typed into the owner panel.

	    Returns:
	        ``(status_message, pending_table)``. The second item is the refreshed
	        pending table on success and ``gr.update()`` otherwise.
	    """
	    import hmac  # local import: constant-time comparison of the password

	    # NOTE(security): set the ADMIN_PASSWORD environment variable (e.g. as a
	    # Space secret) in production. The literal fallback only keeps existing
	    # deployments working and is visible to anyone who can read this file.
	    expected_password = os.environ.get("ADMIN_PASSWORD", "admin123")
	    supplied_password = admin_password or ""
	    if not hmac.compare_digest(supplied_password.encode("utf-8"),
	                               expected_password.encode("utf-8")):
	        return "❌ Access denied: Invalid admin password", gr.update()

	    def _read_entries(path):
	        # Best effort: a missing or corrupt file counts as "no entries".
	        try:
	            with open(path, 'r', encoding='utf-8') as f:
	                loaded = json.load(f)
	        except (OSError, ValueError):
	            return []
	        return loaded if isinstance(loaded, list) else []

	    try:
	        # Load pending submissions from file (not from the formatted function)
	        pending_data = _read_entries('pending_submissions.json')

	        # Find and remove the submission
	        wanted_id = (submission_id or "").strip()
	        match_index = next(
	            (i for i, entry in enumerate(pending_data)
	             if isinstance(entry, dict) and entry.get('submission_id') == wanted_id),
	            None,
	        )
	        if match_index is None:
	            return "❌ Submission not found", gr.update()
	        approved_entry = pending_data.pop(match_index)

	        # Remove submission metadata
	        for meta_key in ('submitted_at', 'status', 'submission_id'):
	            approved_entry.pop(meta_key, None)

	        # Add to approved data
	        approved_data = _read_entries('leaderboard.json')
	        approved_data.append(approved_entry)

	        # Save approved data first, then the shortened pending list, so a
	        # failure in between cannot lose the entry.
	        with open('leaderboard.json', 'w', encoding='utf-8') as f:
	            json.dump(approved_data, f, indent=2)

	        with open('pending_submissions.json', 'w', encoding='utf-8') as f:
	            json.dump(pending_data, f, indent=2)

	        return f"✅ Approved submission: {approved_entry.get('name', 'Unknown')}", load_pending_submissions()

	    except Exception as e:
	        # UI boundary: report the failure in the status area instead of raising.
	        return f"❌ Error approving submission: {str(e)}", gr.update()

	def reject_submission(submission_id, admin_password):
	    """Reject a pending submission and delete it from the pending list.

	    Args:
	        submission_id: ``submission_id`` of the pending entry to reject
	            (surrounding whitespace from copy/paste is ignored).
	        admin_password: Password typed into the owner panel.

	    Returns:
	        ``(status_message, pending_table)``. The second item is the refreshed
	        pending table on success and ``gr.update()`` otherwise.
	    """
	    import hmac  # local import: constant-time comparison of the password

	    # NOTE(security): set the ADMIN_PASSWORD environment variable (e.g. as a
	    # Space secret) in production. The literal fallback only keeps existing
	    # deployments working and is visible to anyone who can read this file.
	    expected_password = os.environ.get("ADMIN_PASSWORD", "admin123")
	    supplied_password = admin_password or ""
	    if not hmac.compare_digest(supplied_password.encode("utf-8"),
	                               expected_password.encode("utf-8")):
	        return "❌ Access denied: Invalid admin password", gr.update()

	    try:
	        # Load pending submissions from file (not from the formatted function)
	        try:
	            with open('pending_submissions.json', 'r', encoding='utf-8') as f:
	                pending_data = json.load(f)
	        except (OSError, ValueError):
	            # Best effort: a missing or corrupt file counts as "no entries".
	            pending_data = []
	        if not isinstance(pending_data, list):
	            pending_data = []

	        # Find and remove the submission
	        wanted_id = (submission_id or "").strip()
	        match_index = next(
	            (i for i, entry in enumerate(pending_data)
	             if isinstance(entry, dict) and entry.get('submission_id') == wanted_id),
	            None,
	        )
	        if match_index is None:
	            return "❌ Submission not found", gr.update()
	        rejected_entry = pending_data.pop(match_index)

	        # Save updated pending data
	        with open('pending_submissions.json', 'w', encoding='utf-8') as f:
	            json.dump(pending_data, f, indent=2)

	        return f"❌ Rejected submission: {rejected_entry.get('name', 'Unknown')}", load_pending_submissions()

	    except Exception as e:
	        # UI boundary: report the failure in the status area instead of raising.
	        return f"❌ Error rejecting submission: {str(e)}", gr.update()

	# Toggle add data section visibility
	def toggle_add_data_section(section):
	    """Return a ``gr.update`` that inverts ``section.visible``."""
	    currently_visible = section.visible
	    return gr.update(visible=not currently_visible)

	# Create the main interface
	def create_interface():
	# Builds the whole Gradio app inside one gr.Blocks context and returns it
	# (the caller is expected to call .launch() on the result).
	# Layout, top to bottom: header, model/setting selectors, the collapsible
	# submission form, scatter plot, results table, submission guideline, and
	# the password-protected owner approval panel. Event wiring follows the layout.
	# Custom CSS for better styling
	css = """
	.gradio-container {
	max-width: 1200px !important;
	margin: 0 auto !important;
	background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
	min-height: 100vh;
	}
	.title {
	text-align: center;
	margin: 20px 0;
	font-size: 3rem;
	font-weight: bold;
	background: linear-gradient(45deg, #667eea 0%, #764ba2 100%);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	background-clip: text;
	text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
	}
	.subtitle {
	text-align: center;
	margin-bottom: 30px;
	font-size: 1.3rem;
	color: #4a5568;
	font-weight: 500;
	}
	.controls {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	padding: 30px;
	border-radius: 15px;
	margin-bottom: 25px;
	box-shadow: 0 8px 32px rgba(0,0,0,0.1);
	border: 1px solid rgba(255,255,255,0.2);
	}
	.controls label {
	color: white !important;
	font-weight: bold !important;
	font-size: 1.2rem !important;
	}
	.controls .gr-radio {
	background: rgba(255,255,255,0.1) !important;
	border-radius: 10px !important;
	padding: 12px !important;
	}
	.controls .gr-radio label {
	color: white !important;
	font-size: 1.1rem !important;
	}
	.controls h3 {
	font-size: 1.4rem !important;
	margin-bottom: 15px !important;
	}
	#highlighted-add-data {
	background: linear-gradient(135deg, #E0F2FE 0%, #B3E5FC 100%) !important;
	border: 2px solid #81D4FA !important;
	border-radius: 15px !important;
	box-shadow: 0 10px 40px rgba(129, 212, 250, 0.3) !important;
	margin: 20px 0 !important;
	}
	#highlighted-add-data .gr-accordion-header {
	background: linear-gradient(135deg, #81D4FA 0%, #4FC3F7 100%) !important;
	color: white !important;
	font-weight: bold !important;
	font-size: 1.2rem !important;
	padding: 15px 20px !important;
	border-radius: 15px 15px 0 0 !important;
	}
	#highlighted-add-data .gr-accordion-content {
	background: rgba(255,255,255,0.95) !important;
	border-radius: 0 0 15px 15px !important;
	padding: 25px !important;
	}
	.gr-button {
	border-radius: 10px !important;
	font-weight: bold !important;
	transition: all 0.3s ease !important;
	}
	.gr-button:hover {
	transform: translateY(-2px) !important;
	box-shadow: 0 5px 15px rgba(0,0,0,0.2) !important;
	}
	.gr-plot {
	border-radius: 15px !important;
	box-shadow: 0 8px 32px rgba(0,0,0,0.1) !important;
	background: white !important;
	padding: 20px !important;
	}
	.gr-dataframe {
	border-radius: 15px !important;
	box-shadow: 0 8px 32px rgba(0,0,0,0.1) !important;
	background: white !important;
	overflow: hidden !important;
	}
	.gr-accordion {
	border-radius: 15px !important;
	box-shadow: 0 8px 32px rgba(0,0,0,0.1) !important;
	background: white !important;
	margin: 15px 0 !important;
	}
	.gr-accordion-header {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
	color: white !important;
	font-weight: bold !important;
	padding: 15px 20px !important;
	border-radius: 15px 15px 0 0 !important;
	}
	.gr-accordion-content {
	background: rgba(255,255,255,0.95) !important;
	border-radius: 0 0 15px 15px !important;
	padding: 20px !important;
	}
	#submit-btn {
	background: linear-gradient(135deg, #29B6F6 0%, #0288D1 100%) !important;
	border: 2px solid #0277BD !important;
	color: white !important;
	font-weight: bold !important;
	font-size: 1.1rem !important;
	padding: 15px 30px !important;
	border-radius: 12px !important;
	box-shadow: 0 8px 25px rgba(41, 182, 246, 0.4) !important;
	transition: all 0.3s ease !important;
	}
	#submit-btn:hover {
	background: linear-gradient(135deg, #0288D1 0%, #0277BD 100%) !important;
	transform: translateY(-3px) !important;
	box-shadow: 0 12px 35px rgba(41, 182, 246, 0.6) !important;
	}
	#owner-controls {
	background: linear-gradient(135deg, #FFE0E0 0%, #FFCDD2 100%) !important;
	border: 2px solid #FF5722 !important;
	border-radius: 15px !important;
	box-shadow: 0 10px 40px rgba(255, 87, 34, 0.3) !important;
	margin: 20px 0 !important;
	}
	#owner-controls .gr-accordion-header {
	background: linear-gradient(135deg, #FF5722 0%, #D32F2F 100%) !important;
	color: white !important;
	font-weight: bold !important;
	font-size: 1.2rem !important;
	padding: 15px 20px !important;
	border-radius: 15px 15px 0 0 !important;
	}
	#owner-controls .gr-accordion-content {
	background: rgba(255,255,255,0.95) !important;
	border-radius: 0 0 15px 15px !important;
	padding: 25px !important;
	}
	#approve-btn {
	background: linear-gradient(135deg, #4CAF50 0%, #2E7D32 100%) !important;
	border: 2px solid #388E3C !important;
	color: white !important;
	font-weight: bold !important;
	font-size: 1.1rem !important;
	padding: 15px 30px !important;
	border-radius: 12px !important;
	box-shadow: 0 8px 25px rgba(76, 175, 80, 0.4) !important;
	transition: all 0.3s ease !important;
	}
	#approve-btn:hover {
	background: linear-gradient(135deg, #2E7D32 0%, #1B5E20 100%) !important;
	transform: translateY(-3px) !important;
	box-shadow: 0 12px 35px rgba(76, 175, 80, 0.6) !important;
	}
	#reject-btn {
	background: linear-gradient(135deg, #F44336 0%, #C62828 100%) !important;
	border: 2px solid #D32F2F !important;
	color: white !important;
	font-weight: bold !important;
	font-size: 1.1rem !important;
	padding: 15px 30px !important;
	border-radius: 12px !important;
	box-shadow: 0 8px 25px rgba(244, 67, 54, 0.4) !important;
	transition: all 0.3s ease !important;
	}
	#reject-btn:hover {
	background: linear-gradient(135deg, #C62828 0%, #B71C1C 100%) !important;
	transform: translateY(-3px) !important;
	box-shadow: 0 12px 35px rgba(244, 67, 54, 0.6) !important;
	}
	#guideline-section {
	background: linear-gradient(135deg, #E8F5E8 0%, #C8E6C9 100%) !important;
	border: 2px solid #4CAF50 !important;
	border-radius: 15px !important;
	box-shadow: 0 10px 40px rgba(76, 175, 80, 0.3) !important;
	margin: 20px 0 !important;
	}
	#guideline-section .gr-accordion-header {
	background: linear-gradient(135deg, #4CAF50 0%, #2E7D32 100%) !important;
	color: white !important;
	font-weight: bold !important;
	font-size: 1.2rem !important;
	padding: 15px 20px !important;
	border-radius: 15px 15px 0 0 !important;
	}
	#guideline-section .gr-accordion-content {
	background: rgba(255,255,255,0.95) !important;
	border-radius: 0 0 15px 15px !important;
	padding: 25px !important;
	}
	"""

	with gr.Blocks(css=css, title="Watermark Leaderboard for LLMs") as demo:
	# Header
	gr.HTML("""
	<div class="title">
	🏆 Watermark Leaderboard for LLMs 🏆
	</div>
	<div class="subtitle">
	📊 Interactive leaderboard for comparing watermark performance across different models and evaluation settings
	</div>
	""")

	# Controls
	# Two radio groups; a change on either one re-renders the plot and table
	# (see the event handlers near the end of this function).
	with gr.Row():
	with gr.Column(scale=1):
	gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><h3 style='color: #667eea; margin: 0; font-weight: bold;'>🤖 Model Selection</h3></div>")
	model_selector = gr.Radio(
	choices=["LLaMA3", "DeepSeek"],
	value="LLaMA3",
	label="Model",
	info="Select the model to display"
	)
	with gr.Column(scale=1):
	gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><h3 style='color: #667eea; margin: 0; font-weight: bold;'>⚙️ Evaluation Setting</h3></div>")
	metric_selector = gr.Radio(
	choices=["Attack-free", "Watermark Removal", "Stealing Attack"],
	value="Attack-free",
	label="Setting",
	info="Select the evaluation setting"
	)


	# Add Your Data Section (Highlighted)
	with gr.Accordion("🚀 Add Your Data to the Leaderboard", open=False, elem_id="highlighted-add-data"):
	gr.HTML("""
	<div style='text-align: center; margin-bottom: 20px;'>
	<h2 style='color: #0277BD; margin: 0; font-size: 1.5rem;'>📝 Submit Your Watermark Performance Results</h2>
	<p style='color: #374151; margin: 10px 0 0 0;'>Contribute to the community by sharing your watermark evaluation results</p>
	</div>
	<div style='background: #E3F2FD; border: 1px solid #2196F3; border-radius: 8px; padding: 15px; margin-bottom: 20px;'>
	<h4 style='color: #1976D2; margin: 0 0 10px 0;'>📋 Submission Requirements</h4>
	<p style='color: #374151; margin: 0 0 8px 0;'>Provide at least one complete set of metrics:</p>
	<ul style='color: #374151; margin: 0; padding-left: 20px;'>
	<li><strong>Attack-free:</strong> Normalized Utility + Detection Rate</li>
	<li><strong>Watermark Removal:</strong> Absolute Utility Degradation + Removal Detection Rate</li>
	<li><strong>Stealing Attack:</strong> Adversary BERT Score + Adversary Detection Rate</li>
	</ul>
	</div>
	""")
	with gr.Row():
	with gr.Column(scale=1):
	# Basic Information
	gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><h3 style='color: #0277BD; margin: 0;'>📋 Basic Information</h3></div>")
	watermark_name = gr.Textbox(
	label="Watermark Name",
	placeholder="e.g., MyWatermark, Watermark-X",
	info="Unique identifier for your watermark"
	)
	paper_link = gr.Textbox(
	label="Paper Link (Optional)",
	placeholder="https://arxiv.org/abs/xxxx.xxxxx or https://...",
	info="Link to the paper describing this watermark method"
	)
	submission_model = gr.Radio(
	choices=["LLaMA3", "DeepSeek"],
	label="Model",
	value="LLaMA3",
	info="Select the model used"
	)

	with gr.Column(scale=1):
	# Attack-free Metrics (Optional)
	# All metric inputs below start with value=None, i.e. empty.
	gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><h3 style='color: #0277BD; margin: 0;'>⚡ Attack-free Metrics (Optional - Both Required if One is Provided)</h3></div>")
	normalized_utility = gr.Number(
	label="Normalized Utility",
	value=None,
	minimum=0.0,
	maximum=1.0,
	step=0.001,
	info="Text quality metric (0.000 - 1.000)"
	)
	detection_rate = gr.Number(
	label="Detection Rate (%)",
	value=None,
	minimum=0.0,
	maximum=100.0,
	step=0.001,
	info="Watermark detection accuracy (0.000 - 100.000%)"
	)

	with gr.Row():
	with gr.Column(scale=1):
	# Watermark Removal Metrics (Optional)
	gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><h3 style='color: #0277BD; margin: 0;'>🛡️ Watermark Removal (Optional)</h3></div>")
	absolute_utility_degradation = gr.Number(
	label="Absolute Utility Degradation",
	value=None,
	minimum=0.0,
	maximum=1.0,
	step=0.001,
	info="Resistance to removal attacks (0.000 - 1.000)"
	)
	removal_detection_rate = gr.Number(
	label="Removal Detection Rate (%)",
	value=None,
	minimum=0.0,
	maximum=100.0,
	step=0.001,
	info="Detection rate under removal attacks (0.000 - 100.000%)"
	)

	with gr.Column(scale=1):
	# Stealing Attack Metrics (Optional)
	gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><h3 style='color: #0277BD; margin: 0;'>🎯 Stealing Attack (Optional)</h3></div>")
	adversary_bert_score = gr.Number(
	label="Adversary BERT Score",
	value=None,
	minimum=0.0,
	maximum=1.0,
	step=0.001,
	info="Performance under adversarial conditions (0.000 - 1.000)"
	)
	adversary_detection_rate = gr.Number(
	label="Adversary Detection Rate (%)",
	value=None,
	minimum=0.0,
	maximum=100.0,
	step=0.001,
	info="Detection rate under adversarial attacks (0.000 - 100.000%)"
	)

	# Submit and Clear buttons
	with gr.Row():
	with gr.Column(scale=1):
	submit_btn = gr.Button(
	"🚀 Submit Data to Leaderboard",
	variant="primary",
	size="lg",
	elem_id="submit-btn"
	)
	with gr.Column(scale=1):
	clear_btn = gr.Button(
	"🗑️ Clear Form",
	variant="secondary",
	size="lg"
	)

	# Status message
	# Receives the first value returned by submit_watermark_data.
	status_message = gr.Markdown("", visible=True)


	# Scatter Plot
	scatter_plot = gr.Plot(
	label="Performance Scatter Plot",
	show_label=True
	)

	# Table
	table = gr.DataFrame(
	label="Performance Table",
	show_label=True,
	interactive=False,
	wrap=True
	)

	# Guideline and Metrics Explained Section (At bottom with light green background)
	with gr.Accordion("📋 Guideline for Submitting Watermark Performance Results", open=False, elem_id="guideline-section"):
	gr.HTML("""
	<div style="padding: 20px;">
	<h3>Guideline for Submitting Watermark Performance Results</h3>
	<h4>1. Datasets</h4>
	<ul>
	<li><strong>Text Generation (C4 dataset)</strong>
	<ul>
	<li>Training: first 20,000 samples</li>
	<li>Testing: 13,860 samples</li>
	<li>Reference script: <code>Files/Reproducibility/C4_dataset_download.py</code></li>
	</ul>
	</li>
	<li><strong>Text Summarization (CNN/Daily Mail dataset)</strong>
	<ul>
	<li>Training: first 10,000–20,000 samples</li>
	<li>Testing: 1,000 samples</li>
	<li>Reference script: <code>Files/Reproducibility/CNN_dataset_download.py</code></li>
	</ul>
	</li>
	</ul>
	<h4>2. Models</h4>
	<ul>
	<li>Use open-source models available on Hugging Face:
	<ul>
	<li>DeepSeek: "deepseek-ai/deepseek-llm-7b-base"</li>
	<li>LLaMA-3: "meta-llama/Meta-Llama-3-8B"</li>
	</ul>
	</li>
	</ul>
	<h4>3. Evaluation Settings</h4>
	<ul>
	<li><strong>(a) Attack-Free Setting</strong>
	<ul>
	<li>Generate 13,860 watermarked outputs on the C4 test set.</li>
	<li>Report: Detection Rate and Normalized Utility (see Metrics).</li>
	</ul>
	</li>
	<li><strong>(b) Watermark Removal Setting</strong>
	<ul>
	<li>Apply Dipper to paraphrase watermarked outputs.</li>
	<li>Report:
	<ul>
	<li>Detection Rate after attack</li>
	<li>Normalized Utility after attack</li>
	<li>Absolute Utility Degradation (difference before vs. after attack)</li>
	</ul>
	</li>
	<li>Reference scripts: <code>Files/Reproducibility/Attack_dipper.py</code></li>
	</ul>
	</li>
	<li><strong>(c) Stealing Attack Setting</strong>
	<ul>
	<li>Generate 20,000 watermarked samples for training a surrogate model using LoRA.</li>
	<li>Use the surrogate model for summarization on 1,000 test samples.</li>
	<li>Report: Detection Rate and Normalized Utility on the surrogate's outputs.</li>
	<li>Reference scripts: <code>Files/Reproducibility/Finetune_sum.py</code>, <code>Files/Reproducibility/Inference_sum.py</code></li>
	</ul>
	</li>
	</ul>
	<h4>4. Metrics</h4>
	<ul>
	<li><strong>Detection Rate</strong>
	<ul>
	<li>Average accuracy across the test set (e.g., 13,860 examples for text generation).</li>
	<li>Use your own detector implementation.</li>
	</ul>
	</li>
	<li><strong>Normalized Utility</strong>
	<ul>
	<li>Defined as the mean of:</li>
	<li>BERTScore (<code>Files/Reproducibility/BERT_score.py</code>)</li>
	<li>Entity Similarity Score (<code>Files/Reproducibility/Entity_similarity_score.py</code>)</li>
	</ul>
	</li>
	<li><strong>Absolute Utility Degradation</strong>
	<ul>
	<li>The absolute change in Normalized Utility between attack-free and attacked outputs.</li>
	</ul>
	</li>
	</ul>
	<h4>5. Submission</h4>
	<ul>
	<li>You may submit results for one or more evaluation settings (Attack-Free, Removal, Stealing).</li>
	<li>Please include:
	<ul>
	<li>Model(s) evaluated</li>
	<li>Dataset(s) used</li>
	<li>Scripts/configuration details if modified</li>
	<li>Reported metrics in the required format</li>
	</ul>
	</li>
	</ul>
	<p><strong>Reproducibility codes are available in the Files tab of this Space.</strong></p>
	</div>
	""")

	# Owner Approval Section (At the very bottom)
	with gr.Accordion("🔒 Owner Controls - Pending Submissions", open=False, elem_id="owner-controls"):
	gr.HTML("""
	<div style='text-align: center; margin-bottom: 20px;'>
	<h2 style='color: #D32F2F; margin: 0; font-size: 1.5rem;'>🛡️ Administrator Approval Panel</h2>
	<p style='color: #374151; margin: 10px 0 0 0;'>Review and approve pending submissions before they appear on the leaderboard</p>
	</div>
	""")

	# Pending submissions table
	# Headers mirror the columns produced by load_pending_submissions.
	pending_table = gr.DataFrame(
	label="📋 Pending Submissions",
	show_label=True,
	interactive=False,
	wrap=True,
	headers=["ID", "Name", "Model", "Paper Link", "Attack-free Utility", "Attack-free Detection",
	"Removal Degradation", "Removal Detection", "Adversary BERT", "Adversary Detection", "Submitted At"]
	)

	# Admin authentication
	admin_password_input = gr.Textbox(
	label="🔐 Admin Password",
	placeholder="Enter admin password to access controls",
	type="password",
	info="Required for approval/rejection actions"
	)

	# Approval controls
	with gr.Row():
	with gr.Column(scale=1):
	submission_id_input = gr.Textbox(
	label="Submission ID",
	placeholder="Enter submission ID to approve/reject",
	info="Copy from the pending submissions table"
	)
	approve_btn = gr.Button(
	"✅ Approve Submission",
	variant="primary",
	size="lg",
	elem_id="approve-btn"
	)
	with gr.Column(scale=1):
	reject_btn = gr.Button(
	"❌ Reject Submission",
	variant="stop",
	size="lg",
	elem_id="reject-btn"
	)
	refresh_pending_btn = gr.Button(
	"🔄 Refresh Pending",
	variant="secondary",
	size="lg"
	)

	approval_status = gr.Markdown("", visible=True)

	# Event handlers
	# Both selectors pass the current model and setting to update_interface.
	model_selector.change(
	fn=update_interface,
	inputs=[model_selector, metric_selector],
	outputs=[scatter_plot, table]
	)

	metric_selector.change(
	fn=update_interface,
	inputs=[model_selector, metric_selector],
	outputs=[scatter_plot, table]
	)

	# Form submission handler
	# NOTE(review): three outputs are declared here, so submit_watermark_data
	# should return three values on every path - verify its error returns.
	submit_btn.click(
	fn=submit_watermark_data,
	inputs=[
	watermark_name,
	submission_model,
	paper_link,
	normalized_utility,
	detection_rate,
	absolute_utility_degradation,
	removal_detection_rate,
	adversary_bert_score,
	adversary_detection_rate
	],
	outputs=[status_message, scatter_plot, table]
	)

	# Clear form handler
	# clear_form returns nine None values, one per output listed below
	# (this includes submission_model, so its radio is set to None as well).
	clear_btn.click(
	fn=clear_form,
	outputs=[
	watermark_name,
	paper_link,
	submission_model,
	normalized_utility,
	detection_rate,
	absolute_utility_degradation,
	removal_detection_rate,
	adversary_bert_score,
	adversary_detection_rate
	]
	)

	# Add data button handler
	# The add_data_button is removed, so this handler is no longer needed.
	# The highlighted section is now always visible.

	# Owner approval event handlers
	approve_btn.click(
	fn=approve_submission,
	inputs=[submission_id_input, admin_password_input],
	outputs=[approval_status, pending_table]
	)

	reject_btn.click(
	fn=reject_submission,
	inputs=[submission_id_input, admin_password_input],
	outputs=[approval_status, pending_table]
	)

	refresh_pending_btn.click(
	fn=load_pending_submissions,
	outputs=[pending_table]
	)

	# Initial load
	demo.load(
	fn=lambda: update_interface("LLaMA3", "Attack-free"),
	outputs=[scatter_plot, table]
	)

	# Load pending submissions on startup
	demo.load(
	fn=load_pending_submissions,
	outputs=[pending_table]
	)

	# Clear admin password after actions for security
	def clear_admin_password():
	return gr.update(value="")

	# Clear password after approve/reject actions
	# These are second click listeners on the same two buttons, registered in
	# addition to the approve/reject handlers above.
	approve_btn.click(
	fn=clear_admin_password,
	outputs=[admin_password_input]
	)

	reject_btn.click(
	fn=clear_admin_password,
	outputs=[admin_password_input]
	)

	return demo

	# Create and launch the interface
	if __name__ == "__main__":
	    # Build the Blocks app and start serving it only when this file is run
	    # as a script, not when it is imported.
	    demo = create_interface()
	    demo.launch()