"""Watermark Leaderboard for LLMs — interactive Gradio app.

Shows watermark performance (scatter plot + ranked table) per model and
evaluation setting, accepts community submissions into a pending queue
(``pending_submissions.json``), and provides a password-gated admin panel
for approving entries into the public leaderboard (``leaderboard.json``).

NOTE(review): this file was recovered from a formatting-mangled dump in
which HTML tags inside string literals (gr.HTML bodies, the Plotly
hovertemplate ``<br>``/``<extra>`` tags, the paper-link anchor) were
stripped.  The markup below is a reconstruction of the visible text
content — confirm exact styling against the original deployment.
"""

import json
import os
from datetime import datetime

import gradio as gr
import pandas as pd
import plotly.express as px  # noqa: F401 -- kept from original import block
import plotly.graph_objects as go
from plotly.subplots import make_subplots  # noqa: F401 -- kept from original

# File-backed "database" of approved and pending entries.
LEADERBOARD_FILE = 'leaderboard.json'
PENDING_FILE = 'pending_submissions.json'

# Admin password for the approval panel.  Previously hard-coded as
# "admin123" in two places; now overridable via the environment so
# deployments need not ship the default.
ADMIN_PASSWORD = os.environ.get('ADMIN_PASSWORD', 'admin123')

# metric name -> (x field, y field) in the JSON entries.  The y field is
# also the sort key (descending).  Consolidates a mapping that was
# copy-pasted across four functions.
METRIC_FIELDS = {
    "Attack-free": ('normalizedUtility', 'detectionRate'),
    "Watermark Removal": ('absoluteUtilityDegregation', 'removal_detectionRate'),
    "Stealing Attack": ('adversaryBERTscore', 'adversaryDetectionRate'),
}

# metric name -> (x axis title, y axis title) for the scatter plot.
AXIS_TITLES = {
    "Attack-free": ("Normalized Utility", "Detection Rate (%)"),
    "Watermark Removal": ("Absolute Utility Degradation", "Removal Detection Rate (%)"),
    "Stealing Attack": ("Adversary BERT Score", "Adversary Detection Rate (%)"),
}

# metric name -> (x column header, y column header) for the results table.
TABLE_COLUMNS = {
    "Attack-free": ('Normalized Utility ↑', 'Detection Rate (%) ↑'),
    "Watermark Removal": ('Abs Utility Degradation ↑', 'Removal Detection Rate (%) ↑'),
    "Stealing Attack": ('Adversary BERT Score ↑', 'Adversary Detection Rate (%) ↑'),
}

# Hover labels for the scatter plot.  BUG FIX: the original hovertemplate
# showed "Abs Utility Degradation" for the Stealing Attack x axis instead
# of "Adversary BERT Score".
HOVER_X_LABELS = {
    "Attack-free": "Normalized Utility",
    "Watermark Removal": "Abs Utility Degradation",
    "Stealing Attack": "Adversary BERT Score",
}

# Column headers for the admin pending-submissions table.
PENDING_COLUMNS = [
    "ID", "Name", "Model", "Paper Link", "Attack-free Utility",
    "Attack-free Detection", "Removal Degradation", "Removal Detection",
    "Adversary BERT", "Adversary Detection", "Submitted At",
]


def load_leaderboard_data():
    """Load approved leaderboard entries; empty list if missing/corrupt.

    Was a bare ``except:``; narrowed to the errors open/parse can raise.
    """
    try:
        with open(LEADERBOARD_FILE, 'r') as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError):
        return []


def _load_pending_raw():
    """Load raw pending submissions; empty list if missing/corrupt."""
    try:
        with open(PENDING_FILE, 'r') as f:
            return json.load(f)
    except (OSError, json.JSONDecodeError):
        return []


def filter_data(data, model, metric):
    """Return entries for *model* that carry both fields for *metric*.

    Each returned dict has ``name``, ``model`` and the two metric fields.
    Sorted by the detection-rate field, descending.  Unknown metrics yield
    an empty list (matching the original elif-chain, which appended
    nothing for them).
    """
    fields = METRIC_FIELDS.get(metric)
    if fields is None:
        return []
    x_key, y_key = fields
    filtered = [
        {
            'name': item.get('name', ''),
            'model': item.get('model', ''),
            x_key: item.get(x_key, 0),
            y_key: item.get(y_key, 0),
        }
        for item in data
        if item.get('model') == model
        and item.get(x_key) is not None
        and item.get(y_key) is not None
    ]
    filtered.sort(key=lambda entry: entry[y_key], reverse=True)
    return filtered


def create_scatter_plot(data, metric):
    """Build the performance scatter plot for already-filtered *data*.

    Returns an empty figure when there is nothing to plot.
    """
    if not data:
        return go.Figure()

    x_key, y_key = METRIC_FIELDS[metric]
    names = [item['name'] for item in data]
    x_data = [item[x_key] for item in data]
    y_data = [item[y_key] for item in data]

    # Y hover label: plain detection rate except under the stealing attack.
    if metric != "Stealing Attack":
        hover_y_label = 'Detection Rate'
    else:
        hover_y_label = 'Adversary Detection Rate'
    # NOTE(review): <br>/<extra> markup reconstructed — tags were stripped
    # from the recovered source.
    hovertemplate = (
        '<b>%{text}</b><br>'
        + HOVER_X_LABELS[metric] + ': %{x:.3f}<br>'
        + hover_y_label + ': %{y:.3f}%<br>'
        + '<extra></extra>'
    )

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=x_data,
        y=y_data,
        mode='markers+text',
        marker=dict(
            size=12,
            color='#3B82F6',
            line=dict(width=2, color='white'),
        ),
        text=names,
        textposition='top center',
        textfont=dict(size=10, color='#374151'),
        hovertemplate=hovertemplate,
    ))

    x_title, y_title = AXIS_TITLES[metric]
    fig.update_layout(
        title=f"{metric} Performance Scatter Plot",
        xaxis_title=x_title,
        yaxis_title=y_title,
        font=dict(size=12, color='#374151'),
        plot_bgcolor='white',
        paper_bgcolor='white',
        xaxis=dict(gridcolor='lightgray', showgrid=True, zeroline=False),
        yaxis=dict(gridcolor='lightgray', showgrid=True, zeroline=False),
        margin=dict(l=60, r=60, t=80, b=60),
    )
    return fig


def create_table_data(data, metric):
    """Build the leaderboard DataFrame with a trailing Reference column.

    BUG FIX: the original file defined ``create_table_data`` twice; the
    first (rank-numbered) version was dead code silently shadowed by the
    second.  Only the effective (second) behavior is kept here.
    """
    if not data:
        return pd.DataFrame()

    x_key, y_key = METRIC_FIELDS[metric]
    col_x, col_y = TABLE_COLUMNS[metric]

    rows = []
    for item in data:
        paper_link = item.get('paperLink')
        if paper_link:
            # NOTE(review): anchor markup reconstructed — the original tag
            # was stripped from the dump; visible text was "📄 Paper".
            reference = f'<a href="{paper_link}" target="_blank">📄 Paper</a>'
        else:
            reference = '-'
        rows.append({
            'Watermark': item['name'],
            col_x: f"{item[x_key]:.3f}",
            col_y: f"{item[y_key]:.3f}",
            'Reference': reference,
        })
    return pd.DataFrame(rows)


def update_interface(model, metric):
    """Reload data and return (scatter plot, table) for the selection."""
    data = load_leaderboard_data()
    filtered_data = filter_data(data, model, metric)
    return (create_scatter_plot(filtered_data, metric),
            create_table_data(filtered_data, metric))


def submit_watermark_data(name, model, paper_link, normalized_utility,
                          detection_rate, absolute_utility_degradation,
                          removal_detection_rate, adversary_bert_score,
                          adversary_detection_rate):
    """Validate a submission and append it to the pending queue.

    Returns ``(status_message, scatter_plot_update, table_update)``.

    BUG FIX: this handler is wired to three outputs, but every
    validation-error path originally returned a 2-tuple — Gradio would
    fail at runtime on any invalid submission.  All paths now return
    3-tuples (``gr.update()`` leaves a component unchanged).
    """
    def _fail(message):
        # Uniform error return: message plus two no-op component updates.
        return message, gr.update(), gr.update()

    # --- basic fields -----------------------------------------------------
    if not name or not name.strip():
        return _fail("❌ Error: Watermark name is required")
    if not model:
        return _fail("❌ Error: Model selection is required")

    if paper_link and paper_link.strip():
        paper_link = paper_link.strip()
        if not (paper_link.startswith('http://') or paper_link.startswith('https://')):
            return _fail("❌ Error: Paper link must start with http:// or https://")
    else:
        paper_link = None

    # --- which metric sets were provided ----------------------------------
    has_attack_free_data = normalized_utility is not None and detection_rate is not None
    has_removal_data = (absolute_utility_degradation is not None
                        and removal_detection_rate is not None)
    has_stealing_data = (adversary_bert_score is not None
                         and adversary_detection_rate is not None)

    # At least one complete set of metrics must be provided.
    if not has_attack_free_data and not has_removal_data and not has_stealing_data:
        return _fail(
            "❌ Error: Please provide at least one complete set of metrics:\n"
            "• Attack-free: Normalized Utility + Detection Rate\n"
            "• Watermark Removal: Absolute Utility Degradation + Removal Detection Rate\n"
            "• Stealing Attack: Adversary BERT Score + Adversary Detection Rate"
        )

    # --- range validation --------------------------------------------------
    # NOTE(review): the ``<= 0`` comparisons reject an exact 0.000 even
    # though the messages say "between 0.000 and 1.000"; preserved as-is.
    if has_attack_free_data:
        if normalized_utility <= 0 or normalized_utility > 1.0:
            return _fail("❌ Error: Normalized Utility must be between 0.000 and 1.000")
        if detection_rate < 0.0 or detection_rate > 100.0:
            return _fail("❌ Error: Detection Rate must be between 0.000 and 100.000")

    if has_removal_data:
        if absolute_utility_degradation <= 0 or absolute_utility_degradation > 1.0:
            return _fail("❌ Error: Absolute Utility Degradation must be between 0.000 and 1.000")
        if removal_detection_rate < 0.0 or removal_detection_rate > 100.0:
            return _fail("❌ Error: Removal Detection Rate must be between 0.000 and 100.000")

    if has_stealing_data:
        if adversary_bert_score <= 0 or adversary_bert_score > 1.0:
            return _fail("❌ Error: Adversary BERT Score must be between 0.000 and 1.000")
        if adversary_detection_rate < 0.0 or adversary_detection_rate > 100.0:
            return _fail("❌ Error: Adversary Detection Rate must be between 0.000 and 100.000")

    # If only one adversary metric was supplied, both are required.
    has_partial_adversary = ((adversary_bert_score is not None and adversary_bert_score > 0)
                             or (adversary_detection_rate is not None
                                 and adversary_detection_rate > 0))
    if has_partial_adversary and not has_stealing_data:
        return _fail("❌ Error: If you provide one adversary metric, you must "
                     "provide both Adversary BERT Score and Adversary Detection Rate")

    # --- build the entry ---------------------------------------------------
    # Attack-free fields are always stored (possibly None) to match the
    # original JSON schema; optional sets are stored only when provided.
    new_entry = {
        "name": name.strip(),
        "model": model,
        "normalizedUtility": normalized_utility,
        "detectionRate": detection_rate,
    }
    if paper_link:
        new_entry["paperLink"] = paper_link
    if absolute_utility_degradation is not None:
        new_entry["absoluteUtilityDegregation"] = absolute_utility_degradation
    if removal_detection_rate is not None:
        new_entry["removal_detectionRate"] = removal_detection_rate
    if adversary_bert_score is not None:
        new_entry["adversaryBERTscore"] = adversary_bert_score
    if adversary_detection_rate is not None:
        new_entry["adversaryDetectionRate"] = adversary_detection_rate

    # --- duplicate checks (approved + pending) -----------------------------
    approved_data = load_leaderboard_data()
    for entry in approved_data:
        if entry.get('name') == name.strip() and entry.get('model') == model:
            return _fail(f"❌ Error: A watermark named '{name.strip()}' already exists for {model}")

    pending_data = _load_pending_raw()
    for entry in pending_data:
        if entry.get('name') == name.strip() and entry.get('model') == model:
            return _fail(f"❌ Error: A watermark named '{name.strip()}' is already pending approval for {model}")

    # --- queue for approval ------------------------------------------------
    new_entry['submitted_at'] = datetime.now().isoformat()
    new_entry['status'] = 'pending'
    new_entry['submission_id'] = f"{name.strip()}_{model}_{int(datetime.now().timestamp())}"
    pending_data.append(new_entry)

    try:
        with open(PENDING_FILE, 'w') as f:
            json.dump(pending_data, f, indent=2)

        # Refresh the display from approved data only — the new entry stays
        # hidden until an admin approves it.
        filtered_data = filter_data(approved_data, model, "Attack-free")
        scatter_plot = create_scatter_plot(filtered_data, "Attack-free")
        table_data = create_table_data(filtered_data, "Attack-free")
        success_msg = (f"✅ Successfully submitted '{name.strip()}' for {model} for approval! "
                       "Your submission will be reviewed by the administrator before "
                       "appearing on the leaderboard.")
        return success_msg, scatter_plot, table_data
    except Exception as e:
        return _fail(f"❌ Error saving submission: {str(e)}")


def clear_form():
    """Reset all nine submission-form inputs."""
    return (None, None, None, None, None, None, None, None, None)


def load_pending_submissions():
    """Format pending submissions as a DataFrame for the admin table.

    Missing metric fields render as '-'; timestamps are truncated to
    seconds (``[:19]`` of the ISO string).
    """
    try:
        pending_data = _load_pending_raw()
        if not pending_data:
            return pd.DataFrame(columns=PENDING_COLUMNS)

        def fmt(entry, key):
            # Three-decimal format, or '-' when the field is absent/None.
            value = entry.get(key)
            return f"{value:.3f}" if value is not None else '-'

        formatted_data = []
        for entry in pending_data:
            formatted_data.append({
                "ID": entry.get('submission_id', 'N/A'),
                "Name": entry.get('name', 'N/A'),
                "Model": entry.get('model', 'N/A'),
                "Paper Link": entry.get('paperLink', '-'),
                "Attack-free Utility": fmt(entry, 'normalizedUtility'),
                "Attack-free Detection": fmt(entry, 'detectionRate'),
                "Removal Degradation": fmt(entry, 'absoluteUtilityDegregation'),
                "Removal Detection": fmt(entry, 'removal_detectionRate'),
                "Adversary BERT": fmt(entry, 'adversaryBERTscore'),
                "Adversary Detection": fmt(entry, 'adversaryDetectionRate'),
                "Submitted At": (entry.get('submitted_at', 'N/A')[:19]
                                 if entry.get('submitted_at') else 'N/A'),
            })
        return pd.DataFrame(formatted_data)
    except Exception as e:
        print(f"Error loading pending submissions: {e}")
        return pd.DataFrame(columns=PENDING_COLUMNS)


def _pop_pending(submission_id):
    """Remove and return the pending entry with *submission_id* (or None),
    along with the remaining pending list."""
    pending_data = _load_pending_raw()
    for i, entry in enumerate(pending_data):
        if entry.get('submission_id') == submission_id:
            return pending_data.pop(i), pending_data
    return None, pending_data


def approve_submission(submission_id, admin_password):
    """Move a pending submission onto the public leaderboard.

    Returns ``(status_message, pending_table_update)``.
    """
    if admin_password != ADMIN_PASSWORD:
        return "❌ Access denied: Invalid admin password", gr.update()
    try:
        approved_entry, pending_data = _pop_pending(submission_id)
        if not approved_entry:
            return "❌ Submission not found", gr.update()

        # Strip queue-only metadata before publishing.
        approved_entry.pop('submitted_at', None)
        approved_entry.pop('status', None)
        approved_entry.pop('submission_id', None)

        approved_data = load_leaderboard_data()
        approved_data.append(approved_entry)

        with open(LEADERBOARD_FILE, 'w') as f:
            json.dump(approved_data, f, indent=2)
        with open(PENDING_FILE, 'w') as f:
            json.dump(pending_data, f, indent=2)

        return (f"✅ Approved submission: {approved_entry.get('name', 'Unknown')}",
                load_pending_submissions())
    except Exception as e:
        return f"❌ Error approving submission: {str(e)}", gr.update()


def reject_submission(submission_id, admin_password):
    """Delete a pending submission without publishing it.

    Returns ``(status_message, pending_table_update)``.
    """
    if admin_password != ADMIN_PASSWORD:
        return "❌ Access denied: Invalid admin password", gr.update()
    try:
        rejected_entry, pending_data = _pop_pending(submission_id)
        if not rejected_entry:
            return "❌ Submission not found", gr.update()

        with open(PENDING_FILE, 'w') as f:
            json.dump(pending_data, f, indent=2)

        return (f"❌ Rejected submission: {rejected_entry.get('name', 'Unknown')}",
                load_pending_submissions())
    except Exception as e:
        return f"❌ Error rejecting submission: {str(e)}", gr.update()


def toggle_add_data_section(section):
    """Toggle visibility of the add-data section.

    NOTE(review): no longer wired to any event (the toggle button was
    removed and the section is always visible); kept for compatibility.
    """
    return gr.update(visible=not section.visible)


def create_interface():
    """Assemble and return the full Gradio Blocks app."""
    # Custom CSS for the whole app (gradients, accordion/button theming).
    css = """
    .gradio-container { max-width: 1200px !important; margin: 0 auto !important;
        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); min-height: 100vh; }
    .title { text-align: center; margin: 20px 0; font-size: 3rem; font-weight: bold;
        background: linear-gradient(45deg, #667eea 0%, #764ba2 100%);
        -webkit-background-clip: text; -webkit-text-fill-color: transparent;
        background-clip: text; text-shadow: 2px 2px 4px rgba(0,0,0,0.1); }
    .subtitle { text-align: center; margin-bottom: 30px; font-size: 1.3rem;
        color: #4a5568; font-weight: 500; }
    .controls { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        padding: 30px; border-radius: 15px; margin-bottom: 25px;
        box-shadow: 0 8px 32px rgba(0,0,0,0.1); border: 1px solid rgba(255,255,255,0.2); }
    .controls label { color: white !important; font-weight: bold !important;
        font-size: 1.2rem !important; }
    .controls .gr-radio { background: rgba(255,255,255,0.1) !important;
        border-radius: 10px !important; padding: 12px !important; }
    .controls .gr-radio label { color: white !important; font-size: 1.1rem !important; }
    .controls h3 { font-size: 1.4rem !important; margin-bottom: 15px !important; }
    #highlighted-add-data { background: linear-gradient(135deg, #E0F2FE 0%, #B3E5FC 100%) !important;
        border: 2px solid #81D4FA !important; border-radius: 15px !important;
        box-shadow: 0 10px 40px rgba(129, 212, 250, 0.3) !important; margin: 20px 0 !important; }
    #highlighted-add-data .gr-accordion-header {
        background: linear-gradient(135deg, #81D4FA 0%, #4FC3F7 100%) !important;
        color: white !important; font-weight: bold !important; font-size: 1.2rem !important;
        padding: 15px 20px !important; border-radius: 15px 15px 0 0 !important; }
    #highlighted-add-data .gr-accordion-content { background: rgba(255,255,255,0.95) !important;
        border-radius: 0 0 15px 15px !important; padding: 25px !important; }
    .gr-button { border-radius: 10px !important; font-weight: bold !important;
        transition: all 0.3s ease !important; }
    .gr-button:hover { transform: translateY(-2px) !important;
        box-shadow: 0 5px 15px rgba(0,0,0,0.2) !important; }
    .gr-plot { border-radius: 15px !important; box-shadow: 0 8px 32px rgba(0,0,0,0.1) !important;
        background: white !important; padding: 20px !important; }
    .gr-dataframe { border-radius: 15px !important; box-shadow: 0 8px 32px rgba(0,0,0,0.1) !important;
        background: white !important; overflow: hidden !important; }
    .gr-accordion { border-radius: 15px !important; box-shadow: 0 8px 32px rgba(0,0,0,0.1) !important;
        background: white !important; margin: 15px 0 !important; }
    .gr-accordion-header { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
        color: white !important; font-weight: bold !important; padding: 15px 20px !important;
        border-radius: 15px 15px 0 0 !important; }
    .gr-accordion-content { background: rgba(255,255,255,0.95) !important;
        border-radius: 0 0 15px 15px !important; padding: 20px !important; }
    #submit-btn { background: linear-gradient(135deg, #29B6F6 0%, #0288D1 100%) !important;
        border: 2px solid #0277BD !important; color: white !important; font-weight: bold !important;
        font-size: 1.1rem !important; padding: 15px 30px !important; border-radius: 12px !important;
        box-shadow: 0 8px 25px rgba(41, 182, 246, 0.4) !important; transition: all 0.3s ease !important; }
    #submit-btn:hover { background: linear-gradient(135deg, #0288D1 0%, #0277BD 100%) !important;
        transform: translateY(-3px) !important; box-shadow: 0 12px 35px rgba(41, 182, 246, 0.6) !important; }
    #owner-controls { background: linear-gradient(135deg, #FFE0E0 0%, #FFCDD2 100%) !important;
        border: 2px solid #FF5722 !important; border-radius: 15px !important;
        box-shadow: 0 10px 40px rgba(255, 87, 34, 0.3) !important; margin: 20px 0 !important; }
    #owner-controls .gr-accordion-header {
        background: linear-gradient(135deg, #FF5722 0%, #D32F2F 100%) !important;
        color: white !important; font-weight: bold !important; font-size: 1.2rem !important;
        padding: 15px 20px !important; border-radius: 15px 15px 0 0 !important; }
    #owner-controls .gr-accordion-content { background: rgba(255,255,255,0.95) !important;
        border-radius: 0 0 15px 15px !important; padding: 25px !important; }
    #approve-btn { background: linear-gradient(135deg, #4CAF50 0%, #2E7D32 100%) !important;
        border: 2px solid #388E3C !important; color: white !important; font-weight: bold !important;
        font-size: 1.1rem !important; padding: 15px 30px !important; border-radius: 12px !important;
        box-shadow: 0 8px 25px rgba(76, 175, 80, 0.4) !important; transition: all 0.3s ease !important; }
    #approve-btn:hover { background: linear-gradient(135deg, #2E7D32 0%, #1B5E20 100%) !important;
        transform: translateY(-3px) !important; box-shadow: 0 12px 35px rgba(76, 175, 80, 0.6) !important; }
    #reject-btn { background: linear-gradient(135deg, #F44336 0%, #C62828 100%) !important;
        border: 2px solid #D32F2F !important; color: white !important; font-weight: bold !important;
        font-size: 1.1rem !important; padding: 15px 30px !important; border-radius: 12px !important;
        box-shadow: 0 8px 25px rgba(244, 67, 54, 0.4) !important; transition: all 0.3s ease !important; }
    #reject-btn:hover { background: linear-gradient(135deg, #C62828 0%, #B71C1C 100%) !important;
        transform: translateY(-3px) !important; box-shadow: 0 12px 35px rgba(244, 67, 54, 0.6) !important; }
    #guideline-section { background: linear-gradient(135deg, #E8F5E8 0%, #C8E6C9 100%) !important;
        border: 2px solid #4CAF50 !important; border-radius: 15px !important;
        box-shadow: 0 10px 40px rgba(76, 175, 80, 0.3) !important; margin: 20px 0 !important; }
    #guideline-section .gr-accordion-header {
        background: linear-gradient(135deg, #4CAF50 0%, #2E7D32 100%) !important;
        color: white !important; font-weight: bold !important; font-size: 1.2rem !important;
        padding: 15px 20px !important; border-radius: 15px 15px 0 0 !important; }
    #guideline-section .gr-accordion-content { background: rgba(255,255,255,0.95) !important;
        border-radius: 0 0 15px 15px !important; padding: 25px !important; }
    """

    with gr.Blocks(css=css, title="Watermark Leaderboard for LLMs") as demo:
        # Header.  NOTE(review): all gr.HTML markup in this function is
        # reconstructed — tags were stripped from the recovered source.
        gr.HTML("""
            <h1 class="title">🏆 Watermark Leaderboard for LLMs 🏆</h1>
            <p class="subtitle">📊 Interactive leaderboard for comparing watermark
            performance across different models and evaluation settings</p>
        """)

        # Model / setting selectors.
        with gr.Row():
            with gr.Column(scale=1):
                gr.HTML("<h3>🤖 Model Selection</h3>")
                model_selector = gr.Radio(
                    choices=["LLaMA3", "DeepSeek"],
                    value="LLaMA3",
                    label="Model",
                    info="Select the model to display",
                )
            with gr.Column(scale=1):
                gr.HTML("<h3>⚙️ Evaluation Setting</h3>")
                metric_selector = gr.Radio(
                    choices=["Attack-free", "Watermark Removal", "Stealing Attack"],
                    value="Attack-free",
                    label="Setting",
                    info="Select the evaluation setting",
                )

        # Highlighted submission section.
        with gr.Accordion("🚀 Add Your Data to the Leaderboard", open=False,
                          elem_id="highlighted-add-data"):
            gr.HTML("""
                <h3>📝 Submit Your Watermark Performance Results</h3>
                <p>Contribute to the community by sharing your watermark evaluation results</p>
                <h4>📋 Submission Requirements</h4>
                <p>Provide at least one complete set of metrics:</p>
            """)
            with gr.Row():
                with gr.Column(scale=1):
                    gr.HTML("<h4>📋 Basic Information</h4>")
                    watermark_name = gr.Textbox(
                        label="Watermark Name",
                        placeholder="e.g., MyWatermark, Watermark-X",
                        info="Unique identifier for your watermark",
                    )
                    paper_link = gr.Textbox(
                        label="Paper Link (Optional)",
                        placeholder="https://arxiv.org/abs/xxxx.xxxxx or https://...",
                        info="Link to the paper describing this watermark method",
                    )
                    submission_model = gr.Radio(
                        choices=["LLaMA3", "DeepSeek"],
                        label="Model",
                        value="LLaMA3",
                        info="Select the model used",
                    )
                with gr.Column(scale=1):
                    gr.HTML("<h4>⚡ Attack-free Metrics (Optional - Both Required if One is Provided)</h4>")
                    normalized_utility = gr.Number(
                        label="Normalized Utility", value=None,
                        minimum=0.0, maximum=1.0, step=0.001,
                        info="Text quality metric (0.000 - 1.000)",
                    )
                    detection_rate = gr.Number(
                        label="Detection Rate (%)", value=None,
                        minimum=0.0, maximum=100.0, step=0.001,
                        info="Watermark detection accuracy (0.000 - 100.000%)",
                    )
            with gr.Row():
                with gr.Column(scale=1):
                    gr.HTML("<h4>🛡️ Watermark Removal (Optional)</h4>")
                    absolute_utility_degradation = gr.Number(
                        label="Absolute Utility Degradation", value=None,
                        minimum=0.0, maximum=1.0, step=0.001,
                        info="Resistance to removal attacks (0.000 - 1.000)",
                    )
                    removal_detection_rate = gr.Number(
                        label="Removal Detection Rate (%)", value=None,
                        minimum=0.0, maximum=100.0, step=0.001,
                        info="Detection rate under removal attacks (0.000 - 100.000%)",
                    )
                with gr.Column(scale=1):
                    gr.HTML("<h4>🎯 Stealing Attack (Optional)</h4>")
                    adversary_bert_score = gr.Number(
                        label="Adversary BERT Score", value=None,
                        minimum=0.0, maximum=1.0, step=0.001,
                        info="Performance under adversarial conditions (0.000 - 1.000)",
                    )
                    adversary_detection_rate = gr.Number(
                        label="Adversary Detection Rate (%)", value=None,
                        minimum=0.0, maximum=100.0, step=0.001,
                        info="Detection rate under adversarial attacks (0.000 - 100.000%)",
                    )

            with gr.Row():
                with gr.Column(scale=1):
                    submit_btn = gr.Button(
                        "🚀 Submit Data to Leaderboard",
                        variant="primary", size="lg", elem_id="submit-btn",
                    )
                with gr.Column(scale=1):
                    clear_btn = gr.Button(
                        "🗑️ Clear Form", variant="secondary", size="lg",
                    )

            status_message = gr.Markdown("", visible=True)

        # Main visualizations.
        scatter_plot = gr.Plot(label="Performance Scatter Plot", show_label=True)
        table = gr.DataFrame(label="Performance Table", show_label=True,
                             interactive=False, wrap=True)

        # Guideline section.  NOTE(review): the detailed list items under
        # each heading were lost in the dump; only headings survive.
        with gr.Accordion("📋 Guideline for Submitting Watermark Performance Results",
                          open=False, elem_id="guideline-section"):
            gr.HTML("""
                <h3>Guideline for Submitting Watermark Performance Results</h3>
                <h4>1. Datasets</h4>
                <h4>2. Models</h4>
                <h4>3. Evaluation Settings</h4>
                <h4>4. Metrics</h4>
                <h4>5. Submission</h4>
                <p>Reproducibility codes are available in the Files tab of this Space.</p>
            """)

        # Admin approval panel.
        with gr.Accordion("🔒 Owner Controls - Pending Submissions", open=False,
                          elem_id="owner-controls"):
            gr.HTML("""
                <h3>🛡️ Administrator Approval Panel</h3>
                <p>Review and approve pending submissions before they appear on the leaderboard</p>
            """)
            pending_table = gr.DataFrame(
                label="📋 Pending Submissions", show_label=True,
                interactive=False, wrap=True, headers=PENDING_COLUMNS,
            )
            admin_password_input = gr.Textbox(
                label="🔐 Admin Password",
                placeholder="Enter admin password to access controls",
                type="password",
                info="Required for approval/rejection actions",
            )
            with gr.Row():
                with gr.Column(scale=1):
                    submission_id_input = gr.Textbox(
                        label="Submission ID",
                        placeholder="Enter submission ID to approve/reject",
                        info="Copy from the pending submissions table",
                    )
                    approve_btn = gr.Button(
                        "✅ Approve Submission", variant="primary",
                        size="lg", elem_id="approve-btn",
                    )
                with gr.Column(scale=1):
                    reject_btn = gr.Button(
                        "❌ Reject Submission", variant="stop",
                        size="lg", elem_id="reject-btn",
                    )
                    refresh_pending_btn = gr.Button(
                        "🔄 Refresh Pending", variant="secondary", size="lg",
                    )
            approval_status = gr.Markdown("", visible=True)

        # --- event wiring --------------------------------------------------
        model_selector.change(
            fn=update_interface,
            inputs=[model_selector, metric_selector],
            outputs=[scatter_plot, table],
        )
        metric_selector.change(
            fn=update_interface,
            inputs=[model_selector, metric_selector],
            outputs=[scatter_plot, table],
        )
        submit_btn.click(
            fn=submit_watermark_data,
            inputs=[watermark_name, submission_model, paper_link,
                    normalized_utility, detection_rate,
                    absolute_utility_degradation, removal_detection_rate,
                    adversary_bert_score, adversary_detection_rate],
            outputs=[status_message, scatter_plot, table],
        )
        clear_btn.click(
            fn=clear_form,
            outputs=[watermark_name, paper_link, submission_model,
                     normalized_utility, detection_rate,
                     absolute_utility_degradation, removal_detection_rate,
                     adversary_bert_score, adversary_detection_rate],
        )

        # The add-data toggle button was removed; the highlighted section
        # is now always present in the layout.

        approve_btn.click(
            fn=approve_submission,
            inputs=[submission_id_input, admin_password_input],
            outputs=[approval_status, pending_table],
        )
        reject_btn.click(
            fn=reject_submission,
            inputs=[submission_id_input, admin_password_input],
            outputs=[approval_status, pending_table],
        )
        refresh_pending_btn.click(
            fn=load_pending_submissions,
            outputs=[pending_table],
        )

        # Initial population of the plot/table and the pending queue.
        demo.load(
            fn=lambda: update_interface("LLaMA3", "Attack-free"),
            outputs=[scatter_plot, table],
        )
        demo.load(
            fn=load_pending_submissions,
            outputs=[pending_table],
        )

        # Clear the admin password after approve/reject for security.
        def clear_admin_password():
            return gr.update(value="")

        approve_btn.click(fn=clear_admin_password, outputs=[admin_password_input])
        reject_btn.click(fn=clear_admin_password, outputs=[admin_password_input])

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch()