Spaces:

AnonymousResearch
/

WatermarkLeaderboard

Sleeping

File size: 46,746 Bytes

40b3335

import gradio as gr
import json
import os
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
from plotly.subplots import make_subplots

# Load leaderboard data
def load_leaderboard_data():
    """Load the approved leaderboard entries from ``leaderboard.json``.

    The path is relative to the current working directory.

    Returns:
        The parsed JSON content of the file, or an empty list when the file
        is missing, unreadable, or does not contain valid JSON.
    """
    try:
        with open('leaderboard.json', 'r') as f:
            return json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file.  ValueError: json.JSONDecodeError
        # and UnicodeDecodeError.  Unlike a bare ``except:``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return []

# Filter data based on model and metric
def filter_data(data, model, metric):
    """Select and rank the entries of one model for one evaluation setting.

    Args:
        data: Iterable of leaderboard entry dicts.
        model: Only entries whose ``'model'`` equals this value are kept.
        metric: ``"Attack-free"``, ``"Watermark Removal"`` or
            ``"Stealing Attack"``; any other value yields an empty list.

    Returns:
        A list of dicts holding ``'name'``, ``'model'`` and the two metric
        fields of the chosen setting, sorted by the setting's detection-rate
        field in descending order.  Entries where either metric field is
        missing or ``None`` are skipped.
    """
    # (setting label, utility-like field, detection-rate field)
    settings = (
        ("Attack-free", 'normalizedUtility', 'detectionRate'),
        ("Watermark Removal", 'absoluteUtilityDegregation', 'removal_detectionRate'),
        ("Stealing Attack", 'adversaryBERTscore', 'adversaryDetectionRate'),
    )
    keys = next(
        ((utility, rate) for label, utility, rate in settings if metric == label),
        None,
    )

    rows = []
    for entry in data:
        if not entry.get('model') == model:
            continue
        if keys is None:
            # Unknown setting: nothing can be collected for it.
            continue
        utility_key, rate_key = keys
        if entry.get(utility_key) is None or entry.get(rate_key) is None:
            continue
        rows.append({
            'name': entry.get('name', ''),
            'model': entry.get('model', ''),
            utility_key: entry.get(utility_key, 0),
            rate_key: entry.get(rate_key, 0),
        })

    if keys is None:
        return rows

    # Highest detection rate first; ties keep their input order.
    sort_field = keys[1]
    return sorted(rows, key=lambda row: row[sort_field], reverse=True)

# Create scatter plot
def create_scatter_plot(data, metric):
    """Build a labelled scatter plot for one evaluation setting.

    Args:
        data: List of entry dicts; each must contain ``'name'`` and the two
            metric fields read for ``metric`` (see the branches below).
        metric: ``"Attack-free"`` or ``"Watermark Removal"``; any other value
            is plotted using the Stealing Attack fields.

    Returns:
        A ``go.Figure``.  When ``data`` is empty the figure has no traces
        and no layout customisation.
    """
    if not data:
        return go.Figure()

    # Field names, hover label and axis titles for the selected setting.
    if metric == "Attack-free":
        x_key, y_key = 'normalizedUtility', 'detectionRate'
        x_hover = 'Normalized Utility'
        x_title = "Normalized Utility"
        y_title = "Detection Rate (%)"
    elif metric == "Watermark Removal":
        x_key, y_key = 'absoluteUtilityDegregation', 'removal_detectionRate'
        x_hover = 'Abs Utility Degradation'
        x_title = "Absolute Utility Degradation"
        y_title = "Removal Detection Rate (%)"
    else:  # Stealing Attack
        x_key, y_key = 'adversaryBERTscore', 'adversaryDetectionRate'
        x_hover = 'Adversary BERT Score'
        x_title = "Adversary BERT Score"
        y_title = "Adversary Detection Rate (%)"

    y_hover = ('Detection Rate' if metric != "Stealing Attack"
               else 'Adversary Detection Rate')

    names = [item['name'] for item in data]
    x_data = [item[x_key] for item in data]
    y_data = [item[y_key] for item in data]

    # Every line of the hover text ends in a complete ``<br>`` tag; the
    # Watermark Removal line previously ended in a truncated ``<br``.
    hovertemplate = (
        '<b>%{text}</b><br>'
        + x_hover + ': %{x:.3f}<br>'
        + y_hover + ': %{y:.3f}%<br>'
        + '<extra></extra>'
    )

    fig = go.Figure()

    # Add scatter points, each labelled with its watermark name
    fig.add_trace(go.Scatter(
        x=x_data,
        y=y_data,
        mode='markers+text',
        marker=dict(
            size=12,
            color='#3B82F6',
            line=dict(width=2, color='white')
        ),
        text=names,
        textposition='top center',
        textfont=dict(size=10, color='#374151'),
        hovertemplate=hovertemplate
    ))

    fig.update_layout(
        title=f"{metric} Performance Scatter Plot",
        xaxis_title=x_title,
        yaxis_title=y_title,
        font=dict(size=12, color='#374151'),
        plot_bgcolor='white',
        paper_bgcolor='white',
        xaxis=dict(
            gridcolor='lightgray',
            showgrid=True,
            zeroline=False
        ),
        yaxis=dict(
            gridcolor='lightgray',
            showgrid=True,
            zeroline=False
        ),
        margin=dict(l=60, r=60, t=80, b=60)
    )

    return fig

# Create table data with heatmap styling
def create_table_data(data, metric):
    """Build a ranked table of formatted metric values.

    NOTE: a second ``create_table_data`` is defined further down in this
    module; that later definition rebinds the name, so this version is not
    the one callers get after the module has finished loading.

    Args:
        data: List of entry dicts containing ``'name'`` and the two metric
            fields of the chosen setting.
        metric: ``"Attack-free"`` or ``"Watermark Removal"``; any other value
            uses the Stealing Attack fields.

    Returns:
        A ``pd.DataFrame`` with ``Rank`` (1-based position in ``data``),
        ``Watermark`` and two metric columns formatted to three decimals;
        an empty ``pd.DataFrame`` when ``data`` is empty.
    """
    if not data:
        return pd.DataFrame()

    # (column header, entry field) pairs for the selected setting
    if metric == "Attack-free":
        columns = (
            ('Normalized Utility ↑', 'normalizedUtility'),
            ('Detection Rate (%) ↑', 'detectionRate'),
        )
    elif metric == "Watermark Removal":
        columns = (
            ('Abs Utility Degradation ↑', 'absoluteUtilityDegregation'),
            ('Removal Detection Rate (%) ↑', 'removal_detectionRate'),
        )
    else:  # Stealing Attack
        columns = (
            ('Adversary BERT Score ↑', 'adversaryBERTscore'),
            ('Adversary Detection Rate (%) ↑', 'adversaryDetectionRate'),
        )

    records = []
    for rank, entry in enumerate(data, start=1):
        record = {'Rank': rank, 'Watermark': entry['name']}
        for header, field in columns:
            record[header] = f"{entry[field]:.3f}"
        records.append(record)

    return pd.DataFrame(records)

# Create table data with green arrows and reference links
def create_table_data(data, metric):
    """Build the leaderboard table, including a paper reference column.

    Args:
        data: List of entry dicts containing ``'name'`` and the two metric
            fields of the chosen setting; ``'paperLink'`` is optional.
        metric: ``"Attack-free"`` or ``"Watermark Removal"``; any other value
            uses the Stealing Attack fields.

    Returns:
        A ``pd.DataFrame`` with ``Watermark``, two metric columns formatted
        to three decimals, and ``Reference`` (an HTML anchor to the paper, or
        ``'-'`` when no link is present).  An empty ``pd.DataFrame`` is
        returned when ``data`` is empty.
    """
    import html  # local import so the block does not depend on file-level imports

    if not data:
        return pd.DataFrame()

    # (column header, entry field) pairs for the selected setting
    if metric == "Attack-free":
        columns = (
            ('Normalized Utility ↑', 'normalizedUtility'),
            ('Detection Rate (%) ↑', 'detectionRate'),
        )
    elif metric == "Watermark Removal":
        columns = (
            ('Abs Utility Degradation ↑', 'absoluteUtilityDegregation'),
            ('Removal Detection Rate (%) ↑', 'removal_detectionRate'),
        )
    else:  # Stealing Attack
        columns = (
            ('Adversary BERT Score ↑', 'adversaryBERTscore'),
            ('Adversary Detection Rate (%) ↑', 'adversaryDetectionRate'),
        )

    table_data = []
    for item in data:
        paper_link = item.get('paperLink')

        if paper_link:
            # The link originates from user submissions: escape it so quotes
            # or angle brackets cannot break out of the href attribute.
            safe_link = html.escape(str(paper_link), quote=True)
            reference_link = f'<a href="{safe_link}" target="_blank" style="color: #3B82F6; text-decoration: underline; font-size: 0.8em;">📄 Paper</a>'
        else:
            reference_link = '-'

        row = {'Watermark': item['name']}
        for header, field in columns:
            row[header] = f"{item[field]:.3f}"

        # Reference column goes last
        row['Reference'] = reference_link

        table_data.append(row)

    return pd.DataFrame(table_data)

# Update interface based on selections
def update_interface(model, metric):
    """Rebuild the plot and table for the selected model and setting.

    Reloads the leaderboard data on every call, filters it for ``model`` and
    ``metric``, and returns ``(scatter_plot, table_data)``.
    """
    rows = filter_data(load_leaderboard_data(), model, metric)
    # Plot first, then table, both from the same filtered rows.
    return create_scatter_plot(rows, metric), create_table_data(rows, metric)

# Handle form submission
def submit_watermark_data(name, model, paper_link, normalized_utility, detection_rate,
                         absolute_utility_degradation, removal_detection_rate,
                         adversary_bert_score, adversary_detection_rate):
    """Validate a submitted result and queue it in ``pending_submissions.json``.

    The entry is only appended to the pending file; ``leaderboard.json`` is
    read (for duplicate detection and to rebuild the view) but not written.

    Args:
        name: Watermark name; required, surrounding whitespace is stripped.
        model: Model the result belongs to; required.
        paper_link: Optional URL; when given it must start with ``http://``
            or ``https://``.
        normalized_utility, detection_rate: Attack-free metric pair.
        absolute_utility_degradation, removal_detection_rate: Watermark
            Removal metric pair.
        adversary_bert_score, adversary_detection_rate: Stealing Attack
            metric pair.

        A metric counts as provided when it is not ``None``; at least one
        pair must be complete.

    Returns:
        ``(message, gr.update())`` on any validation or save error, and
        ``(message, scatter_plot, table_data)`` on success, where the plot and
        table show the approved Attack-free data for ``model``.
        NOTE(review): the two shapes differ in length; the event wiring is
        not visible here, so confirm it accepts both.
    """

    # Validation
    if not name or not name.strip():
        return "❌ Error: Watermark name is required", gr.update()

    if not model:
        return "❌ Error: Model selection is required", gr.update()

    clean_name = name.strip()

    # Validate paper link if provided
    if paper_link and paper_link.strip():
        paper_link = paper_link.strip()
        if not paper_link.startswith(('http://', 'https://')):
            return "❌ Error: Paper link must start with http:// or https://", gr.update()
    else:
        paper_link = None

    # Check what type of submission this is based on provided fields
    has_attack_free_data = normalized_utility is not None and detection_rate is not None
    has_removal_data = absolute_utility_degradation is not None and removal_detection_rate is not None
    has_stealing_data = adversary_bert_score is not None and adversary_detection_rate is not None

    # At least one complete set of metrics must be provided
    if not has_attack_free_data and not has_removal_data and not has_stealing_data:
        return "❌ Error: Please provide at least one complete set of metrics:\n• Attack-free: Normalized Utility + Detection Rate\n• Watermark Removal: Absolute Utility Degradation + Removal Detection Rate\n• Stealing Attack: Adversary BERT Score + Adversary Detection Rate", gr.update()

    # Range checks for every complete pair, in the order Attack-free,
    # Watermark Removal, Stealing Attack.
    # NOTE(review): a unit-scale value of exactly 0 is rejected although the
    # message reads "between 0.000 and 1.000" - confirm whether intended.
    range_checks = (
        (has_attack_free_data, normalized_utility, "Normalized Utility",
         detection_rate, "Detection Rate"),
        (has_removal_data, absolute_utility_degradation, "Absolute Utility Degradation",
         removal_detection_rate, "Removal Detection Rate"),
        (has_stealing_data, adversary_bert_score, "Adversary BERT Score",
         adversary_detection_rate, "Adversary Detection Rate"),
    )
    for provided, unit_value, unit_label, rate_value, rate_label in range_checks:
        if not provided:
            continue
        if unit_value <= 0 or unit_value > 1.0:
            return f"❌ Error: {unit_label} must be between 0.000 and 1.000", gr.update()
        if rate_value < 0.0 or rate_value > 100.0:
            return f"❌ Error: {rate_label} must be between 0.000 and 100.000", gr.update()

    # Validate partial adversary data (if one is provided, both are required)
    has_partial_adversary = (adversary_bert_score is not None and adversary_bert_score > 0) or \
                            (adversary_detection_rate is not None and adversary_detection_rate > 0)

    if has_partial_adversary and not has_stealing_data:
        return "❌ Error: If you provide one adversary metric, you must provide both Adversary BERT Score and Adversary Detection Rate", gr.update()

    # The Attack-free keys are always written (as None when not provided);
    # the remaining metrics are added only when provided.
    new_entry = {
        "name": clean_name,
        "model": model,
        "normalizedUtility": normalized_utility,
        "detectionRate": detection_rate
    }

    # Add paper link if provided
    if paper_link:
        new_entry["paperLink"] = paper_link

    optional_metrics = (
        ("absoluteUtilityDegregation", absolute_utility_degradation),
        ("removal_detectionRate", removal_detection_rate),
        ("adversaryBERTscore", adversary_bert_score),
        ("adversaryDetectionRate", adversary_detection_rate),
    )
    for key, value in optional_metrics:
        if value is not None:
            new_entry[key] = value

    # Load existing approved data to check for duplicates.  A missing or
    # malformed file is treated as "no approved entries".
    try:
        with open('leaderboard.json', 'r') as f:
            approved_data = json.load(f)
    except (OSError, ValueError):
        approved_data = []

    # Check for duplicate names in approved data
    for entry in approved_data:
        if entry.get('name') == clean_name and entry.get('model') == model:
            return f"❌ Error: A watermark named '{clean_name}' already exists for {model}", gr.update()

    # Load pending submissions to check for duplicates there too
    try:
        with open('pending_submissions.json', 'r') as f:
            pending_data = json.load(f)
    except (OSError, ValueError):
        pending_data = []

    # Check for duplicate names in pending data
    for entry in pending_data:
        if entry.get('name') == clean_name and entry.get('model') == model:
            return f"❌ Error: A watermark named '{clean_name}' is already pending approval for {model}", gr.update()

    # Add submission timestamp and status; one clock reading feeds both the
    # timestamp and the id so they always agree.
    submitted = datetime.now()
    new_entry['submitted_at'] = submitted.isoformat()
    new_entry['status'] = 'pending'
    new_entry['submission_id'] = f"{clean_name}_{model}_{int(submitted.timestamp())}"

    # Add to pending submissions instead of approved data
    pending_data.append(new_entry)

    # Save pending submissions
    try:
        with open('pending_submissions.json', 'w') as f:
            json.dump(pending_data, f, indent=2)

        # Update the interface with current approved data only
        filtered_data = filter_data(approved_data, model, "Attack-free")
        scatter_plot = create_scatter_plot(filtered_data, "Attack-free")
        table_data = create_table_data(filtered_data, "Attack-free")

        success_msg = f"✅ Successfully submitted '{clean_name}' for {model} for approval! Your submission will be reviewed by the administrator before appearing on the leaderboard."
        return success_msg, scatter_plot, table_data

    except Exception as e:
        # UI boundary: report the failure to the user instead of raising.
        return f"❌ Error saving submission: {str(e)}", gr.update()

# Clear form function
def clear_form():
    """Return a tuple of nine ``None`` values.

    Presumably one per submission-form input, to reset them; confirm against
    the event wiring.
    """
    return (None,) * 9

# Owner approval functions
def load_pending_submissions():
    """Read ``pending_submissions.json`` into a display table.

    Returns:
        A ``pd.DataFrame`` with one row per pending entry.  Metric values are
        formatted to three decimals (``'-'`` when absent) and the timestamp is
        cut to its first 19 characters.  When the file holds no entries, or
        anything goes wrong while reading or formatting, an empty frame with
        the same column headers is returned; failures are also printed.
    """
    headers = ["ID", "Name", "Model", "Paper Link", "Attack-free Utility", "Attack-free Detection",
               "Removal Degradation", "Removal Detection", "Adversary BERT", "Adversary Detection", "Submitted At"]

    # (column header, entry field) for the six numeric metrics, in column order
    metric_columns = (
        ("Attack-free Utility", 'normalizedUtility'),
        ("Attack-free Detection", 'detectionRate'),
        ("Removal Degradation", 'absoluteUtilityDegregation'),
        ("Removal Detection", 'removal_detectionRate'),
        ("Adversary BERT", 'adversaryBERTscore'),
        ("Adversary Detection", 'adversaryDetectionRate'),
    )

    try:
        with open('pending_submissions.json', 'r') as f:
            submissions = json.load(f)

        if not submissions:
            return pd.DataFrame(columns=headers)

        rows = []
        for entry in submissions:
            row = {
                "ID": entry.get('submission_id', 'N/A'),
                "Name": entry.get('name', 'N/A'),
                "Model": entry.get('model', 'N/A'),
                "Paper Link": entry.get('paperLink', '-'),
            }
            for header, field in metric_columns:
                value = entry.get(field)
                row[header] = '-' if value is None else f"{value:.3f}"

            stamp = entry.get('submitted_at')
            # Keep only the date and time part of the stored timestamp
            row["Submitted At"] = stamp[:19] if stamp else 'N/A'
            rows.append(row)

        return pd.DataFrame(rows)

    except Exception as e:
        print(f"Error loading pending submissions: {e}")
        return pd.DataFrame(columns=headers)

def approve_submission(submission_id, admin_password):
    """Move a pending submission into the approved leaderboard.

    Args:
        submission_id: ``submission_id`` of the pending entry to approve.
        admin_password: Password typed by the operator.  It is checked
            against the ``ADMIN_PASSWORD`` environment variable; when that
            variable is unset the legacy built-in password is still accepted.

    Returns:
        ``(message, table)``.  On success ``table`` is the refreshed pending
        table from ``load_pending_submissions()``; on any failure it is
        ``gr.update()``.
    """
    import hmac  # local import so the block does not depend on file-level imports

    # SECURITY: the fallback keeps existing deployments working, but it is
    # public in the source - set ADMIN_PASSWORD in the environment.
    expected_password = os.environ.get("ADMIN_PASSWORD", "admin123")
    # Constant-time comparison on bytes (compare_digest rejects non-ASCII str).
    if not isinstance(admin_password, str) or not hmac.compare_digest(
            admin_password.encode("utf-8"), expected_password.encode("utf-8")):
        return "❌ Access denied: Invalid admin password", gr.update()

    try:
        # Load pending submissions from file (not from the formatted function);
        # a missing or malformed file is treated as "nothing pending".
        try:
            with open('pending_submissions.json', 'r') as f:
                pending_data = json.load(f)
        except (OSError, ValueError):
            pending_data = []

        # Find and remove the submission
        approved_entry = None
        for i, entry in enumerate(pending_data):
            if entry.get('submission_id') == submission_id:
                approved_entry = pending_data.pop(i)
                break

        if not approved_entry:
            return "❌ Submission not found", gr.update()

        # Remove submission metadata before publishing the entry
        for meta_key in ('submitted_at', 'status', 'submission_id'):
            approved_entry.pop(meta_key, None)

        # Load approved data
        try:
            with open('leaderboard.json', 'r') as f:
                approved_data = json.load(f)
        except (OSError, ValueError):
            approved_data = []

        # Add to approved data
        approved_data.append(approved_entry)

        # Save approved data first, then the shortened pending list
        with open('leaderboard.json', 'w') as f:
            json.dump(approved_data, f, indent=2)

        with open('pending_submissions.json', 'w') as f:
            json.dump(pending_data, f, indent=2)

        return f"✅ Approved submission: {approved_entry.get('name', 'Unknown')}", load_pending_submissions()

    except Exception as e:
        # UI boundary: report the failure to the operator instead of raising.
        return f"❌ Error approving submission: {str(e)}", gr.update()

def reject_submission(submission_id, admin_password):
    """Discard a pending submission.

    Args:
        submission_id: ``submission_id`` of the pending entry to reject.
        admin_password: Password typed by the operator.  It is checked
            against the ``ADMIN_PASSWORD`` environment variable; when that
            variable is unset the legacy built-in password is still accepted.

    Returns:
        ``(message, table)``.  On success ``table`` is the refreshed pending
        table from ``load_pending_submissions()``; on any failure it is
        ``gr.update()``.
    """
    import hmac  # local import so the block does not depend on file-level imports

    # SECURITY: the fallback keeps existing deployments working, but it is
    # public in the source - set ADMIN_PASSWORD in the environment.
    expected_password = os.environ.get("ADMIN_PASSWORD", "admin123")
    # Constant-time comparison on bytes (compare_digest rejects non-ASCII str).
    if not isinstance(admin_password, str) or not hmac.compare_digest(
            admin_password.encode("utf-8"), expected_password.encode("utf-8")):
        return "❌ Access denied: Invalid admin password", gr.update()

    try:
        # Load pending submissions from file (not from the formatted function);
        # a missing or malformed file is treated as "nothing pending".
        try:
            with open('pending_submissions.json', 'r') as f:
                pending_data = json.load(f)
        except (OSError, ValueError):
            pending_data = []

        # Find and remove the submission
        rejected_entry = None
        for i, entry in enumerate(pending_data):
            if entry.get('submission_id') == submission_id:
                rejected_entry = pending_data.pop(i)
                break

        if not rejected_entry:
            return "❌ Submission not found", gr.update()

        # Save updated pending data
        with open('pending_submissions.json', 'w') as f:
            json.dump(pending_data, f, indent=2)

        return f"❌ Rejected submission: {rejected_entry.get('name', 'Unknown')}", load_pending_submissions()

    except Exception as e:
        # UI boundary: report the failure to the operator instead of raising.
        return f"❌ Error rejecting submission: {str(e)}", gr.update()

# Toggle add data section visibility
def toggle_add_data_section(section):
    """Return a ``gr.update`` that inverts ``section.visible``.

    NOTE(review): this reads a ``visible`` attribute from ``section``, so it
    expects an object exposing that attribute; confirm what the caller passes.
    """
    currently_visible = section.visible
    return gr.update(visible=not currently_visible)

# Create the main interface
def create_interface():
    # Custom CSS for better styling
    css = """
    .gradio-container {
        max-width: 1200px !important;
        margin: 0 auto !important;
        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
        min-height: 100vh;
    }
    .title {
        text-align: center;
        margin: 20px 0;
        font-size: 3rem;
        font-weight: bold;
        background: linear-gradient(45deg, #667eea 0%, #764ba2 100%);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        background-clip: text;
        text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
    }
    .subtitle {
        text-align: center;
        margin-bottom: 30px;
        font-size: 1.3rem;
        color: #4a5568;
        font-weight: 500;
    }
    .controls {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        padding: 30px;
        border-radius: 15px;
        margin-bottom: 25px;
        box-shadow: 0 8px 32px rgba(0,0,0,0.1);
        border: 1px solid rgba(255,255,255,0.2);
    }
    .controls label {
        color: white !important;
        font-weight: bold !important;
        font-size: 1.2rem !important;
    }
    .controls .gr-radio {
        background: rgba(255,255,255,0.1) !important;
        border-radius: 10px !important;
        padding: 12px !important;
    }
    .controls .gr-radio label {
        color: white !important;
        font-size: 1.1rem !important;
    }
    .controls h3 {
        font-size: 1.4rem !important;
        margin-bottom: 15px !important;
    }
    #highlighted-add-data {
        background: linear-gradient(135deg, #E0F2FE 0%, #B3E5FC 100%) !important;
        border: 2px solid #81D4FA !important;
        border-radius: 15px !important;
        box-shadow: 0 10px 40px rgba(129, 212, 250, 0.3) !important;
        margin: 20px 0 !important;
    }
    #highlighted-add-data .gr-accordion-header {
        background: linear-gradient(135deg, #81D4FA 0%, #4FC3F7 100%) !important;
        color: white !important;
        font-weight: bold !important;
        font-size: 1.2rem !important;
        padding: 15px 20px !important;
        border-radius: 15px 15px 0 0 !important;
    }
    #highlighted-add-data .gr-accordion-content {
        background: rgba(255,255,255,0.95) !important;
        border-radius: 0 0 15px 15px !important;
        padding: 25px !important;
    }
    .gr-button {
        border-radius: 10px !important;
        font-weight: bold !important;
        transition: all 0.3s ease !important;
    }
    .gr-button:hover {
        transform: translateY(-2px) !important;
        box-shadow: 0 5px 15px rgba(0,0,0,0.2) !important;
    }
    .gr-plot {
        border-radius: 15px !important;
        box-shadow: 0 8px 32px rgba(0,0,0,0.1) !important;
        background: white !important;
        padding: 20px !important;
    }
    .gr-dataframe {
        border-radius: 15px !important;
        box-shadow: 0 8px 32px rgba(0,0,0,0.1) !important;
        background: white !important;
        overflow: hidden !important;
    }
    .gr-accordion {
        border-radius: 15px !important;
        box-shadow: 0 8px 32px rgba(0,0,0,0.1) !important;
        background: white !important;
        margin: 15px 0 !important;
    }
    .gr-accordion-header {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
        color: white !important;
        font-weight: bold !important;
        padding: 15px 20px !important;
        border-radius: 15px 15px 0 0 !important;
    }
    .gr-accordion-content {
        background: rgba(255,255,255,0.95) !important;
        border-radius: 0 0 15px 15px !important;
        padding: 20px !important;
    }
    #submit-btn {
        background: linear-gradient(135deg, #29B6F6 0%, #0288D1 100%) !important;
        border: 2px solid #0277BD !important;
        color: white !important;
        font-weight: bold !important;
        font-size: 1.1rem !important;
        padding: 15px 30px !important;
        border-radius: 12px !important;
        box-shadow: 0 8px 25px rgba(41, 182, 246, 0.4) !important;
        transition: all 0.3s ease !important;
    }
       #submit-btn:hover {
           background: linear-gradient(135deg, #0288D1 0%, #0277BD 100%) !important;
           transform: translateY(-3px) !important;
           box-shadow: 0 12px 35px rgba(41, 182, 246, 0.6) !important;
       }
       #owner-controls {
           background: linear-gradient(135deg, #FFE0E0 0%, #FFCDD2 100%) !important;
           border: 2px solid #FF5722 !important;
           border-radius: 15px !important;
           box-shadow: 0 10px 40px rgba(255, 87, 34, 0.3) !important;
           margin: 20px 0 !important;
       }
       #owner-controls .gr-accordion-header {
           background: linear-gradient(135deg, #FF5722 0%, #D32F2F 100%) !important;
           color: white !important;
           font-weight: bold !important;
           font-size: 1.2rem !important;
           padding: 15px 20px !important;
           border-radius: 15px 15px 0 0 !important;
       }
       #owner-controls .gr-accordion-content {
           background: rgba(255,255,255,0.95) !important;
           border-radius: 0 0 15px 15px !important;
           padding: 25px !important;
       }
       #approve-btn {
           background: linear-gradient(135deg, #4CAF50 0%, #2E7D32 100%) !important;
           border: 2px solid #388E3C !important;
           color: white !important;
           font-weight: bold !important;
           font-size: 1.1rem !important;
           padding: 15px 30px !important;
           border-radius: 12px !important;
           box-shadow: 0 8px 25px rgba(76, 175, 80, 0.4) !important;
           transition: all 0.3s ease !important;
       }
       #approve-btn:hover {
           background: linear-gradient(135deg, #2E7D32 0%, #1B5E20 100%) !important;
           transform: translateY(-3px) !important;
           box-shadow: 0 12px 35px rgba(76, 175, 80, 0.6) !important;
       }
       #reject-btn {
           background: linear-gradient(135deg, #F44336 0%, #C62828 100%) !important;
           border: 2px solid #D32F2F !important;
           color: white !important;
           font-weight: bold !important;
           font-size: 1.1rem !important;
           padding: 15px 30px !important;
           border-radius: 12px !important;
           box-shadow: 0 8px 25px rgba(244, 67, 54, 0.4) !important;
           transition: all 0.3s ease !important;
       }
       #reject-btn:hover {
           background: linear-gradient(135deg, #C62828 0%, #B71C1C 100%) !important;
           transform: translateY(-3px) !important;
           box-shadow: 0 12px 35px rgba(244, 67, 54, 0.6) !important;
       }
       #guideline-section {
           background: linear-gradient(135deg, #E8F5E8 0%, #C8E6C9 100%) !important;
           border: 2px solid #4CAF50 !important;
           border-radius: 15px !important;
           box-shadow: 0 10px 40px rgba(76, 175, 80, 0.3) !important;
           margin: 20px 0 !important;
       }
       #guideline-section .gr-accordion-header {
           background: linear-gradient(135deg, #4CAF50 0%, #2E7D32 100%) !important;
           color: white !important;
           font-weight: bold !important;
           font-size: 1.2rem !important;
           padding: 15px 20px !important;
           border-radius: 15px 15px 0 0 !important;
       }
       #guideline-section .gr-accordion-content {
           background: rgba(255,255,255,0.95) !important;
           border-radius: 0 0 15px 15px !important;
           padding: 25px !important;
       }
    """
    
    with gr.Blocks(css=css, title="Watermark Leaderboard for LLMs") as demo:
        # Header
        gr.HTML("""
        <div class="title">
            🏆 Watermark Leaderboard for LLMs 🏆
        </div>
        <div class="subtitle">
            📊 Interactive leaderboard for comparing watermark performance across different models and evaluation settings
        </div>
        """)
        
        # Controls
        with gr.Row():
            with gr.Column(scale=1):
                gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><h3 style='color: #667eea; margin: 0; font-weight: bold;'>🤖 Model Selection</h3></div>")
                model_selector = gr.Radio(
                    choices=["LLaMA3", "DeepSeek"],
                    value="LLaMA3",
                    label="Model",
                    info="Select the model to display"
                )
            with gr.Column(scale=1):
                gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><h3 style='color: #667eea; margin: 0; font-weight: bold;'>⚙️ Evaluation Setting</h3></div>")
                metric_selector = gr.Radio(
                    choices=["Attack-free", "Watermark Removal", "Stealing Attack"],
                    value="Attack-free",
                    label="Setting",
                    info="Select the evaluation setting"
                )
        
        
        # Add Your Data Section (Highlighted)
        with gr.Accordion("🚀 Add Your Data to the Leaderboard", open=False, elem_id="highlighted-add-data"):
            gr.HTML("""
            <div style='text-align: center; margin-bottom: 20px;'>
                <h2 style='color: #0277BD; margin: 0; font-size: 1.5rem;'>📝 Submit Your Watermark Performance Results</h2>
                <p style='color: #374151; margin: 10px 0 0 0;'>Contribute to the community by sharing your watermark evaluation results</p>
            </div>
            <div style='background: #E3F2FD; border: 1px solid #2196F3; border-radius: 8px; padding: 15px; margin-bottom: 20px;'>
                <h4 style='color: #1976D2; margin: 0 0 10px 0;'>📋 Submission Requirements</h4>
                <p style='color: #374151; margin: 0 0 8px 0;'>Provide at least one complete set of metrics:</p>
                <ul style='color: #374151; margin: 0; padding-left: 20px;'>
                    <li><strong>Attack-free:</strong> Normalized Utility + Detection Rate</li>
                    <li><strong>Watermark Removal:</strong> Absolute Utility Degradation + Removal Detection Rate</li>
                    <li><strong>Stealing Attack:</strong> Adversary BERT Score + Adversary Detection Rate</li>
                </ul>
            </div>
            """)
            with gr.Row():
                with gr.Column(scale=1):
                    # Basic Information
                    gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><h3 style='color: #0277BD; margin: 0;'>📋 Basic Information</h3></div>")
                    watermark_name = gr.Textbox(
                        label="Watermark Name",
                        placeholder="e.g., MyWatermark, Watermark-X",
                        info="Unique identifier for your watermark"
                    )
                    paper_link = gr.Textbox(
                        label="Paper Link (Optional)",
                        placeholder="https://arxiv.org/abs/xxxx.xxxxx or https://...",
                        info="Link to the paper describing this watermark method"
                    )
                    submission_model = gr.Radio(
                        choices=["LLaMA3", "DeepSeek"],
                        label="Model",
                        value="LLaMA3",
                        info="Select the model used"
                    )
                    
                with gr.Column(scale=1):
                    # Attack-free Metrics (Optional)
                    gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><h3 style='color: #0277BD; margin: 0;'>⚡ Attack-free Metrics (Optional - Both Required if One is Provided)</h3></div>")
                    normalized_utility = gr.Number(
                        label="Normalized Utility",
                        value=None,
                        minimum=0.0,
                        maximum=1.0,
                        step=0.001,
                        info="Text quality metric (0.000 - 1.000)"
                    )
                    detection_rate = gr.Number(
                        label="Detection Rate (%)",
                        value=None,
                        minimum=0.0,
                        maximum=100.0,
                        step=0.001,
                        info="Watermark detection accuracy (0.000 - 100.000%)"
                    )
            
            with gr.Row():
                with gr.Column(scale=1):
                    # Watermark Removal Metrics (Optional)
                    gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><h3 style='color: #0277BD; margin: 0;'>🛡️ Watermark Removal (Optional)</h3></div>")
                    absolute_utility_degradation = gr.Number(
                        label="Absolute Utility Degradation",
                        value=None,
                        minimum=0.0,
                        maximum=1.0,
                        step=0.001,
                        info="Resistance to removal attacks (0.000 - 1.000)"
                    )
                    removal_detection_rate = gr.Number(
                        label="Removal Detection Rate (%)",
                        value=None,
                        minimum=0.0,
                        maximum=100.0,
                        step=0.001,
                        info="Detection rate under removal attacks (0.000 - 100.000%)"
                    )
                
                with gr.Column(scale=1):
                    # Stealing Attack Metrics (Optional)
                    gr.HTML("<div style='text-align: center; margin-bottom: 15px;'><h3 style='color: #0277BD; margin: 0;'>🎯 Stealing Attack (Optional)</h3></div>")
                    adversary_bert_score = gr.Number(
                        label="Adversary BERT Score",
                        value=None,
                        minimum=0.0,
                        maximum=1.0,
                        step=0.001,
                        info="Performance under adversarial conditions (0.000 - 1.000)"
                    )
                    adversary_detection_rate = gr.Number(
                        label="Adversary Detection Rate (%)",
                        value=None,
                        minimum=0.0,
                        maximum=100.0,
                        step=0.001,
                        info="Detection rate under adversarial attacks (0.000 - 100.000%)"
                    )
            
            # Submit and Clear buttons
            with gr.Row():
                with gr.Column(scale=1):
                    submit_btn = gr.Button(
                        "🚀 Submit Data to Leaderboard",
                        variant="primary",
                        size="lg",
                        elem_id="submit-btn"
                    )
                with gr.Column(scale=1):
                    clear_btn = gr.Button(
                        "🗑️ Clear Form",
                        variant="secondary",
                        size="lg"
                    )
            
            # Status message
            status_message = gr.Markdown("", visible=True)

        
        # Scatter Plot
        scatter_plot = gr.Plot(
            label="Performance Scatter Plot",
            show_label=True
        )
        
        # Table
        table = gr.DataFrame(
            label="Performance Table",
            show_label=True,
            interactive=False,
            wrap=True
        )
        
        # Guideline and Metrics Explained Section (At bottom with light green background)
        with gr.Accordion("📋 Guideline for Submitting Watermark Performance Results", open=False, elem_id="guideline-section"):
            gr.HTML("""
            <div style="padding: 20px;">
                <h3>Guideline for Submitting Watermark Performance Results</h3>
                <h4>1. Datasets</h4>
                <ul>
                    <li><strong>Text Generation (C4 dataset)</strong>
                        <ul>
                            <li>Training: first 20,000 samples</li>
                            <li>Testing: 13,860 samples</li>
                            <li>Reference script: <code>Files/Reproducibility/C4_dataset_download.py</code></li>
                        </ul>
                    </li>
                    <li><strong>Text Summarization (CNN/Daily Mail dataset)</strong>
                        <ul>
                            <li>Training: first 10,000–20,000 samples</li>
                            <li>Testing: 1,000 samples</li>
                            <li>Reference script: <code>Files/Reproducibility/CNN_dataset_download.py</code></li>
                        </ul>
                    </li>
                </ul>
                <h4>2. Models</h4>
                <ul>
                    <li>Use open-source models available on Hugging Face:
                        <ul>
                            <li>DeepSeek: "deepseek-ai/deepseek-llm-7b-base"</li>
                            <li>LLaMA-3: "meta-llama/Meta-Llama-3-8B"</li>
                        </ul>
                    </li>
                </ul>
                <h4>3. Evaluation Settings</h4>
                <ul>
                    <li><strong>(a) Attack-Free Setting</strong>
                        <ul>
                            <li>Generate 13,860 watermarked outputs on the C4 test set.</li>
                            <li>Report: Detection Rate and Normalized Utility (see Metrics).</li>
                        </ul>
                    </li>
                    <li><strong>(b) Watermark Removal Setting</strong>
                        <ul>
                            <li>Apply Dipper to paraphrase watermarked outputs.</li>
                            <li>Report:
                                <ul>
                                    <li>Detection Rate after attack</li>
                                    <li>Normalized Utility after attack</li>
                                    <li>Absolute Utility Degradation (difference before vs. after attack)</li>
                                </ul>
                            </li>
                            <li>Reference scripts: <code>Files/Reproducibility/Attack_dipper.py</code></li>
                        </ul>
                    </li>
                    <li><strong>(c) Stealing Attack Setting</strong>
                        <ul>
                            <li>Generate 20,000 watermarked samples for training a surrogate model using LoRA.</li>
                            <li>Use the surrogate model for summarization on 1,000 test samples.</li>
                            <li>Report: Detection Rate and Normalized Utility on the surrogate's outputs.</li>
                            <li>Reference scripts: <code>Files/Reproducibility/Finetune_sum.py</code>, <code>Files/Reproducibility/Inference_sum.py</code></li>
                        </ul>
                    </li>
                </ul>
                <h4>4. Metrics</h4>
                <ul>
                    <li><strong>Detection Rate</strong>
                        <ul>
                            <li>Average accuracy across the test set (e.g., 13,860 examples for text generation).</li>
                            <li>Use your own detector implementation.</li>
                        </ul>
                    </li>
                    <li><strong>Normalized Utility</strong>
                        <ul>
                            <li>Defined as the mean of:</li>
                            <li>BERTScore (<code>Files/Reproducibility/BERT_score.py</code>)</li>
                            <li>Entity Similarity Score (<code>Files/Reproducibility/Entity_similarity_score.py</code>)</li>
                        </ul>
                    </li>
                    <li><strong>Absolute Utility Degradation</strong>
                        <ul>
                            <li>The absolute change in Normalized Utility between attack-free and attacked outputs.</li>
                        </ul>
                    </li>
                </ul>
                <h4>5. Submission</h4>
                <ul>
                    <li>You may submit results for one or more evaluation settings (Attack-Free, Removal, Stealing).</li>
                    <li>Please include:
                        <ul>
                            <li>Model(s) evaluated</li>
                            <li>Dataset(s) used</li>
                            <li>Scripts/configuration details if modified</li>
                            <li>Reported metrics in the required format</li>
                        </ul>
                    </li>
                </ul>
                <p><strong>Reproducibility codes are available in the Files tab of this Space.</strong></p>
            </div>
            """)
        
        # Owner Approval Section (At the very bottom)
        with gr.Accordion("🔒 Owner Controls - Pending Submissions", open=False, elem_id="owner-controls"):
            gr.HTML("""
            <div style='text-align: center; margin-bottom: 20px;'>
                <h2 style='color: #D32F2F; margin: 0; font-size: 1.5rem;'>🛡️ Administrator Approval Panel</h2>
                <p style='color: #374151; margin: 10px 0 0 0;'>Review and approve pending submissions before they appear on the leaderboard</p>
            </div>
            """)
            
            # Pending submissions table
            pending_table = gr.DataFrame(
                label="📋 Pending Submissions",
                show_label=True,
                interactive=False,
                wrap=True,
                headers=["ID", "Name", "Model", "Paper Link", "Attack-free Utility", "Attack-free Detection", 
                        "Removal Degradation", "Removal Detection", "Adversary BERT", "Adversary Detection", "Submitted At"]
            )
            
            # Admin authentication
            admin_password_input = gr.Textbox(
                label="🔐 Admin Password",
                placeholder="Enter admin password to access controls",
                type="password",
                info="Required for approval/rejection actions"
            )
            
            # Approval controls
            with gr.Row():
                with gr.Column(scale=1):
                    submission_id_input = gr.Textbox(
                        label="Submission ID",
                        placeholder="Enter submission ID to approve/reject",
                        info="Copy from the pending submissions table"
                    )
                    approve_btn = gr.Button(
                        "✅ Approve Submission",
                        variant="primary",
                        size="lg",
                        elem_id="approve-btn"
                    )
                with gr.Column(scale=1):
                    reject_btn = gr.Button(
                        "❌ Reject Submission",
                        variant="stop",
                        size="lg",
                        elem_id="reject-btn"
                    )
                    refresh_pending_btn = gr.Button(
                        "🔄 Refresh Pending",
                        variant="secondary",
                        size="lg"
                    )
            
            approval_status = gr.Markdown("", visible=True)
        
        # Event handlers
        model_selector.change(
            fn=update_interface,
            inputs=[model_selector, metric_selector],
            outputs=[scatter_plot, table]
        )
        
        metric_selector.change(
            fn=update_interface,
            inputs=[model_selector, metric_selector],
            outputs=[scatter_plot, table]
        )
        
        # Form submission handler
        submit_btn.click(
            fn=submit_watermark_data,
            inputs=[
                watermark_name,
                submission_model,
                paper_link,
                normalized_utility,
                detection_rate,
                absolute_utility_degradation,
                removal_detection_rate,
                adversary_bert_score,
                adversary_detection_rate
            ],
            outputs=[status_message, scatter_plot, table]
        )
        
        # Clear form handler
        clear_btn.click(
            fn=clear_form,
            outputs=[
                watermark_name,
                paper_link,
                submission_model,
                normalized_utility,
                detection_rate,
                absolute_utility_degradation,
                removal_detection_rate,
                adversary_bert_score,
                adversary_detection_rate
            ]
        )
        
        # NOTE: No "add data" button handler is registered here. The
        # add_data_button was removed, so that handler is no longer needed;
        # the highlighted section is now always visible.
        
        # Owner approval event handlers
        approve_btn.click(
            fn=approve_submission,
            inputs=[submission_id_input, admin_password_input],
            outputs=[approval_status, pending_table]
        )
        
        reject_btn.click(
            fn=reject_submission,
            inputs=[submission_id_input, admin_password_input],
            outputs=[approval_status, pending_table]
        )
        
        refresh_pending_btn.click(
            fn=load_pending_submissions,
            outputs=[pending_table]
        )
        
        # Initial load
        demo.load(
            fn=lambda: update_interface("LLaMA3", "Attack-free"),
            outputs=[scatter_plot, table]
        )
        
        # Load pending submissions on startup
        demo.load(
            fn=load_pending_submissions,
            outputs=[pending_table]
        )
        
        # Clear admin password after actions for security
        def clear_admin_password():
            """Return a Gradio update that sets a component's value to an empty string.

            Takes no inputs. It is registered below as an additional click
            handler on the approve and reject buttons, with the admin password
            textbox as its only output, so the typed password is blanked after
            either action is triggered.
            """
            # NOTE(review): this runs as a separate click listener rather than
            # being chained after approve/reject; presumably the password is
            # read by those handlers before this update lands -- confirm.
            return gr.update(value="")
        
        # Clear password after approve/reject actions
        approve_btn.click(
            fn=clear_admin_password,
            outputs=[admin_password_input]
        )
        
        reject_btn.click(
            fn=clear_admin_password,
            outputs=[admin_password_input]
        )
    
    return demo

# Script entry point: build the Gradio interface and start serving it.
# Runs only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    # Keep the interface bound to the module-level name `demo`.
    demo = create_interface()
    # Launch with Gradio's default settings (no arguments are passed).
    demo.launch()