Spaces:
Running
Running
| import gradio as gr | |
| import pandas as pd | |
| import requests | |
| from huggingface_hub import HfApi, create_repo, upload_file, hf_hub_download | |
| from apscheduler.schedulers.background import BackgroundScheduler | |
| from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns | |
| import os | |
# Hugging Face API setup
# NOTE(review): if HF_TOKEN is unset, os.getenv returns None and HfApi falls
# back to anonymous access — repo listing/upload calls below will then fail.
HF_TOKEN = os.getenv("HF_TOKEN")  # This should be set as an environment variable
api = HfApi(token=HF_TOKEN)  # Shared client used by the validation and submission helpers
# Deepfake detector benchmark results (three baseline detectors).
data_avg_performance = {
    "Detector": ["NPR", "UCF", "CAMO"],
    "Accuracy": [0.7169, 0.7229, 0.7555],
    "Precision": [0.9193, 0.9436, 0.9442],
    "Recall": [0.5996, 0.592, 0.647],
    "F1-Score": [0.7258, 0.7275, 0.7679],
    "MCC": [0.5044, 0.5285, 0.5707],
}
# Per-dataset accuracy for the same detectors.
data_dataset_accuracy = {
    "Detector": ["NPR", "UCF", "CAMO"],
    "CelebA-HQ": [0.987, 0.995, 0.999],
    "Flickr30k": [0.916, 0.981, 0.979],
    "ImageNet": [0.834, 0.847, 0.831],
    "DiffusionDB": [0.876, 0.85, 0.961],
    "CelebA-HQ-SDXL": [0.386, 0.484, 0.682],
    "CelebA-HQ-Flux": [0.846, 0.794, 0.722],
    "Flickr30k-SDXL": [0.302, 0.256, 0.28],
    "MS-COCO-Flux": [0.588, 0.576, 0.59],
}

# Materialize DataFrames for the leaderboard widgets.
df_avg_performance = pd.DataFrame(data_avg_performance)
df_dataset_accuracy = pd.DataFrame(data_dataset_accuracy)

# Normalize dtypes: detector names as strings, every metric column numeric.
_METRIC_COLS = ["Accuracy", "Precision", "Recall", "F1-Score", "MCC"]
df_avg_performance["Detector"] = df_avg_performance["Detector"].astype(str)
for _col in _METRIC_COLS:
    df_avg_performance[_col] = pd.to_numeric(df_avg_performance[_col])
def init_leaderboard():
    """Build the read-only Leaderboard component for average performance metrics."""
    if df_avg_performance.empty:
        raise ValueError("Leaderboard DataFrame is empty.")
    display_columns = ["Detector", "Accuracy", "Precision", "Recall", "F1-Score", "MCC"]
    # One string column (Detector) followed by five numeric metric columns.
    return Leaderboard(
        value=df_avg_performance,
        datatype=["str"] + ["number"] * 5,
        select_columns=SelectColumns(
            default_selection=display_columns,
            label="Select Columns to Display:",
        ),
        search_columns=["Detector"],
        filter_columns=[ColumnFilter("Detector", type="checkboxgroup", label="Detectors")],
        bool_checkboxgroup_label="Hide detectors",
        interactive=False,
    )
def check_file_in_repo(repo_id, file_path):
    """Return True if ``file_path`` is listed in Hugging Face repo ``repo_id``.

    Best-effort check: any API error (missing repo, auth failure, network
    issue) is reported as the file not being present rather than raised,
    since callers only use this to produce a validation message.
    """
    try:
        return file_path in api.list_repo_files(repo_id)
    except Exception:
        return False
def submit_detector(detector_name, model_repo, detector_file_path, configs_file_path):
    """Validate a detector submission and append it to the submissions dataset.

    Args:
        detector_name: Name of the registered detector module (leaderboard name).
        model_repo: Hugging Face model repo id, e.g. "username/repo-name".
        detector_file_path: Path of the detector ``.py`` file inside ``model_repo``.
        configs_file_path: Path of the config ``.yaml`` file inside ``model_repo``.

    Returns:
        A human-readable status string shown in the UI: success, duplicate
        notice, or a description of the validation/upload failure.
    """
    # Check that the Hugging Face model repository exists.
    try:
        api.repo_info(model_repo)
    except Exception as e:
        return f"Repository '{model_repo}' not found. Error: {e}"

    # Validate that both the detector code and its config exist in the repo.
    if not check_file_in_repo(model_repo, detector_file_path):
        return f"The file '{detector_file_path}' was not found in the repository."
    if not check_file_in_repo(model_repo, configs_file_path):
        return f"The configs file '{configs_file_path}' was not found in the repository."

    # New submission row; review/evaluation status starts as pending.
    submission_data = {
        "detector_name": detector_name,
        "model_repo": model_repo,
        "detector_file_path": detector_file_path,
        "configs_file_path": configs_file_path,
        "passed_invocation_test": "Pending",
        "evaluation_status": "Under Review",
    }
    df_submission = pd.DataFrame([submission_data])
    dataset_repo_id = "bitmind/dfd-arena-detectors"

    try:
        # Create the dataset repo if it doesn't exist yet (no-op otherwise).
        create_repo(repo_id=dataset_repo_id,
                    token=HF_TOKEN,
                    repo_type="dataset",
                    exist_ok=True)
        # Download the existing submissions CSV (raises if absent).
        existing_dataset_path = hf_hub_download(
            repo_id=dataset_repo_id,
            filename="submissions/submission.csv",
            token=HF_TOKEN,
            repo_type="dataset",
        )
        df_existing = pd.read_csv(existing_dataset_path)
        # Reject an exact duplicate of any existing row.
        if (df_existing == df_submission.iloc[0]).all(axis=1).any():
            return "Duplicate submission: This submission already exists in the dataset."
        # Append the new submission once (the previous version built this
        # concat twice, redundantly).
        df_updated = pd.concat([df_existing, df_submission], ignore_index=True)
    except Exception:
        # NOTE(review): this broad except treats *any* failure here (including
        # transient network/auth errors) as "no submissions file yet" and
        # starts a fresh one-row table, which the upload below would then use
        # to overwrite the remote CSV — potential data loss. Consider catching
        # only huggingface_hub.utils.EntryNotFoundError here.
        df_updated = pd.DataFrame([submission_data])

    # Write the merged table locally, then push it to the dataset repo.
    submission_file = "submission.csv"
    df_updated.to_csv(submission_file, index=False)
    try:
        upload_file(
            path_or_fileobj=submission_file,
            path_in_repo="submissions/submission.csv",  # Location in the repo
            repo_id=dataset_repo_id,
            token=HF_TOKEN,
            repo_type="dataset",
            commit_message="Submitted for review: " + detector_name,
        )
        return "Submission successful!"
    except Exception as e:
        return f"Failed to push submission: {str(e)}"
# Gradio demo: leaderboard tab + submission form tab, then app launch.
with gr.Blocks(theme=gr.themes.Base()) as demo:
    # Title
    gr.HTML("""
<div style="text-align:center;">
<h1> Deepfake Detection Arena (DFD) Leaderboard</h1>
</div>
""")
    # Description/Intro Section
    gr.Markdown("""
## π― The Open Benchmark for Detecting AI-Generated Images
[DFD-Arena](https://github.com/BitMind-AI/dfd-arena) is the first benchmark to address the open-source computer vision community's need for a *comprehensive evaluation framework* for state-of-the-art (SOTA) detection of AI-generated images.
While [previous studies](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9721302) have focused on benchmarking the SOTA on content-specific subsets of the deepfake detection problem, e.g. human face deepfake benchmarking via [DeepfakeBench](https://github.com/SCLBD/DeepfakeBench), these benchmarks do not adequately account for the broad spectrum of real and generated image types seen in everyday scenarios.
### π Explore DFD-Arena
Learn how the framework evaluates on diverse, content-rich images with semantic balance between real and generated data:
- π [Code: DFD-Arena GitHub Repository](https://github.com/BitMind-AI/dfd-arena)
- π [Blog: Technical Write-Up](https://bitmindlabs.notion.site/BitMind-Deepfake-Detection-Arena-106af85402838007830ece5a6f3f35a8?pvs=25)
### βοΈ Authorship
Both DFD-Arena and novel synthetic image datasets used for evaluation are created by [BitMind](https://www.bitmind.ca/).
- π¦ [X/Twitter: @BitMindAI](https://x.com/BitMindAI)
""")
    with gr.Tabs():
        # Tab 1: read-only results (leaderboard + per-dataset accuracy table).
        with gr.TabItem("π Deepfake Detection Arena"):
            # Add text for Average Performance Metrics
            gr.Markdown("## Average Performance Metrics")
            leaderboard = init_leaderboard()
            # Add dataset-specific accuracy
            gr.Markdown("## Dataset-specific Accuracy")
            gr.DataFrame(value=df_dataset_accuracy)
        # Tab 2: submission instructions and form wired to submit_detector().
        with gr.TabItem("π Submit Detector Results"):
            # Add submission instructions
            gr.Markdown("""
## Submission Instructions
<h3>ποΈ 1. Ensure that your detector code follows the DFD Arena detectors format.</h3>
The best way to guarantee compatibility is to develop and test your detector within a local copy of the [DFD Arena repository](https://github.com/BitMind-AI/dfd-arena/tree/main), with dependencies, detector file, and configs in relative locations similar to how we structured our implementations of UCF, NPR, and CAMO detectors.
**β Check list:**
- Your code should work with detector dependencies (architecture and training code) imported from a dependencies directory a level above the detector directory.
- e.g., `arena/detectors/deepfake_detectors/ucf_detector.py` relies on a dependency folder at `arena/detectors/UCF/`
- Our automated benchmarking pipeline will reconstruct the required directory at evaluation time
- Implement a `.py` file in `arena/detectors/deepfake_detectors/` containing a `DeepfakeDetector` subclass with PascalCase naming convention, registered as a module in the dfd-arena `DETECTOR_REGISTRY`.
- e.g., in `myCustomDetector.py`,
```python
@DETECTOR_REGISTRY.register_module(module_name='MyCustomModuleName')
class MyCustomDetector(DeepfakeDetector):
    # implementation
```
- The module name should match the detector name you want to appear on the leaderboard
- Create a config YAML file that the DeepfakeDetector loads in `arena/detectors/deepfake_detectors/configs/`.
<h3>π€ 2. Create a HuggingFace model repo with the detector `.py` file, config `.yaml`, and dependencies in the same root level.</h3>
[Check out our Sample Leaderboard Submission Repo for Reference!](https://huggingface.co/caliangandrew/submit_test/tree/main)
<h3>π€ 3. Fill out the form below with the correct paths and submit!</h3>
The results will be processed after a code review by the BitMind team, and an automated test/benchmarking stage.
**β οΈ Note:** The Detector Name must match the name of the registered detector module in the dfd arena detector registry. This will be the name of your detector on our leaderboard.
- For example, using the [Sample Submission Repo](https://huggingface.co/caliangandrew/submit_test/tree/main) provided, you would submit:
- Detector Name: `test`
- Hugging Face Model Repo: `caliangandrew/submit_test`
- Path to detector `.py`: `test_detector.py`
- Path to config `.YAML`: `test_config.yaml`
You can monitor your submission via the companion [detectors](https://huggingface.co/datasets/bitmind/dfd-arena-detectors) and [results](https://huggingface.co/datasets/bitmind/dfd-arena-results) datasets.
""")
            # Four text inputs on one row, matching submit_detector's parameters.
            with gr.Row():
                detector_name = gr.Textbox(label="Detector Name", placeholder="e.g., MyDetector")
                model_repo = gr.Textbox(label="Hugging Face Model Repo", placeholder="e.g., username/repo-name")
                detector_file_path = gr.Textbox(label="Path to detector .py", placeholder="e.g., my_detector.py")
                configs_file_path = gr.Textbox(label="Path to config .YAML", placeholder="e.g., my_config.yaml")
            submit_button = gr.Button("Submit",interactive=True)
            # Status string returned by submit_detector is shown here.
            output = gr.Textbox(label="Submission Result",interactive=True)
            submit_button.click(fn=submit_detector, inputs=[detector_name, model_repo, detector_file_path, configs_file_path], outputs=output)
# Launch the Gradio app
# queue() enables request queuing; up to 40 concurrent event handlers.
demo.queue(default_concurrency_limit=40).launch()