| import gradio as gr |
| import pandas as pd |
| import json |
| import os |
| from constants import BANNER, INTRODUCTION_TEXT, CITATION_TEXT, METRICS_TAB_TEXT, DIR_OUTPUT_REQUESTS, LEADERBOARD_CSS |
| from init import is_model_on_hub, upload_file, load_all_info_from_dataset_hub |
| from utils_display import AutoEvalColumn, fields, make_clickable_model, make_clickable_paper, styled_error, styled_message, get_imagenet_columns |
| import numpy as np |
| from datetime import datetime, timezone |
|
|
def get_last_updated_date(file_path):
    """Return the file's last-modified date formatted like 'Mar 3rd 2024'.

    Falls back to "Unknown" when the file cannot be stat'ed.
    """
    try:
        modified = datetime.fromtimestamp(os.path.getmtime(file_path))
    except OSError:
        return "Unknown"

    day = modified.day
    # English ordinal suffix: 11th/12th/13th are special-cased, otherwise
    # the last digit decides (1st, 2nd, 3rd, everything else 'th').
    if day in (11, 12, 13):
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(day % 10, "th")

    return modified.strftime(f"%b {day}{suffix} %Y")
|
|
def load_models_list(json_path):
    """Load the models metadata JSON and index it by Hub model path.

    Returns a dict mapping each model's 'path' to its paper / year / license.
    """
    with open(json_path, 'r') as f:
        entries = json.load(f)

    return {
        entry['path']: {
            'paper': entry['paper'],
            'year': entry['year'],
            'license': entry['license'],
        }
        for entry in entries
    }
|
|
def read_jsonl_to_dataframe(jsonl_path, models_dict):
    """Read the JSONL results file into a DataFrame.

    Adds unit-converted columns (parameters in millions, FLOPs in giga,
    model size in MB) and attaches paper / year / license metadata from
    *models_dict*; models not found there get "N/A" paper and year.
    """
    records = []
    with open(jsonl_path, 'r') as f:
        for raw_line in f:
            raw_line = raw_line.strip()
            if not raw_line:
                continue
            record = json.loads(raw_line)

            # Derived, human-scale unit columns.
            if 'parameters' in record:
                record['parameters_millions'] = record['parameters'] / 1_000_000
            if 'flops' in record:
                record['flops_giga'] = record['flops'] / 1_000_000_000
            if 'model_size' in record:
                record['model_size_mb'] = record['model_size'] / (1024 * 1024)

            # Attach metadata when the model is known; the record's own
            # license (if present) takes precedence over the metadata.
            info = models_dict.get(record.get('model', ''))
            if info is not None:
                record['paper'] = info['paper']
                record['year'] = str(info['year'])
                record.setdefault('license', info['license'])
            else:
                record['paper'] = "N/A"
                record['year'] = "N/A"

            records.append(record)

    return pd.DataFrame(records)
|
|
| |
# Mapping from raw result-file field names to the human-readable column
# headers displayed in the leaderboard table.
column_names = {
    "model": "Model",
    "top1_accuracy": "Top-1 Accuracy β¬οΈ",
    "top5_accuracy": "Top-5 Accuracy β¬οΈ",
    "parameters_millions": "Parameters (M)",
    "flops_giga": "FLOPs (G)",
    "model_size_mb": "Model Size (MB)",
    "paper": "Paper",
    "year": "Year",
    "license": "License"
}


# Fetch evaluation-queue info from the dataset hub: the queue repo handle,
# the filenames of already-requested models, and the local path of the
# JSONL results file (the 4th value is unused here).
eval_queue_repo, requested_models, jsonl_results, _ = load_all_info_from_dataset_hub()

# Fail fast if the results file was not downloaded locally.
if not jsonl_results.exists():
    raise Exception(f"JSONL file {jsonl_results} does not exist locally")


# Paper / year / license metadata, keyed by model path on the Hub.
models_dict = load_models_list("models_list.json")


# Raw leaderboard rows with unit conversions and model metadata attached.
original_df = read_jsonl_to_dataframe(jsonl_results, models_dict)


# Formatted modification date of the results file, shown in the page footer.
LAST_UPDATED = get_last_updated_date(jsonl_results)
|
|
| |
def formatter(x):
    """Format a table cell: strings pass through unchanged, -1 and NaN
    become "NA", and all other numbers are rounded to 2 decimal places."""
    if type(x) is str:
        return x
    if x == -1 or pd.isna(x):
        return "NA"
    return round(x, 2)
|
|
| |
# Columns to surface in the leaderboard, in display order.
# 'license' is included so that, after renaming, the table has the
# "License" column that filter_main_table's proprietary filter checks;
# without it the "Show proprietary models" checkbox is a no-op.
display_columns = ['model', 'top1_accuracy', 'top5_accuracy', 'parameters_millions',
                   'flops_giga', 'model_size_mb', 'year', 'paper', 'license']


# Keep only the columns actually present in the results file.
available_columns = [col for col in display_columns if col in original_df.columns]
filtered_df = original_df[available_columns].copy()


# Render model/paper cells as clickable links; format every other cell
# (strings pass through, NaN/-1 become "NA", numbers are rounded).
for col in filtered_df.columns:
    if col == "model":
        filtered_df[col] = filtered_df[col].apply(lambda x: make_clickable_model(x))
    elif col == "paper":
        filtered_df[col] = filtered_df[col].apply(lambda x: make_clickable_paper(x))
    else:
        filtered_df[col] = filtered_df[col].apply(formatter)


# Human-readable headers, best Top-1 model first.
filtered_df.rename(columns=column_names, inplace=True)
filtered_df.sort_values(by='Top-1 Accuracy β¬οΈ', ascending=False, inplace=True)


# The displayed/filterable table; the raw frame is no longer needed.
original_df = filtered_df


# Column metadata (names and datatypes) for the Gradio Dataframe component.
imagenet_columns = get_imagenet_columns()
COLS = [c.name for c in imagenet_columns]
TYPES = [c.type for c in imagenet_columns]
|
|
| |
|
|
|
|
def request_model(model_text, chb_imagenet):
    """Request evaluation of a model on the ImageNet-1k dataset.

    Validates the dataset selection and the model's presence on the Hub,
    writes the request entry to a local file, uploads it to the requests
    repo, and always cleans up the local temp file.

    Args:
        model_text: Hub model id ("user_name/model_name").
        chb_imagenet: True when the ImageNet-1k validation set is selected.

    Returns:
        A styled HTML message (success or error) for the submission box.
    """
    dataset_selection = []
    if chb_imagenet:
        dataset_selection.append("ImageNet-1k validation set")

    if not dataset_selection:
        return styled_error("You need to select at least one dataset")

    base_model_on_hub, error_msg = is_model_on_hub(model_text)
    if not base_model_on_hub:
        return styled_error(f"Base model '{model_text}' {error_msg}")

    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    required_datasets = ', '.join(dataset_selection)
    eval_entry = {
        "date": current_time,
        "model": model_text,
        "datasets_selected": required_datasets,
        "evaluation_type": "ImageNet-1k_classification"
    }

    DIR_OUTPUT_REQUESTS.mkdir(parents=True, exist_ok=True)

    # Request filename encodes the model id and dataset(s); '/' is not
    # filesystem-safe so it is replaced by '@'.
    fn_datasets = '@ '.join(dataset_selection)
    filename = model_text.replace("/", "@") + "@@" + fn_datasets
    if filename in requested_models:
        return styled_error(f"A request for this model '{model_text}' and dataset(s) was already made.")

    out_filepath = DIR_OUTPUT_REQUESTS / (filename + ".txt")
    try:
        with open(out_filepath, "w") as f:
            f.write(json.dumps(eval_entry))

        upload_file(filename, out_filepath)
        requested_models.append(filename)

        return styled_message("π€ Your request has been submitted and will be evaluated soon!")
    except Exception:
        return styled_error("Error submitting request!")
    finally:
        # Remove the local temp file even when the upload fails (the
        # original only unlinked on the success path, leaking the file).
        out_filepath.unlink(missing_ok=True)
|
|
def filter_main_table(show_proprietary=True):
    """Return a copy of the leaderboard table, optionally restricted to
    models whose License column equals "Open"."""
    table = original_df.copy()

    # Nothing to filter when proprietary models are shown or the table
    # carries no License column.
    if show_proprietary or "License" not in table.columns:
        return table

    return table[table["License"] == "Open"]
|
|
# --- Gradio UI --------------------------------------------------------------
with gr.Blocks(css=LEADERBOARD_CSS) as demo:
    gr.HTML(BANNER, elem_id="banner")
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # --- Leaderboard tab ------------------------------------------------
        # NOTE(review): the tab label string originally contained a literal
        # line break (a SyntaxError); joined onto one line here.
        with gr.TabItem("π Leaderboard", elem_id="imagenet-benchmark-tab-table", id=0):
            leaderboard_table = gr.components.Dataframe(
                value=original_df,
                datatype=TYPES,
                elem_id="leaderboard-table",
                interactive=False,
                visible=True,
            )
            with gr.Row():
                show_proprietary_checkbox = gr.Checkbox(
                    label="Show proprietary models",
                    value=True,
                    elem_id="show-proprietary-checkbox"
                )

            # Re-filter the table whenever the checkbox is toggled.
            show_proprietary_checkbox.change(
                filter_main_table,
                inputs=[show_proprietary_checkbox],
                outputs=leaderboard_table
            )

        # --- Metrics tab ----------------------------------------------------
        with gr.TabItem("π Metrics", elem_id="imagenet-metrics-tab", id=1):
            gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")

        # --- Model request tab ----------------------------------------------
        with gr.TabItem("βοΈβ¨ Request a model here!", elem_id="imagenet-request-tab", id=2):
            with gr.Column():
                gr.Markdown("# βοΈβ¨ Request evaluation for a new model here!", elem_classes="markdown-text")
            with gr.Column():
                gr.Markdown("Select a dataset:", elem_classes="markdown-text")
            with gr.Column():
                model_name_textbox = gr.Textbox(label="Model name (user_name/model_name)")
                chb_imagenet = gr.Checkbox(label="ImageNet-1k validation set", value=True, interactive=True)
            with gr.Column():
                mdw_submission_result = gr.Markdown()
                btn_submitt = gr.Button(value="π Request Evaluation")
                btn_submitt.click(request_model,
                                  [model_name_textbox, chb_imagenet],
                                  mdw_submission_result)

        # --- About tab ------------------------------------------------------
        with gr.TabItem("π€ About", elem_id="imagenet-about-tab", id=3):
            gr.Markdown("## About", elem_classes="markdown-text")
            gr.Markdown("""
            ### ImageNet-1k Leaderboard

            This leaderboard tracks the performance of computer vision models on the ImageNet-1k dataset,
            which is one of the most widely used benchmarks for image classification.

            #### Dataset Information
            - **Training images**: 1.2 million
            - **Validation images**: 50,000
            - **Classes**: 1,000 object categories
            - **Image resolution**: Variable (typically 224Γ224 or 384Γ384)

            #### Hardware Configuration
            - **GPU**: NVIDIA L4
            - All results are tested on the same hardware configuration to ensure fair comparison

            #### Evaluation Metrics
            - **Top-1 Accuracy**: Percentage of images where the top prediction is correct
            - **Top-5 Accuracy**: Percentage of images where the correct class is in top 5 predictions
            - **Parameters**: Number of trainable parameters in millions
            - **FLOPs**: Floating point operations in billions
            - **Model Size**: Size of the model file in MB

            #### Contributing
            To add your model to the leaderboard, use the "Request a model here!" tab.
            Your model will be evaluated on the ImageNet-1k validation set using NVIDIA L4 GPU and added to the leaderboard.
            """, elem_classes="markdown-text")

    gr.Markdown(f"Last updated on **{LAST_UPDATED}**", elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("π Citation", open=False):
            gr.Textbox(
                value=CITATION_TEXT, lines=7,
                label="Copy the BibTeX snippet to cite this source",
                elem_id="citation-button",
                show_copy_button=True,
            )
|
|
# Launch the Gradio app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()