Spaces:
Sleeping
Sleeping
import html
import json
import re
from pathlib import Path
from typing import Dict, Optional

import gradio as gr
import pandas as pd
class SpectralLeaderboard:
    """Load SpectrumLab benchmark results from JSON and render them as
    pandas DataFrames for the Gradio leaderboard UI.

    Expected JSON layout (as read by this class)::

        {"leaderboard_info": {"total_models": <int>, ...},
         "models": [{"name": ..., "results": {...}, ...}, ...]}
    """

    # Columns of the main leaderboard table, in display order.  Shared by the
    # populated and the empty-table code paths so they cannot drift apart.
    _DISPLAY_COLUMNS = [
        "Rank",
        "Type",
        "Model",
        "Size",
        "MM",
        "Overall",
        "Signal",
        "Perception",
        "Semantic",
        "Generation",
        "Submitter",
        "Date",
    ]

    def __init__(self, data_file: str = "./leaderboard_v_1.0.json"):
        """Load leaderboard data from *data_file* at construction time."""
        self.data_file = Path(data_file)
        self.data = self._load_data()

    def _load_data(self) -> Dict:
        """Read the leaderboard JSON file.

        Returns:
            The parsed data, or an empty leaderboard structure if the file
            is missing or unreadable (the UI still renders in that case).
        """
        try:
            with open(self.data_file, "r", encoding="utf-8") as f:
                data = json.load(f)
            print(f"✅ Successfully loaded {data['leaderboard_info']['total_models']} models from {self.data_file}")
            return data
        except FileNotFoundError:
            print(f"❌ Data file {self.data_file} not found. Creating empty leaderboard.")
        except Exception as e:  # malformed JSON, missing keys, ...
            print(f"❌ Error loading data: {e}")
        return {"leaderboard_info": {"total_models": 0}, "models": []}

    def _format_accuracy(self, accuracy: Optional[float]) -> str:
        """Render an accuracy with one decimal place, or "-" when missing."""
        if accuracy is None:
            return "-"
        return f"{accuracy:.1f}"

    def _calculate_average(self, results: Dict) -> Optional[float]:
        """Return the precomputed ``overall_accuracy`` field (None if absent)."""
        return results.get("overall_accuracy")

    def _get_model_type_icon(self, model_type: str) -> str:
        """Map a model type to its emoji icon (❓ for unknown types)."""
        icons = {"open_source": "🔓", "proprietary": "🔒", "baseline": "📊"}
        return icons.get(model_type, "❓")

    def _get_multimodal_icon(self, is_multimodal: bool) -> str:
        """👁️ for multimodal models, 📝 for text-only models."""
        return "👁️" if is_multimodal else "📝"

    def _get_rank_display(self, rank: int) -> str:
        """Medal emoji for the top three ranks, plain number otherwise."""
        medals = {1: "🥇", 2: "🥈", 3: "🥉"}
        return medals.get(rank, str(rank))

    def _create_link(self, text: str, url: str) -> str:
        """Wrap *text* in an HTML anchor when *url* is non-empty.

        BUGFIX: both the URL and the link text are HTML-escaped so that
        strings coming from the data file cannot inject markup into the
        rendered table.
        """
        if url and url.strip():
            return (
                f'<a href="{html.escape(url, quote=True)}" target="_blank" '
                f'style="text-decoration: none; color: inherit;">{html.escape(text)}</a>'
            )
        return text

    def _passes_filters(self, model: Dict, model_type_filter: str, multimodal_filter: str) -> bool:
        """Return True when *model* matches both dropdown filters."""
        if model_type_filter != "All" and model.get("model_type", "") != model_type_filter:
            return False
        is_multimodal = model.get("is_multimodal", False)
        if multimodal_filter == "Multimodal Only" and not is_multimodal:
            return False
        if multimodal_filter == "Text Only" and is_multimodal:
            return False
        return True

    def _build_row(self, model: Dict) -> Dict:
        """Convert one model record into a leaderboard table row.

        Besides the display columns, the row carries ``*_val`` helper
        columns (raw numbers and the plain model name) that exist only for
        sorting and are dropped before the table is returned.
        """
        results = model.get("results", {})
        overall_accuracy = self._calculate_average(results)
        signal_acc = results.get("Signal", {}).get("accuracy")
        perception_acc = results.get("Perception", {}).get("accuracy")
        semantic_acc = results.get("Semantic", {}).get("accuracy")
        generation_acc = results.get("Generation", {}).get("accuracy")
        name = model.get("name", "Unknown")
        submission = model.get("submission_time", "")
        return {
            "Type": self._get_model_type_icon(model.get("model_type", "unknown")),
            "Model": self._create_link(name, model.get("name_link", "")),
            "Size": model.get("model_size", "Unknown"),
            "MM": self._get_multimodal_icon(model.get("is_multimodal", False)),
            "Overall": self._format_accuracy(overall_accuracy),
            "Signal": self._format_accuracy(signal_acc),
            "Perception": self._format_accuracy(perception_acc),
            "Semantic": self._format_accuracy(semantic_acc),
            "Generation": self._format_accuracy(generation_acc),
            "Submitter": self._create_link(model.get("submitter", "Unknown"), model.get("submitter_link", "")),
            "Date": submission[:10] if submission else "-",
            # Sort-only helper columns; explicit None checks so a genuine
            # 0.0 accuracy is not mistaken for "missing".
            "overall_val": 0 if overall_accuracy is None else overall_accuracy,
            "signal_val": 0 if signal_acc is None else signal_acc,
            "perception_val": 0 if perception_acc is None else perception_acc,
            "semantic_val": 0 if semantic_acc is None else semantic_acc,
            "generation_val": 0 if generation_acc is None else generation_acc,
            "model_name_val": name,
        }

    def get_leaderboard_df(
        self,
        model_type_filter: str = "All",
        multimodal_filter: str = "All",
        sort_by: str = "Overall",
        ascending: bool = False,
    ) -> pd.DataFrame:
        """Build the filtered, sorted, medal-ranked leaderboard table.

        Args:
            model_type_filter: "All" or one of open_source/proprietary/baseline.
            multimodal_filter: "All", "Multimodal Only", or "Text Only".
            sort_by: column name to sort by (defaults to "Overall").
            ascending: sort direction (default: best first).

        Returns:
            A DataFrame with the columns in :data:`_DISPLAY_COLUMNS`;
            empty (but with headers) when no model matches.
        """
        models = self.data.get("models", [])
        print(f"📊 Processing {len(models)} models")
        filtered_models = [m for m in models if self._passes_filters(m, model_type_filter, multimodal_filter)]
        print(f"🔍 After filtering: {len(filtered_models)} models")
        rows = []
        for model in filtered_models:
            try:
                rows.append(self._build_row(model))
            except Exception as e:  # one bad record must not kill the table
                print(f"⚠️ Error processing model {model.get('name', 'Unknown')}: {e}")
        df = pd.DataFrame(rows)
        print(f"📋 Created DataFrame with {len(df)} rows")
        if df.empty:
            print("📋 Empty DataFrame, returning empty table")
            return pd.DataFrame(columns=self._DISPLAY_COLUMNS)
        sort_mapping = {
            "Overall": "overall_val",
            "Signal": "signal_val",
            "Perception": "perception_val",
            "Semantic": "semantic_val",
            "Generation": "generation_val",
            # BUGFIX: sort "Model" on the plain name; the previous code
            # sorted the HTML anchor string, ordering by the href instead.
            "Model": "model_name_val",
            "Date": "Date",
        }
        sort_col = sort_mapping.get(sort_by, "overall_val")
        df = df.sort_values(by=sort_col, ascending=ascending)
        # Rank reflects the position after sorting; top three get medals.
        df.insert(0, "Rank", [self._get_rank_display(i + 1) for i in range(len(df))])
        # Drop the sort-only helper columns.
        result_df = df[self._DISPLAY_COLUMNS]
        print(f"✅ Returning DataFrame with {len(result_df)} rows")
        return result_df

    def get_subcategory_details(self, model_name: str) -> pd.DataFrame:
        """Return per-subcategory accuracies for *model_name*.

        *model_name* may be either the raw name or the HTML anchor shown in
        the table; in the latter case the inner text is extracted (and
        HTML-unescaped, matching :meth:`_create_link`) before matching.
        """
        clean_model_name = model_name
        if "<a href=" in model_name:
            match = re.search(r">([^<]+)<", model_name)
            if match:
                clean_model_name = html.unescape(match.group(1))
        for model in self.data.get("models", []):
            if model.get("name") != clean_model_name:
                continue
            rows = []
            results = model.get("results", {})
            for level, level_data in results.items():
                if level == "overall_accuracy":  # scalar field, not a level dict
                    continue
                for subcat, subcat_data in level_data.get("subcategories", {}).items():
                    rows.append(
                        {
                            "Level": level,
                            "Subcategory": subcat,
                            "Accuracy": self._format_accuracy(subcat_data.get("accuracy")),
                        }
                    )
            return pd.DataFrame(rows)
        return pd.DataFrame()
def create_leaderboard():
    """Build the Gradio Blocks UI for the leaderboard.

    Wires the filter/sort controls and refresh button to the main table,
    a per-model detail panel (subcategory results + info card), and a
    static legend. Returns the ``gr.Blocks`` app (not yet launched).
    """
    leaderboard = SpectralLeaderboard()
    with gr.Blocks(
        title="🔬 SpectrumLab Leaderboard",
        theme=gr.themes.Default(),
        css="""
        .gradio-container {
            max-width: 1400px !important;
        }
        .dataframe table {
            border-collapse: collapse !important;
        }
        .dataframe td, .dataframe th {
            padding: 8px 12px !important;
            border: 1px solid #e1e5e9 !important;
        }
        .dataframe th {
            background-color: #f8f9fa !important;
            font-weight: 600 !important;
        }
        .dataframe tr:nth-child(even) {
            background-color: #f8f9fa !important;
        }
        .dataframe tr:hover {
            background-color: #e8f4f8 !important;
        }
        """,
    ) as demo:
        gr.Markdown(
            """
            # 🏆 SpectrumLab Leaderboard
            A comprehensive benchmark for evaluating large language models on **spectroscopic analysis tasks**.
            📊 **Evaluation Levels**: Signal Processing, Perception, Semantic Understanding, Generation
            🔬 **Domains**: IR, NMR, UV-Vis, Mass Spectrometry and more
            🌟 **Multimodal**: Support for both text-only and vision-language models
            """
        )
        with gr.Row():
            info = leaderboard.data.get("leaderboard_info", {"total_models": 0})
            gr.Markdown(
                f"""
                **📈 Stats**: {info["total_models"]} models evaluated
                **🏅 Rankings**: 🥇🥈🥉 medals for top performers
                **🔗 Submit**: Send evaluation results to contribute your model!
                """
            )
        # Filter / sort controls.
        with gr.Row():
            with gr.Column(scale=2):
                model_type_filter = gr.Dropdown(
                    choices=["All", "open_source", "proprietary", "baseline"],
                    value="All",
                    label="🏷️ Model Type",
                )
            with gr.Column(scale=2):
                multimodal_filter = gr.Dropdown(
                    choices=["All", "Multimodal Only", "Text Only"],
                    value="All",
                    label="👁️ Modality",
                )
            with gr.Column(scale=2):
                sort_by = gr.Dropdown(
                    choices=[
                        "Overall",
                        "Signal",
                        "Perception",
                        "Semantic",
                        "Generation",
                        "Model",
                        "Date",
                    ],
                    value="Overall",
                    label="📊 Sort By",
                )
            with gr.Column(scale=1):
                ascending = gr.Checkbox(value=False, label="⬆️ Ascending")
            with gr.Column(scale=1):
                refresh_btn = gr.Button("🔄 Refresh", variant="secondary")
        # Main leaderboard table (all cells rendered as HTML so the
        # model/submitter anchor links are clickable).
        initial_df = leaderboard.get_leaderboard_df()
        leaderboard_table = gr.Dataframe(
            value=initial_df,
            interactive=False,
            wrap=True,
            datatype=["html"] * len(initial_df.columns) if len(initial_df.columns) > 0 else ["html"] * 12,
            column_widths=(
                [
                    "6%",
                    "5%",
                    "18%",
                    "8%",
                    "5%",
                    "10%",
                    "10%",
                    "10%",
                    "10%",
                    "10%",
                    "16%",
                    "10%",
                ]
                if len(initial_df.columns) > 0
                else None
            ),
            label="🏆 Model Rankings",
        )
        # Per-model detail panel (collapsed by default).
        with gr.Accordion("📋 Model Details", open=False):
            model_choices = [model.get("name", "Unknown") for model in leaderboard.data.get("models", [])]
            model_select = gr.Dropdown(
                choices=model_choices,
                label="Select Model for Details",
            )
            with gr.Row():
                with gr.Column():
                    subcategory_table = gr.Dataframe(label="📊 Subcategory Results")
                with gr.Column():
                    model_info = gr.Markdown(label="ℹ️ Model Information")
        # Static legend explaining icons and task categories.
        with gr.Accordion("📖 Legend & Info", open=False):
            gr.Markdown(
                """
                ### 🔍 Column Explanations
                - **Rank**: 🥇 1st place, 🥈 2nd place, 🥉 3rd place, then numbers
                - **Type**: 🔓 Open Source, 🔒 Proprietary, 📊 Baseline
                - **MM**: 👁️ Multimodal, 📝 Text-only
                - **Overall**: Average accuracy across all evaluated levels
                - **Signal**: Low-level signal processing tasks
                - **Perception**: Mid-level feature extraction tasks
                - **Semantic**: High-level understanding tasks
                - **Generation**: Spectrum generation tasks
                ### 📝 Notes
                - "-" indicates the model was not evaluated on that benchmark
                - Rankings are based on overall performance across all evaluated tasks
                - Multimodal models can process both text and spectroscopic images
                - Click on model names and submitters to visit their pages
                ### 📊 Task Categories
                **Signal Level:**
                - Spectrum Type Classification (TC)
                - Spectrum Quality Assessment (QE)
                - Basic Feature Extraction (FE)
                - Impurity Peak Detection (ID)
                **Perception Level:**
                - Functional Group Recognition (GR)
                - Elemental Compositional Prediction (EP)
                - Peak Assignment (PA)
                - Basic Property Prediction (PP)
                **Semantic Level:**
                - Molecular Structure Elucidation (SE)
                - Fusing Spectroscopic Modalities (FM)
                - Multimodal Molecular Reasoning (MR)
                **Generation Level:**
                - Forward Problems (FP)
                - Inverse Problems (IP)
                - De Novo Generation (DnG)
                """
            )

        def update_leaderboard(model_type, multimodal, sort_by_val, asc):
            """Recompute the leaderboard table from the current control values."""
            print(f"🔄 Updating leaderboard with filters: {model_type}, {multimodal}, {sort_by_val}, {asc}")
            return leaderboard.get_leaderboard_df(
                model_type_filter=model_type,
                multimodal_filter=multimodal,
                sort_by=sort_by_val,
                ascending=asc,
            )

        def update_model_details(model_name):
            """Return (subcategory DataFrame, markdown info card) for *model_name*."""
            if not model_name:
                return pd.DataFrame(), ""
            # Subcategory-level results.
            subcategory_df = leaderboard.get_subcategory_details(model_name)
            # Basic model information card.
            for model in leaderboard.data.get("models", []):
                if model.get("name") == model_name:
                    # Render an optional URL as a markdown link.
                    def format_link(name, url):
                        if url and url.strip():
                            return f"[{name}]({url})"
                        return "Not provided"
                    model_info_dict = model.get("model_info", {})
                    results = model.get("results", {})
                    info_md = f"""
                    ### {model.get("name", "Unknown")}
                    **👤 Submitter**: {model.get("submitter", "Unknown")}
                    **📅 Submission**: {model.get("submission_time", "")[:10] if model.get("submission_time") else "Unknown"}
                    **🏷️ Type**: {model.get("model_type", "Unknown")}
                    **📏 Size**: {model.get("model_size", "Unknown")}
                    **👁️ Multimodal**: {"Yes" if model.get("is_multimodal", False) else "No"}
                    **📝 Description**: {model_info_dict.get("description", "") or "No description provided"}
                    **🔗 Links**:
                    - **Homepage**: {format_link("Visit", model_info_dict.get("homepage", ""))}
                    - **Paper**: {format_link("Read", model_info_dict.get("paper", ""))}
                    - **Code**: {format_link("View", model_info_dict.get("code", ""))}
                    **📊 Performance Summary**:
                    - **Overall**: {leaderboard._format_accuracy(results.get("overall_accuracy"))}%
                    - **Signal**: {leaderboard._format_accuracy(results.get("Signal", {}).get("accuracy"))}%
                    - **Perception**: {leaderboard._format_accuracy(results.get("Perception", {}).get("accuracy"))}%
                    - **Semantic**: {leaderboard._format_accuracy(results.get("Semantic", {}).get("accuracy"))}%
                    - **Generation**: {leaderboard._format_accuracy(results.get("Generation", {}).get("accuracy"))}%
                    """
                    return subcategory_df, info_md
            return pd.DataFrame(), ""

        # Event wiring: any control change (and the refresh button) rebuilds
        # the table; selecting a model populates the detail panel.
        for component in [model_type_filter, multimodal_filter, sort_by, ascending]:
            component.change(
                fn=update_leaderboard,
                inputs=[model_type_filter, multimodal_filter, sort_by, ascending],
                outputs=[leaderboard_table],
            )
        refresh_btn.click(
            fn=update_leaderboard,
            inputs=[model_type_filter, multimodal_filter, sort_by, ascending],
            outputs=[leaderboard_table],
        )
        model_select.change(
            fn=update_model_details,
            inputs=[model_select],
            outputs=[subcategory_table, model_info],
        )
    return demo
if __name__ == "__main__":
    # Entry point: build the Blocks app and serve it on all interfaces.
    demo_app = create_leaderboard()
    print("🚀 Starting SpectrumLab Leaderboard...")
    launch_options = {
        "server_name": "0.0.0.0",
        "share": True,
        "show_api": False,
        "inbrowser": True,
    }
    demo_app.launch(**launch_options)