Spaces:

worldbench
/

WorldLens

Running

WorldLens / app.py

AlanLiangC

add models

9d1372a 3 days ago

9.23 kB

	import os
	import glob
	import json
	from typing import Dict, Literal, Tuple, List, Optional

	import pandas as pd
	import matplotlib.pyplot as plt
	import gradio as gr

	# Directory that load_results() scans for ``*.json`` result files.
	RESULTS_DIR = "./worldlens-results"

	# Metrics ranked in ascending order under the "Auto" sort mode.
	METRICS_MIN_BETTER = [
	    "Depth Discrepancy",
	    "Perceptual Discrepancy",
	    "Photometric Error",
	    "Geometric Discrepancy",
	    "Novel-View Discrepancy",
	    "Displacement Error",
	]

	# Metrics ranked in descending order under the "Auto" sort mode.
	METRICS_MAX_BETTER = [
	    "Subject Fidelity",
	    "Subject Coherence",
	    "Subject Consistency",
	    "Temporal Consistency",
	    "Semantic Consistency",
	    "View Consistency",
	    "Novel-View Quality",
	    "Open-Loop Adherence",
	    "Route Completion",
	    "Closed-Loop Adherence",
	    "Map Segmentation",
	    "3D Object Detection",
	    "3D Object Tracking",
	    "Occupancy Prediction",
	]

	# Metric name -> ranking direction; "min" entries come first, then "max".
	METRIC_BETTER: Dict[str, Literal["min", "max"]] = {
	    **dict.fromkeys(METRICS_MIN_BETTER, "min"),
	    **dict.fromkeys(METRICS_MAX_BETTER, "max"),
	}

	# Alphabetically sorted, de-duplicated metric names offered in the UI.
	METRIC_CHOICES: List[str] = sorted({*METRICS_MIN_BETTER, *METRICS_MAX_BETTER})

	# Metric selected by default; falls back to the first choice if the
	# preferred one is ever removed from the lists above.
	if "Subject Fidelity" in METRIC_CHOICES:
	    DEFAULT_METRIC = "Subject Fidelity"
	else:
	    DEFAULT_METRIC = METRIC_CHOICES[0]

	# Loaded results; assigned by reload_data() and read by update_leaderboard().
	df_all: Optional[pd.DataFrame] = None


	def load_results(results_dir: Optional[str] = None) -> pd.DataFrame:
	    """Load every ``*.json`` result file into a one-row-per-model DataFrame.

	    The model name is the file name without its extension. ``venue`` and
	    ``date`` are read from the top level of the JSON object (defaulting to
	    ``""``). Every entry of each dict found under ``"Metrics"`` becomes a
	    column; the category level is dropped, and non-dict categories are
	    ignored.

	    Files that cannot be read, are not valid JSON, or do not contain a JSON
	    object are skipped with a warning, so one bad upload does not take the
	    whole leaderboard down.

	    Args:
	        results_dir: Directory to scan. Defaults to ``RESULTS_DIR``.

	    Returns:
	        A DataFrame whose first columns are ``Model``, ``venue`` and
	        ``date``, followed by the metric columns in order of first
	        appearance. An empty DataFrame when no usable file is found.
	    """
	    import logging  # local import keeps this block self-contained

	    logger = logging.getLogger(__name__)
	    if results_dir is None:
	        results_dir = RESULTS_DIR

	    rows = []
	    # Sorted so that row order is deterministic across platforms.
	    for path in sorted(glob.glob(os.path.join(results_dir, "*.json"))):
	        try:
	            # Explicit encoding: do not depend on the locale default.
	            with open(path, "r", encoding="utf-8") as f:
	                data = json.load(f)
	        except (OSError, ValueError) as exc:
	            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
	            logger.warning("Skipping unreadable result file %s: %s", path, exc)
	            continue

	        if not isinstance(data, dict):
	            logger.warning("Skipping %s: top-level JSON value is not an object", path)
	            continue

	        row = {
	            "Model": os.path.splitext(os.path.basename(path))[0],
	            "venue": data.get("venue", ""),
	            "date": data.get("date", ""),
	        }

	        metrics = data.get("Metrics", {})
	        if isinstance(metrics, dict):
	            for metric_dict in metrics.values():
	                if isinstance(metric_dict, dict):
	                    row.update(metric_dict)

	        rows.append(row)

	    if not rows:
	        return pd.DataFrame()

	    df = pd.DataFrame(rows)

	    # Keep the descriptive columns first, metrics after them.
	    meta_cols = ["Model", "venue", "date"]
	    metric_cols = [c for c in df.columns if c not in meta_cols]
	    return df[meta_cols + metric_cols]


	def get_venue_choices(df: pd.DataFrame) -> List[str]:
	    """Return the venue filter options for *df*.

	    The list always starts with ``"All"``; when *df* has a ``venue`` column
	    it is followed by the sorted distinct venues, excluding missing values
	    and empty strings.
	    """
	    choices = ["All"]
	    if "venue" in df.columns:
	        distinct = df["venue"].dropna().unique()
	        choices.extend(sorted(name for name in distinct if name != ""))
	    return choices


	def _message_figure(message: str) -> plt.Figure:
	    """Return a small axis-free figure that only displays *message*."""
	    # Built without pyplot so the figure is not kept alive by pyplot's
	    # global figure registry after the request has been served.
	    fig = plt.Figure(figsize=(6, 3))
	    ax = fig.subplots()
	    ax.text(0.5, 0.5, message, ha="center", va="center")
	    ax.axis("off")
	    return fig


	def update_leaderboard(
	    metric: str,
	    top_k: int,
	    model_filter: str,
	    venue_filter: str,
	    sort_mode: str,
	    selected_metrics: Optional[List[str]],
	) -> Tuple[pd.DataFrame, plt.Figure]:
	    """Build the leaderboard table and bar chart for the given controls.

	    Reads the module-level ``df_all``; it is never modified here.

	    Args:
	        metric: Column used for ranking and plotted in the chart.
	        top_k: Number of rows to keep; coerced to ``int`` because UI sliders
	            may deliver floats.
	        model_filter: Case-insensitive literal substring matched against
	            the ``Model`` column; empty means no filtering.
	        venue_filter: Exact venue to keep, or ``"All"`` / empty for all.
	        sort_mode: ``"Auto"`` uses ``METRIC_BETTER`` (unknown metrics are
	            treated as larger-is-better); ``"Ascending (small → large)"``
	            sorts ascending; any other value sorts descending.
	        selected_metrics: Extra metric columns to show in the table.
	            ``None`` is treated as an empty list; the ranking metric is
	            always included.

	    Returns:
	        ``(table, figure)``. The table holds ``Model``, ``venue``, ``date``
	        and the requested metrics, rounded to 3 decimals. When no data is
	        loaded or *metric* is not a column, the table is empty and the
	        figure only shows an explanatory message.
	    """
	    if df_all is None or df_all.empty:
	        return pd.DataFrame(), _message_figure("No results found in ./worldlens-results")

	    df = df_all.copy()

	    if model_filter:
	        df = df[df["Model"].str.contains(model_filter, case=False, regex=False)]

	    if venue_filter and venue_filter != "All":
	        df = df[df["venue"] == venue_filter]

	    if metric not in df.columns:
	        return pd.DataFrame(), _message_figure(f"Metric '{metric}' not found in current data.")

	    # Rank on numbers only: values that are missing or not numeric cannot be
	    # ordered or drawn, so those models are left out of this metric's ranking.
	    ranked = df.copy()
	    ranked[metric] = pd.to_numeric(ranked[metric], errors="coerce")
	    ranked = ranked.dropna(subset=[metric])

	    if sort_mode == "Auto":
	        ascending = METRIC_BETTER.get(metric, "max") == "min"
	    else:
	        ascending = sort_mode == "Ascending (small → large)"

	    df_top = ranked.sort_values(metric, ascending=ascending).head(int(top_k)).copy()

	    cols = ["Model", "venue", "date"]
	    for m in selected_metrics or []:
	        if m in df_top.columns and m not in cols:
	            cols.append(m)
	    if metric not in cols:
	        cols.append(metric)

	    table_df = df_top[cols].round(3)

	    # =========================
	    # Dark-theme leaderboard plot
	    # =========================
	    bg_color = "#0e1117"  # Hugging Face dark page background
	    panel_color = "#161b22"  # plot panel background
	    bar_color = "#4cc9f0"  # primary colour (cyan-blue)
	    grid_color = "#30363d"
	    text_color = "#c9d1d9"

	    # Figure is created directly rather than through pyplot, so it is
	    # garbage-collected once the caller is done with it.
	    fig = plt.Figure(figsize=(10, 4.5))
	    ax = fig.subplots()
	    fig.patch.set_facecolor(bg_color)
	    ax.set_facecolor(panel_color)

	    values = df_top[metric].values
	    models = table_df["Model"].values
	    bars = ax.barh(models, values, color=bar_color, height=0.6)

	    # barh draws the first row at the bottom; flip the axis for ascending
	    # order so the first-ranked row ends up at the top.
	    if ascending:
	        ax.invert_yaxis()

	    ax.set_xlabel(metric, color=text_color, fontsize=11, labelpad=6)
	    ax.set_title(
	        f"Leaderboard · {metric}",
	        fontsize=13,
	        color=text_color,
	        pad=10,
	        fontweight="bold",
	    )

	    ax.xaxis.grid(True, linestyle="--", linewidth=0.6, color=grid_color, alpha=0.7)
	    ax.yaxis.grid(False)

	    for spine in ["top", "right", "left"]:
	        ax.spines[spine].set_visible(False)
	    ax.spines["bottom"].set_color(grid_color)

	    ax.tick_params(axis="x", colors=text_color, labelsize=10)
	    ax.tick_params(axis="y", colors=text_color, labelsize=10)

	    # Value label just past the end of each bar.
	    for bar, value in zip(bars, values):
	        ax.text(
	            bar.get_width() * 1.01,
	            bar.get_y() + bar.get_height() / 2,
	            f"{value:.2f}",
	            va="center",
	            ha="left",
	            fontsize=9.5,
	            color=text_color,
	        )

	    fig.tight_layout()

	    return table_df, fig


	def reload_data():
	    """Reload the result files and rebuild the default leaderboard view.

	    Re-assigns the module-level ``df_all`` from ``load_results()``.

	    Returns:
	        A 4-tuple ``(status message, venue dropdown update, table, figure)``.
	        When no results are loaded, the table is empty, the dropdown is
	        reset to ``["All"]`` and the figure only shows the status message.
	        Otherwise the table and figure come from ``update_leaderboard``
	        called with the default metric, top 10, no filters and "Auto" sort.
	    """
	    global df_all
	    df_all = load_results()

	    if df_all is None or df_all.empty:
	        msg = "No JSON files found in ./worldlens-results. Please upload some results."
	        # Created without pyplot so the placeholder is not retained in
	        # pyplot's global figure registry on every reload.
	        dummy_fig = plt.Figure(figsize=(6, 3))
	        ax = dummy_fig.subplots()
	        ax.text(0.5, 0.5, msg, ha="center", va="center")
	        ax.axis("off")

	        venue_update = gr.update(choices=["All"], value="All")

	        return msg, venue_update, pd.DataFrame(), dummy_fig

	    venue_choices = get_venue_choices(df_all)
	    msg = f"Loaded {len(df_all)} models from {RESULTS_DIR}"

	    # Default table columns, restricted to metrics the UI actually offers.
	    default_selected = ["Subject Fidelity", "Temporal Consistency", "Map Segmentation"]
	    default_selected = [m for m in default_selected if m in METRIC_CHOICES]

	    table_df, fig = update_leaderboard(
	        metric=DEFAULT_METRIC,
	        top_k=10,
	        model_filter="",
	        venue_filter="All",
	        sort_mode="Auto",
	        selected_metrics=default_selected,
	    )

	    venue_update = gr.update(
	        choices=venue_choices,
	        value="All",
	        interactive=True,
	    )

	    return msg, venue_update, table_df, fig


	# Gradio UI: ranking controls on top, leaderboard table and chart below.
	with gr.Blocks(css="""
	#title {
	text-align: center;
	}
	""") as demo:
	    gr.Markdown(
	        """
	# 🌍 WorldLens Leaderboard
	""",
	        elem_id="title",
	    )

	    status_box = gr.Markdown("Loading results...", elem_id="status")

	    # Row 1: what to rank by, in which direction, and how many rows.
	    with gr.Row():
	        metric_dropdown = gr.Dropdown(
	            # Fixed choices, to avoid incompatibilities with dynamic updates.
	            choices=METRIC_CHOICES,
	            value=DEFAULT_METRIC,
	            label="Metric (for ranking)",
	            interactive=True,
	        )
	        sort_mode_radio = gr.Radio(
	            choices=[
	                "Auto",
	                "Ascending (small → large)",
	                "Descending (large → small)",
	            ],
	            value="Auto",
	            label="Sort mode",
	            interactive=True,
	        )
	        topk_slider = gr.Slider(
	            minimum=3,
	            maximum=50,
	            step=1,
	            value=10,
	            label="Top-K",
	            interactive=True,
	        )

	    # Added: the set of metrics displayed as table columns.
	    metrics_select = gr.CheckboxGroup(
	        choices=METRIC_CHOICES,
	        value=["Subject Fidelity", "Temporal Consistency", "Map Segmentation"],
	        label="Metrics to show in table",
	        interactive=True,
	    )

	    # Row 2: row filters.
	    with gr.Row():
	        model_filter_box = gr.Textbox(
	            placeholder="magic, dream, ...",
	            label="Filter by model name",
	            interactive=True,
	        )
	        venue_dropdown = gr.Dropdown(
	            choices=["All"],
	            value="All",
	            label="Filter by venue",
	            interactive=True,
	        )

	    # Row 3: actions.
	    with gr.Row():
	        reload_button = gr.Button("🔄 Reload JSONs", variant="secondary")
	        update_button = gr.Button("✅ Update leaderboard", variant="primary")

	    leaderboard_table = gr.DataFrame(label="Leaderboard", interactive=False)
	    leaderboard_plot = gr.Plot(label="Metric comparison", format="png")

	    # reload_data() returns (status, venue update, table, figure) in this order.
	    refresh_outputs = [status_box, venue_dropdown, leaderboard_table, leaderboard_plot]

	    # Component order must match update_leaderboard()'s parameter order.
	    leaderboard_inputs = [
	        metric_dropdown,
	        topk_slider,
	        model_filter_box,
	        venue_dropdown,
	        sort_mode_radio,
	        metrics_select,
	    ]

	    reload_button.click(fn=reload_data, inputs=[], outputs=refresh_outputs)

	    update_button.click(
	        fn=update_leaderboard,
	        inputs=leaderboard_inputs,
	        outputs=[leaderboard_table, leaderboard_plot],
	    )

	    # Populate the page once when it is first opened.
	    demo.load(fn=reload_data, inputs=[], outputs=refresh_outputs)


	if __name__ == "__main__":
	    demo.launch()