# Hugging Face Space: worldbench/WorldLens (status: Running)
# File size: 9,226 bytes

import os
import glob
import json
from typing import Dict, Literal, Tuple, List, Optional

import pandas as pd
import matplotlib.pyplot as plt
import gradio as gr

# Directory that is scanned for per-model result files.
RESULTS_DIR = "./worldlens-results"

# Metrics for which a smaller value ranks higher.
METRICS_MIN_BETTER = [
    "Depth Discrepancy",
    "Perceptual Discrepancy",
    "Photometric Error",
    "Geometric Discrepancy",
    "Novel-View Discrepancy",
    "Displacement Error",
]

# Metrics for which a larger value ranks higher.
METRICS_MAX_BETTER = [
    "Subject Fidelity",
    "Subject Coherence",
    "Subject Consistency",
    "Temporal Consistency",
    "Semantic Consistency",
    "View Consistency",
    "Novel-View Quality",
    "Open-Loop Adherence",
    "Route Completion",
    "Closed-Loop Adherence",
    "Map Segmentation",
    "3D Object Detection",
    "3D Object Tracking",
    "Occupancy Prediction",
]

# Metric name -> which direction counts as "better".
METRIC_BETTER: Dict[str, Literal["min", "max"]] = {
    **{name: "min" for name in METRICS_MIN_BETTER},
    **{name: "max" for name in METRICS_MAX_BETTER},
}

# Alphabetically sorted, de-duplicated list of every known metric name.
METRIC_CHOICES: List[str] = sorted({*METRICS_MIN_BETTER, *METRICS_MAX_BETTER})

# Metric selected by default; falls back to the first choice if the
# preferred one is not among the known metrics.
DEFAULT_METRIC = (
    METRIC_CHOICES[0]
    if "Subject Fidelity" not in METRIC_CHOICES
    else "Subject Fidelity"
)

# Table of loaded results; None until it is assigned elsewhere in the file.
df_all: Optional[pd.DataFrame] = None


def load_results() -> pd.DataFrame:
    """Load every ``*.json`` file in ``RESULTS_DIR`` into one table.

    Each file contributes one row. The model name is the file name without
    its extension; ``venue`` and ``date`` are read from the top-level JSON
    object and default to ``""``. Under the ``"Metrics"`` key, every
    category whose value is a dict is flattened so that each metric name
    becomes a column. Category names are discarded, so a metric name that
    appears in more than one category keeps the value seen last.

    Files that cannot be read or parsed, or whose top level is not a JSON
    object, are skipped with a printed notice so that one bad file does not
    prevent the remaining results from loading.

    Returns:
        A DataFrame whose first columns are ``Model``, ``venue`` and
        ``date``, followed by the metric columns. An empty DataFrame is
        returned when no usable result file is found.
    """
    rows = []

    for path in sorted(glob.glob(os.path.join(RESULTS_DIR, "*.json"))):
        try:
            # Explicit encoding: the platform default is not always UTF-8.
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"[load_results] Skipping {path}: {exc}")
            continue

        if not isinstance(data, dict):
            print(f"[load_results] Skipping {path}: top-level JSON value is not an object")
            continue

        row = {
            "Model": os.path.splitext(os.path.basename(path))[0],
            "venue": data.get("venue", ""),
            "date": data.get("date", ""),
        }

        metrics = data.get("Metrics", {})
        if isinstance(metrics, dict):
            for metric_dict in metrics.values():
                # Non-dict categories carry no metric/value pairs to flatten.
                if isinstance(metric_dict, dict):
                    row.update(metric_dict)

        rows.append(row)

    if not rows:
        return pd.DataFrame()

    df = pd.DataFrame(rows)

    # Keep the descriptive columns in front of the metric columns.
    meta_cols = ["Model", "venue", "date"]
    metric_cols = [c for c in df.columns if c not in meta_cols]
    return df[meta_cols + metric_cols]


def get_venue_choices(df: pd.DataFrame) -> List[str]:
    """Return the venue filter options for *df*.

    The list always starts with ``"All"``. When *df* has a ``venue`` column,
    its distinct values follow in sorted order, with missing values and
    empty strings left out.
    """
    choices = ["All"]
    if "venue" in df.columns:
        distinct = df["venue"].dropna().unique()
        choices.extend(sorted(name for name in distinct if name != ""))
    return choices


def _message_figure(message: str) -> plt.Figure:
    """Return a small blank figure that shows *message* centered."""
    fig, ax = plt.subplots(figsize=(6, 3))
    ax.text(0.5, 0.5, message, ha="center", va="center")
    ax.axis("off")
    # Deregister the figure from pyplot so that repeated callbacks do not
    # accumulate open figures; the returned Figure object can still be saved.
    plt.close(fig)
    return fig


def update_leaderboard(
    metric: str,
    top_k: int,
    model_filter: str,
    venue_filter: str,
    sort_mode: str,
    selected_metrics: Optional[List[str]],
) -> Tuple[pd.DataFrame, plt.Figure]:
    """Build the leaderboard table and bar chart for one ranking metric.

    Args:
        metric: Name of the column to rank by.
        top_k: Maximum number of models to keep. It is converted with
            ``int()`` because a UI slider may deliver a float.
        model_filter: Case-insensitive substring matched literally against
            the ``Model`` column; an empty value disables the filter.
        venue_filter: Exact ``venue`` value to keep; ``"All"`` or an empty
            value disables the filter.
        sort_mode: ``"Auto"`` sorts by the direction recorded in
            ``METRIC_BETTER`` (descending for unknown metrics),
            ``"Ascending (small → large)"`` sorts ascending, and any other
            value sorts descending.
        selected_metrics: Extra metric columns to show in the table;
            ``None`` is treated as an empty list.

    Returns:
        ``(table, figure)``. The table holds ``Model``, ``venue``, ``date``,
        the selected metrics and the ranking metric, rounded to 3 decimals.
        When there is nothing to rank (no data loaded, unknown metric, or no
        model with a numeric value after filtering), the table is an empty
        DataFrame and the figure only shows an explanatory message.
    """
    if df_all is None or df_all.empty:
        return pd.DataFrame(), _message_figure("No results found in ./worldlens-results")

    df = df_all

    if model_filter:
        df = df[df["Model"].str.contains(model_filter, case=False, regex=False)]

    if venue_filter and venue_filter != "All":
        df = df[df["venue"] == venue_filter]

    if metric not in df.columns:
        return pd.DataFrame(), _message_figure(f"Metric '{metric}' not found in current data.")

    # Work on a private copy so the shared table is never modified.
    df = df.copy()
    # Models that do not report this metric (or report a non-numeric value)
    # cannot be ranked or drawn as a bar, so they are left out.
    df[metric] = pd.to_numeric(df[metric], errors="coerce")
    df = df.dropna(subset=[metric])

    if df.empty:
        return pd.DataFrame(), _message_figure(
            f"No models with a '{metric}' value match the current filters."
        )

    better = METRIC_BETTER.get(metric, "max")
    if sort_mode == "Auto":
        ascending = (better == "min")
    elif sort_mode == "Ascending (small → large)":
        ascending = True
    else:
        ascending = False

    df_top = df.sort_values(metric, ascending=ascending).head(int(top_k)).copy()

    cols = ["Model", "venue", "date"]
    for m in selected_metrics or []:
        if m in df_top.columns and m not in cols:
            cols.append(m)

    # The ranking metric is always shown, even when it was not ticked.
    if metric not in cols:
        cols.append(metric)

    table_df = df_top[cols].round(3)

    # =========================
    # Dark-theme leaderboard plot
    # =========================
    bg_color = "#0e1117"        # HF dark background
    panel_color = "#161b22"     # panel background
    bar_color = "#4cc9f0"       # primary color (cyan-blue)
    grid_color = "#30363d"
    text_color = "#c9d1d9"

    fig, ax = plt.subplots(figsize=(10, 4.5))
    fig.patch.set_facecolor(bg_color)
    ax.set_facecolor(panel_color)

    values = df_top[metric].to_numpy()
    models = df_top["Model"].to_numpy()
    bars = ax.barh(models, values, color=bar_color, height=0.6)

    # barh places the first row at the bottom of the axis. Flipping the axis
    # only for ascending sorts means the smallest plotted value ends up at
    # the top in either sort direction.
    if ascending:
        ax.invert_yaxis()

    ax.set_xlabel(metric, color=text_color, fontsize=11, labelpad=6)
    ax.set_title(
        f"Leaderboard · {metric}",
        fontsize=13,
        color=text_color,
        pad=10,
        fontweight="bold"
    )

    ax.xaxis.grid(True, linestyle="--", linewidth=0.6, color=grid_color, alpha=0.7)
    ax.yaxis.grid(False)

    for spine in ["top", "right", "left"]:
        ax.spines[spine].set_visible(False)
    ax.spines["bottom"].set_color(grid_color)

    ax.tick_params(axis="x", colors=text_color, labelsize=10)
    ax.tick_params(axis="y", colors=text_color, labelsize=10)

    # Print each value just past the end of its bar.
    for bar, value in zip(bars, values):
        ax.text(
            bar.get_width() * 1.01,
            bar.get_y() + bar.get_height() / 2,
            f"{value:.2f}",
            va="center",
            ha="left",
            fontsize=9.5,
            color=text_color
        )

    # Lay out this specific figure rather than pyplot's "current" one, then
    # deregister it from pyplot so figures do not pile up across callbacks.
    fig.tight_layout()
    plt.close(fig)

    return table_df, fig


def reload_data():
    """Reload the result files and rebuild the default leaderboard view.

    Replaces the module-level ``df_all`` with the output of
    ``load_results()`` and returns four values, in this order: a status
    message, a ``gr.update`` for the venue dropdown, the leaderboard table
    and the leaderboard figure.

    When nothing was loaded, the table is an empty DataFrame, the figure
    only shows the status message, and the venue dropdown is reset to
    ``"All"``. Otherwise the view is built with ``update_leaderboard`` using
    ``DEFAULT_METRIC``, the top 10 models, no filters and ``"Auto"`` sorting.
    """
    global df_all
    df_all = load_results()

    if df_all is None or df_all.empty:
        msg = "No JSON files found in ./worldlens-results. Please upload some results."
        dummy_fig, ax = plt.subplots(figsize=(6, 3))
        ax.text(0.5, 0.5, msg, ha="center", va="center")
        ax.axis("off")
        # Deregister the placeholder from pyplot so that repeated reloads do
        # not accumulate open figures; the Figure object can still be saved.
        plt.close(dummy_fig)

        venue_update = gr.update(choices=["All"], value="All")

        return msg, venue_update, pd.DataFrame(), dummy_fig

    venue_choices = get_venue_choices(df_all)
    msg = f"Loaded {len(df_all)} models from {RESULTS_DIR}"

    # Table columns shown by default, limited to metrics that are known.
    default_selected = ["Subject Fidelity", "Temporal Consistency", "Map Segmentation"]
    default_selected = [m for m in default_selected if m in METRIC_CHOICES]

    table_df, fig = update_leaderboard(
        metric=DEFAULT_METRIC,
        top_k=10,
        model_filter="",
        venue_filter="All",
        sort_mode="Auto",
        selected_metrics=default_selected,
    )

    venue_update = gr.update(
        choices=venue_choices,
        value="All",
        interactive=True,
    )

    return msg, venue_update, table_df, fig


# Gradio UI definition: a title, the ranking/filter controls, the result
# table and plot, and the event wiring that connects them to reload_data
# and update_leaderboard.
with gr.Blocks(css="""
#title {
  text-align: center;
}
""") as demo:
    gr.Markdown(
        """
# 🌍 WorldLens Leaderboard
        """,
        elem_id="title"
    )

    # Status line; reload_data's first return value is written here.
    status_box = gr.Markdown("Loading results...", elem_id="status")

    # Ranking controls: metric, sort direction and number of rows.
    with gr.Row():
        metric_dropdown = gr.Dropdown(
            label="Metric (for ranking)",
            choices=METRIC_CHOICES,           # fixed choices, to avoid incompatibilities with dynamic updates
            value=DEFAULT_METRIC,
            interactive=True,
        )
        # These labels are compared verbatim in update_leaderboard, so they
        # must stay in sync with the strings used there.
        sort_mode_radio = gr.Radio(
            label="Sort mode",
            choices=[
                "Auto",
                "Ascending (small → large)",
                "Descending (large → small)",
            ],
            value="Auto",
            interactive=True,
        )
        topk_slider = gr.Slider(
            label="Top-K",
            minimum=3,
            maximum=50,
            value=10,
            step=1,
            interactive=True,
        )

    # Added: the multiple metrics displayed in the table
    metrics_select = gr.CheckboxGroup(
        label="Metrics to show in table",
        choices=METRIC_CHOICES,
        value=["Subject Fidelity", "Temporal Consistency", "Map Segmentation"],
        interactive=True,
    )

    # Row filters: model-name substring and venue.
    with gr.Row():
        model_filter_box = gr.Textbox(
            label="Filter by model name",
            placeholder="magic, dream, ...",
            interactive=True,
        )
        # Starts with only "All"; reload_data supplies the real venue list.
        venue_dropdown = gr.Dropdown(
            label="Filter by venue",
            choices=["All"],
            value="All",
            interactive=True,
        )

    with gr.Row():
        reload_button = gr.Button("🔄 Reload JSONs", variant="secondary")
        update_button = gr.Button("✅ Update leaderboard", variant="primary")

    # Outputs: the ranked table and the bar chart for the ranking metric.
    leaderboard_table = gr.DataFrame(
        label="Leaderboard",
        interactive=False,
    )
    leaderboard_plot = gr.Plot(label="Metric comparison", format="png")

    # Re-read the result files and reset the view to its defaults.
    reload_button.click(
        fn=reload_data,
        inputs=[],
        outputs=[status_box, venue_dropdown, leaderboard_table, leaderboard_plot],
    )

    # Inputs are listed in the same order as update_leaderboard's parameters
    # (metric, top_k, model_filter, venue_filter, sort_mode, selected_metrics).
    update_button.click(
        fn=update_leaderboard,
        inputs=[
            metric_dropdown,
            topk_slider,
            model_filter_box,
            venue_dropdown,
            sort_mode_radio,
            metrics_select,
        ],
        outputs=[leaderboard_table, leaderboard_plot],
    )

    # Populate the page through the same handler as the reload button.
    demo.load(
        fn=reload_data,
        inputs=[],
        outputs=[status_box, venue_dropdown, leaderboard_table, leaderboard_plot],
    )


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()