import os import glob import json from typing import Dict, Literal, Tuple, List, Optional import pandas as pd import matplotlib.pyplot as plt import gradio as gr RESULTS_DIR = "./worldlens-results" METRICS_MIN_BETTER = [ "Depth Discrepancy", "Perceptual Discrepancy", "Photometric Error", "Geometric Discrepancy", "Novel-View Discrepancy", "Displacement Error", ] METRICS_MAX_BETTER = [ "Subject Fidelity", "Subject Coherence", "Subject Consistency", "Temporal Consistency", "Semantic Consistency", "View Consistency", "Novel-View Quality", "Open-Loop Adherence", "Route Completion", "Closed-Loop Adherence", "Map Segmentation", "3D Object Detection", "3D Object Tracking", "Occupancy Prediction", ] METRIC_BETTER: Dict[str, Literal["min", "max"]] = { m: "min" for m in METRICS_MIN_BETTER } METRIC_BETTER.update({m: "max" for m in METRICS_MAX_BETTER}) METRIC_CHOICES: List[str] = sorted(set(METRICS_MIN_BETTER + METRICS_MAX_BETTER)) DEFAULT_METRIC = "Subject Fidelity" if "Subject Fidelity" in METRIC_CHOICES else METRIC_CHOICES[0] df_all: Optional[pd.DataFrame] = None def load_results() -> pd.DataFrame: rows = [] json_files = sorted(glob.glob(os.path.join(RESULTS_DIR, "*.json"))) if not json_files: return pd.DataFrame() for path in json_files: with open(path, "r") as f: data = json.load(f) model_name = os.path.splitext(os.path.basename(path))[0] venue = data.get("venue", "") date = data.get("date", "") row = { "Model": model_name, "venue": venue, "date": date, } metrics = data.get("Metrics", {}) for category, metric_dict in metrics.items(): if not isinstance(metric_dict, dict): continue for metric_name, value in metric_dict.items(): row[metric_name] = value rows.append(row) df = pd.DataFrame(rows) meta_cols = ["Model", "venue", "date"] metric_cols = [c for c in df.columns if c not in meta_cols] df = df[meta_cols + metric_cols] return df def get_venue_choices(df: pd.DataFrame) -> List[str]: if "venue" not in df.columns: return ["All"] venues = sorted([v for v in df["venue"].dropna().unique() if v != ""]) return ["All"] + venues def update_leaderboard( metric: str, top_k: int, model_filter: str, venue_filter: str, sort_mode: str, selected_metrics: Optional[List[str]], ) -> Tuple[pd.DataFrame, plt.Figure]: global df_all if df_all is None or df_all.empty: fig, ax = plt.subplots(figsize=(6, 3)) ax.text(0.5, 0.5, "No results found in ./worldlens-results", ha="center", va="center") ax.axis("off") return pd.DataFrame(), fig df = df_all.copy() if model_filter: df = df[df["Model"].str.contains(model_filter, case=False, regex=False)] if venue_filter and venue_filter != "All": df = df[df["venue"] == venue_filter] if metric not in df.columns: fig, ax = plt.subplots(figsize=(6, 3)) ax.text(0.5, 0.5, f"Metric '{metric}' not found in current data.", ha="center", va="center") ax.axis("off") return pd.DataFrame(), fig better = METRIC_BETTER.get(metric, "max") if sort_mode == "Auto": ascending = (better == "min") elif sort_mode == "Ascending (small → large)": ascending = True else: ascending = False df_sorted = df.sort_values(metric, ascending=ascending) df_top = df_sorted.head(top_k).copy() cols = ["Model", "venue", "date"] if selected_metrics is None: selected_metrics = [] for m in selected_metrics: if m in df_top.columns and m not in cols: cols.append(m) if metric in df_top.columns and metric not in cols: cols.append(metric) table_df = df_top[cols].round(3) # ========================= # Dark-theme leaderboard plot # ========================= bg_color = "#0e1117" # HF 深色背景 panel_color = "#161b22" # 面板背景 bar_color = "#4cc9f0" # 主色(青蓝) grid_color = "#30363d" text_color = "#c9d1d9" fig, ax = plt.subplots(figsize=(10, 4.5)) fig.patch.set_facecolor(bg_color) ax.set_facecolor(panel_color) values = df_top[metric].values models = table_df["Model"].values bars = ax.barh(models, values, color=bar_color, height=0.6) if ascending: ax.invert_yaxis() ax.set_xlabel(metric, color=text_color, fontsize=11, labelpad=6) ax.set_title( f"Leaderboard · {metric}", fontsize=13, color=text_color, pad=10, fontweight="bold" ) ax.xaxis.grid(True, linestyle="--", linewidth=0.6, color=grid_color, alpha=0.7) ax.yaxis.grid(False) for spine in ["top", "right", "left"]: ax.spines[spine].set_visible(False) ax.spines["bottom"].set_color(grid_color) ax.tick_params(axis="x", colors=text_color, labelsize=10) ax.tick_params(axis="y", colors=text_color, labelsize=10) for bar, value in zip(bars, values): ax.text( bar.get_width() * 1.01, bar.get_y() + bar.get_height() / 2, f"{value:.2f}", va="center", ha="left", fontsize=9.5, color=text_color ) plt.tight_layout() return table_df, fig def reload_data(): global df_all df_all = load_results() if df_all is None or df_all.empty: msg = "No JSON files found in ./worldlens-results. Please upload some results." dummy_fig, ax = plt.subplots(figsize=(6, 3)) ax.text(0.5, 0.5, msg, ha="center", va="center") ax.axis("off") venue_update = gr.update(choices=["All"], value="All") return msg, venue_update, pd.DataFrame(), dummy_fig venue_choices = get_venue_choices(df_all) msg = f"Loaded {len(df_all)} models from {RESULTS_DIR}" default_selected = ["Subject Fidelity", "Temporal Consistency", "Map Segmentation"] default_selected = [m for m in default_selected if m in METRIC_CHOICES] table_df, fig = update_leaderboard( metric=DEFAULT_METRIC, top_k=10, model_filter="", venue_filter="All", sort_mode="Auto", selected_metrics=default_selected, ) venue_update = gr.update( choices=venue_choices, value="All", interactive=True, ) return msg, venue_update, table_df, fig with gr.Blocks(css=""" #title { text-align: center; } """) as demo: gr.Markdown( """ # 🌍 WorldLens Leaderboard """, elem_id="title" ) status_box = gr.Markdown("Loading results...", elem_id="status") with gr.Row(): metric_dropdown = gr.Dropdown( label="Metric (for ranking)", choices=METRIC_CHOICES, # 固定 choices,避免动态更新不兼容 value=DEFAULT_METRIC, interactive=True, ) sort_mode_radio = gr.Radio( label="Sort mode", choices=[ "Auto", "Ascending (small → large)", "Descending (large → small)", ], value="Auto", interactive=True, ) topk_slider = gr.Slider( label="Top-K", minimum=3, maximum=50, value=10, step=1, interactive=True, ) # 新增:表格中展示的多个指标 metrics_select = gr.CheckboxGroup( label="Metrics to show in table", choices=METRIC_CHOICES, value=["Subject Fidelity", "Temporal Consistency", "Map Segmentation"], interactive=True, ) with gr.Row(): model_filter_box = gr.Textbox( label="Filter by model name", placeholder="magic, dream, ...", interactive=True, ) venue_dropdown = gr.Dropdown( label="Filter by venue", choices=["All"], value="All", interactive=True, ) with gr.Row(): reload_button = gr.Button("🔄 Reload JSONs", variant="secondary") update_button = gr.Button("✅ Update leaderboard", variant="primary") leaderboard_table = gr.DataFrame( label="Leaderboard", interactive=False, ) leaderboard_plot = gr.Plot(label="Metric comparison", format="png") reload_button.click( fn=reload_data, inputs=[], outputs=[status_box, venue_dropdown, leaderboard_table, leaderboard_plot], ) update_button.click( fn=update_leaderboard, inputs=[ metric_dropdown, topk_slider, model_filter_box, venue_dropdown, sort_mode_radio, metrics_select, ], outputs=[leaderboard_table, leaderboard_plot], ) demo.load( fn=reload_data, inputs=[], outputs=[status_box, venue_dropdown, leaderboard_table, leaderboard_plot], ) if __name__ == "__main__": demo.launch()