import gradio as gr import pandas as pd import plotly.graph_objects as go from app_utils import load_results, visualize_leaderboard, apply_data_slice, DATA_SLICE_MAP results_df = load_results() DESCRIPTION = """ # Hughes Hallucination Evaluation Model (HHEM) Leaderboard Using [Vectara](https://vectara.com/)'s proprietary [Factual Consistency Evaluation Model](https://www.vectara.com/blog/hallucination-detection-commercial-vs-open-source-a-deep-dive), this leaderboard evaluates how often an LLM hallucinates -- containing information not stated in the source document -- when summarizing a document. For an LLM, its hallucination rate is defined as the ratio of summaries that hallucinate to the total number of summaries it generates. For more details or to contribute, see [this Github repo](https://github.com/vectara/hallucination-leaderboard). """ def leaderboard( filter_models_by_name: str = "", high_ar_only: bool = False, size_filter: str = "all", access_filter: str = "all", data_slice: str = "Overall" ): """Filter and display the leaderboard.""" df = results_df.copy() # Apply data slice first (recalculates metrics and re-sorts) df = apply_data_slice(df, data_slice) # Filter by answer rate if toggle is on if high_ar_only: df = df[df["Answer %"] >= 95] # Filter by model size if size_filter and size_filter != "all": df = df[df["Model Size"] == size_filter] # Filter by accessibility if access_filter and access_filter != "all": df = df[df["Accessibility"] == access_filter] # Filter by model name filter_models_by_name = filter_models_by_name.replace(",", ";").replace(" ", "") if len(filter_models_by_name) > 0 and "all" not in filter_models_by_name.lower(): filter_list = [name.lower() for name in filter_models_by_name.split(";") if name] df = df[df["LLM_lower_case"].str.contains("|".join(filter_list), na=False)] if len(df) == 0: # Show "no results" message in the plot fig = go.Figure() fig.add_annotation( text="No models found matching your filter", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=14, color="gray") ) fig.update_layout( xaxis=dict(visible=False), yaxis=dict(visible=False), height=400, margin=dict(l=50, r=50, t=50, b=50) ) return fig, pd.DataFrame(columns=["LLM", "Hallucination %", "Answer %", "Avg Summary Words"]) fig = visualize_leaderboard(df) return fig, df[["LLM", "Hallucination %", "Answer %", "Avg Summary Words"]] with gr.Blocks( title="Hughes Hallucination Evaluation Model (HHEM) Leaderboard", theme=gr.themes.Soft(), css=""" .header-logo { display: flex; align-items: center; gap: 10px; margin-bottom: 10px; } .header-logo img { height: 40px; } footer { display: none !important; } .modebar { display: none !important; } .horizontal-radio .wrap { display: flex !important; flex-direction: row !important; gap: 8px !important; } """ ) as demo: gr.HTML( '
'
'