Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| import gradio as gr | |
| import pandas as pd | |
| import matplotlib.pyplot | |
| from app_utils import load_results, visualize_leaderboard | |
# Load the leaderboard results once, at import time. leaderboard() below works
# on a copy of this frame for every request, so it is never mutated.
results_df = load_results()

# Markdown introduction rendered at the top of the page via gr.Markdown.
# Lines are kept flush-left on purpose: indented Markdown would be rendered as
# a code block rather than as a heading/paragraphs.
DESCRIPTION = """
# Hughes Hallucination Evaluation Model (HHEM) Leaderboard
Using [Vectara](https://vectara.com/)'s proprietary [Factual Consistency Evaluation Model](https://www.vectara.com/blog/hallucination-detection-commercial-vs-open-source-a-deep-dive),
this leaderboard evaluates how often an LLM hallucinates -- containing information not stated in the source document -- when summarizing a document.
For an LLM, its hallucination rate is defined as the ratio of summaries that hallucinate to the total number of summaries it generates.
For more details or to contribute, see [this Github repo](https://github.com/vectara/hallucination-leaderboard).
"""
def leaderboard(
    filter_models_by_name: str = "",
    high_ar_only: bool = False,
    size_filter: str = "all",
    access_filter: str = "all"
):
    """Filter the leaderboard and build the plot and table to display.

    Args:
        filter_models_by_name: Model-name fragments separated by "," or ";".
            Spaces are removed, fragments are lower-cased and matched as
            literal substrings against the ``LLM_lower_case`` column
            (presumably the lower-cased model name -- confirm in
            ``load_results``). Empty input, or any fragment equal to "all",
            disables the name filter.
        high_ar_only: When true, keep only rows whose "Answer %" is >= 95.
        size_filter: Exact "Model Size" value to keep, or "all" for no filter.
        access_filter: Exact "Accessibility" value to keep, or "all" for no
            filter.

    Returns:
        A ``(figure, table)`` pair: the figure produced by
        ``visualize_leaderboard`` and the filtered rows restricted to the
        displayed columns. When nothing matches, the figure is a placeholder
        message and the table is empty (same columns).
    """
    # Function-scope import: only needed to escape user-typed fragments.
    import re

    table_columns = ["LLM", "Hallucination %", "Answer %", "Avg Summary Words"]

    # Work on a copy so the module-level results are never modified.
    df = results_df.copy()

    # Filter by answer rate if toggle is on
    if high_ar_only:
        df = df[df["Answer %"] >= 95]

    # Filter by model size
    if size_filter and size_filter != "all":
        df = df[df["Model Size"] == size_filter]

    # Filter by accessibility
    if access_filter and access_filter != "all":
        df = df[df["Accessibility"] == access_filter]

    # Filter by model name. A cleared textbox may deliver None; treat it as "".
    raw_filter = (filter_models_by_name or "").replace(",", ";").replace(" ", "")
    terms = [term.lower() for term in raw_filter.split(";") if term]
    # "all" must be a whole fragment to disable filtering; a substring test
    # would also trigger on names that merely contain it (e.g. "...-small").
    if terms and "all" not in terms:
        # Escape each fragment so characters such as "(", "+" or "." are
        # matched literally instead of raising re.error or acting as wildcards.
        pattern = "|".join(re.escape(term) for term in terms)
        df = df[df["LLM_lower_case"].str.contains(pattern, na=False)]

    if df.empty:
        # Show "no results" message in the plot
        fig, ax = matplotlib.pyplot.subplots(figsize=(10, 5))
        ax.text(0.5, 0.5, "No models found matching your filter",
                ha='center', va='center', fontsize=14, color='gray')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')
        # Unregister the figure from pyplot's global manager so repeated empty
        # queries do not accumulate open figures; the Figure object itself
        # stays valid for rendering by the caller.
        matplotlib.pyplot.close(fig)
        return fig, pd.DataFrame(columns=table_columns)

    fig = visualize_leaderboard(df)
    return fig, df[table_columns]
# Page stylesheet: lays out the logo header and hides Gradio's default footer.
page_css = """
.header-logo {
display: flex;
align-items: center;
gap: 10px;
margin-bottom: 10px;
}
.header-logo img {
height: 40px;
}
footer { display: none !important; }
"""

# Logo banner markup shown above the description.
logo_html = (
    '<div class="header-logo">'
    '<img src="https://huggingface.co/spaces/vectara/README/resolve/main/Vectara-logo.png" alt="Vectara">'
    '</div>'
)

with gr.Blocks(
    title="Hughes Hallucination Evaluation Model (HHEM) Leaderboard",
    theme=gr.themes.Soft(),
    css=page_css
) as demo:
    # Header: logo followed by the Markdown introduction.
    gr.HTML(logo_html)
    gr.Markdown(DESCRIPTION)

    # Main row: wide plot on the left, filter controls on the right.
    with gr.Row():
        with gr.Column(scale=3):
            plot_output = gr.Plot(show_label=False)
        with gr.Column(scale=1):
            filter_input = gr.Textbox(
                placeholder="Filter models...",
                show_label=False,
                value=""
            )
            high_ar_toggle = gr.Checkbox(
                label="Only models with ≥95% answer rate",
                value=False
            )
            size_filter = gr.Radio(
                choices=["all", "small", "large"],
                value="all",
                label="Model size"
            )
            access_filter = gr.Radio(
                choices=["all", "commercial", "open"],
                value="all",
                label="Model type"
            )

    # Second row: read-only results table.
    with gr.Row():
        table_output = gr.Dataframe(
            label="Leaderboard",
            interactive=False,
            max_height=500
        )

    # Component order matches the positional parameters of leaderboard().
    inputs = [filter_input, high_ar_toggle, size_filter, access_filter]
    outputs = [plot_output, table_output]

    # Populate the plot and table when the page first loads.
    demo.load(fn=leaderboard, inputs=inputs, outputs=outputs)

    # Every control re-runs the same callback with the full set of inputs.
    for control in inputs:
        control.change(fn=leaderboard, inputs=inputs, outputs=outputs)
# Start the web server only when this file is executed as a script, so that
# importing the module does not launch the app.
if __name__ == "__main__":
    # "0.0.0.0" listens on all network interfaces rather than only localhost.
    # NOTE(review): port 7860 is presumably what the hosting platform expects
    # -- confirm before changing.
    demo.launch(server_name="0.0.0.0", server_port=7860)