Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 4,488 Bytes
d0c57df 8c3427d d0c57df 8c3427d d0c57df 8c3427d d0c57df 8c3427d d0c57df 8c3427d d0c57df 8c3427d d0c57df 696341e d0c57df 8c3427d d0c57df |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
import gradio as gr
import pandas as pd
import matplotlib.pyplot
from app_utils import load_results, visualize_leaderboard
# Leaderboard results, loaded once when the module is imported. The
# `leaderboard` callback filters a copy of this frame on every request.
results_df = load_results()

# Markdown text rendered at the top of the page via `gr.Markdown`.
DESCRIPTION = """
# Hughes Hallucination Evaluation Model (HHEM) Leaderboard
Using [Vectara](https://vectara.com/)'s proprietary [Factual Consistency Evaluation Model](https://www.vectara.com/blog/hallucination-detection-commercial-vs-open-source-a-deep-dive),
this leaderboard evaluates how often an LLM hallucinates -- containing information not stated in the source document -- when summarizing a document.
For an LLM, its hallucination rate is defined as the ratio of summaries that hallucinate to the total number of summaries it generates.
For more details or to contribute, see [this Github repo](https://github.com/vectara/hallucination-leaderboard).
"""
def leaderboard(
    filter_models_by_name: str = "",
    high_ar_only: bool = False,
    size_filter: str = "all",
    access_filter: str = "all",
):
    """Filter the leaderboard and build the plot and table to display.

    Args:
        filter_models_by_name: Model-name fragments separated by "," or ";".
            Spaces are stripped and the text is lower-cased before it is
            matched, as a literal substring, against the ``LLM_lower_case``
            column. An empty value, or any text containing the substring
            "all", disables name filtering.
        high_ar_only: When true, keep only rows whose ``Answer %`` is at
            least 95.
        size_filter: Value to match exactly against ``Model Size``;
            "all" (or an empty value) disables the filter.
        access_filter: Value to match exactly against ``Accessibility``;
            "all" (or an empty value) disables the filter.

    Returns:
        A ``(figure, table)`` pair. ``table`` holds the ``LLM``,
        ``Hallucination %``, ``Answer %`` and ``Avg Summary Words`` columns
        of the surviving rows. When nothing survives, ``figure`` is a blank
        matplotlib figure carrying a "no results" message and ``table`` is an
        empty frame with the same columns.
    """
    table_columns = ["LLM", "Hallucination %", "Answer %", "Avg Summary Words"]

    # Work on a copy so the module-level results are never mutated.
    df = results_df.copy()

    if high_ar_only:
        df = df[df["Answer %"] >= 95]

    if size_filter and size_filter != "all":
        df = df[df["Model Size"] == size_filter]

    if access_filter and access_filter != "all":
        df = df[df["Accessibility"] == access_filter]

    # Normalise the free-text filter: "," and ";" both separate fragments,
    # spaces are ignored. `or ""` tolerates a None value from the UI.
    name_query = (
        (filter_models_by_name or "").replace(",", ";").replace(" ", "").lower()
    )
    # NOTE: this is a substring test, so any query containing "all"
    # (e.g. "small") also turns name filtering off.
    if name_query and "all" not in name_query:
        fragments = [part for part in name_query.split(";") if part]
        if fragments:
            names = df["LLM_lower_case"]
            # Match each fragment literally (regex=False). The text comes
            # straight from the user, so characters such as "(", "+" or "."
            # must not be interpreted as regular-expression syntax.
            keep = pd.Series(False, index=df.index)
            for fragment in fragments:
                keep |= names.str.contains(fragment, regex=False, na=False)
            df = df[keep]

    if df.empty:
        # Nothing to plot: show a placeholder message instead of a chart.
        fig, ax = matplotlib.pyplot.subplots(figsize=(10, 5))
        ax.text(
            0.5,
            0.5,
            "No models found matching your filter",
            ha="center",
            va="center",
            fontsize=14,
            color="gray",
        )
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis("off")
        return fig, pd.DataFrame(columns=table_columns)

    fig = visualize_leaderboard(df)
    return fig, df[table_columns]
# Build the page: logo header, description, plot with filter controls beside
# it, and the results table underneath.
with gr.Blocks(
    title="Hughes Hallucination Evaluation Model (HHEM) Leaderboard",
    theme=gr.themes.Soft(),
    css="""
.header-logo {
display: flex;
align-items: center;
gap: 10px;
margin-bottom: 10px;
}
.header-logo img {
height: 40px;
}
footer { display: none !important; }
""",
) as demo:
    gr.HTML(
        '<div class="header-logo">'
        '<img src="https://huggingface.co/spaces/vectara/README/resolve/main/Vectara-logo.png" alt="Vectara">'
        '</div>'
    )
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        # Wide column: the leaderboard chart.
        with gr.Column(scale=3):
            plot_output = gr.Plot(show_label=False)

        # Narrow column: every control that narrows the set of models.
        with gr.Column(scale=1):
            filter_input = gr.Textbox(
                value="",
                show_label=False,
                placeholder="Filter models...",
            )
            high_ar_toggle = gr.Checkbox(
                value=False,
                label="Only models with ≥95% answer rate",
            )
            size_filter = gr.Radio(
                label="Model size",
                choices=["all", "small", "large"],
                value="all",
            )
            access_filter = gr.Radio(
                label="Model type",
                choices=["all", "commercial", "open"],
                value="all",
            )

    with gr.Row():
        table_output = gr.Dataframe(
            max_height=500,
            interactive=False,
            label="Leaderboard",
        )

    inputs = [filter_input, high_ar_toggle, size_filter, access_filter]
    outputs = [plot_output, table_output]

    # Populate the plot and table when the page first loads.
    demo.load(fn=leaderboard, inputs=inputs, outputs=outputs)

    # Every control re-runs the same callback with the full set of inputs
    # whenever its value changes.
    for control in inputs:
        control.change(fn=leaderboard, inputs=inputs, outputs=outputs)
# Start the Gradio server only when this file is executed as a script, not
# when it is imported. The server is asked to listen on address 0.0.0.0,
# port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)