import re

import gradio as gr
import pandas as pd
import plotly.graph_objects as go

from app_utils import load_results, visualize_leaderboard, apply_data_slice, DATA_SLICE_MAP

# Results table loaded once at import time; leaderboard() filters a copy of it
# on every call, so this frame itself is never modified by the filters.
results_df = load_results()

# Markdown introduction rendered near the top of the page via gr.Markdown.
DESCRIPTION = """
# Hughes Hallucination Evaluation Model (HHEM) Leaderboard

Using [Vectara](https://vectara.com/)'s proprietary [Factual Consistency Evaluation Model](https://www.vectara.com/blog/hallucination-detection-commercial-vs-open-source-a-deep-dive),
this leaderboard evaluates how often an LLM hallucinates -- containing information not stated in the source document -- when summarizing a document.
For an LLM, its hallucination rate is defined as the ratio of summaries that hallucinate to the total number of summaries it generates.
For more details or to contribute, see [this Github repo](https://github.com/vectara/hallucination-leaderboard).
"""


def leaderboard(
    filter_models_by_name: str = "",
    high_ar_only: bool = False,
    size_filter: str = "all",
    access_filter: str = "all",
    data_slice: str = "Overall"
):
    """Filter the leaderboard and build the plot and table shown in the UI.

    Args:
        filter_models_by_name: Model-name fragments separated by ``;`` or
            ``,``. Spaces are removed and fragments are lower-cased, then
            each one is matched as a *literal* substring against the
            ``LLM_lower_case`` column; a row is kept if any fragment matches.
            An empty value, or any fragment that is exactly ``all``,
            disables the name filter.
        high_ar_only: When true, keep only rows whose ``Answer %`` is >= 95.
        size_filter: ``"all"`` (or empty) for no filtering, otherwise the
            value a row's ``Model Size`` must equal.
        access_filter: ``"all"`` (or empty) for no filtering, otherwise the
            value a row's ``Accessibility`` must equal.
        data_slice: Slice name forwarded to ``apply_data_slice``.

    Returns:
        A ``(figure, table)`` tuple. ``table`` holds the four display
        columns. When no row survives the filters, ``figure`` is a blank
        Plotly figure with a "no results" annotation and ``table`` is empty.
    """
    display_columns = ["LLM", "Hallucination %", "Answer %", "Avg Summary Words"]

    df = results_df.copy()

    # Apply data slice first (recalculates metrics and re-sorts)
    df = apply_data_slice(df, data_slice)

    # Filter by answer rate if toggle is on
    if high_ar_only:
        df = df[df["Answer %"] >= 95]

    # Filter by model size
    if size_filter and size_filter != "all":
        df = df[df["Model Size"] == size_filter]

    # Filter by accessibility
    if access_filter and access_filter != "all":
        df = df[df["Accessibility"] == access_filter]

    # Filter by model name. `or ""` guards against a None value from the UI.
    raw_names = (filter_models_by_name or "").replace(",", ";").replace(" ", "")
    fragments = [name.lower() for name in raw_names.split(";") if name]
    # Compare whole fragments with "all": a substring test would also fire
    # for inputs such as "small" and silently switch the filter off.
    if fragments and "all" not in fragments:
        # Escape every fragment so user input is matched literally; unescaped,
        # "." would match any character and "(" or "+" could raise re.error.
        pattern = "|".join(re.escape(fragment) for fragment in fragments)
        df = df[df["LLM_lower_case"].str.contains(pattern, na=False)]

    if df.empty:
        # Show "no results" message in the plot
        fig = go.Figure()
        fig.add_annotation(
            text="No models found matching your filter",
            xref="paper", yref="paper", x=0.5, y=0.5,
            showarrow=False, font=dict(size=14, color="gray")
        )
        fig.update_layout(
            xaxis=dict(visible=False), yaxis=dict(visible=False),
            height=400, margin=dict(l=50, r=50, t=50, b=50)
        )
        return fig, pd.DataFrame(columns=display_columns)

    fig = visualize_leaderboard(df)
    return fig, df[display_columns]


# Page layout: header, then a row with the plot (left) and filter controls
# (right), then the results table. Every control re-runs leaderboard().
with gr.Blocks(
    title="Hughes Hallucination Evaluation Model (HHEM) Leaderboard",
    theme=gr.themes.Soft(),
    css="""
    .header-logo {
        display: flex;
        align-items: center;
        gap: 10px;
        margin-bottom: 10px;
    }
    .header-logo img {
        height: 40px;
    }
    footer { display: none !important; }
    .modebar { display: none !important; }
    .horizontal-radio .wrap {
        display: flex !important;
        flex-direction: row !important;
        gap: 8px !important;
    }
    """,
) as demo:
    # Header: logo image followed by the Markdown introduction.
    gr.HTML(
        '<div class="header-logo">'
        '<img src="https://huggingface.co/spaces/vectara/README/resolve/main/Vectara-logo.png" alt="Vectara">'
        '</div>'
    )
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        # Wide column: the chart.
        with gr.Column(scale=3):
            plot_output = gr.Plot(show_label=False)

        # Narrow column: one widget per leaderboard() argument.
        with gr.Column(scale=1):
            filter_input = gr.Textbox(
                value="",
                placeholder="Filter models...",
                show_label=False,
            )
            high_ar_toggle = gr.Checkbox(
                value=False,
                label="Only models with ≥95% answer rate",
            )
            size_filter = gr.Radio(
                label="Model size",
                choices=["all", "small", "large"],
                value="all",
                elem_classes=["horizontal-radio"],
            )
            access_filter = gr.Radio(
                label="Model type",
                choices=["all", "commercial", "open"],
                value="all",
                elem_classes=["horizontal-radio"],
            )
            data_slice = gr.Dropdown(
                label="Data Slice",
                choices=list(DATA_SLICE_MAP.keys()),
                value="Overall",
            )

    with gr.Row():
        table_output = gr.Dataframe(
            max_height=500,
            interactive=False,
            label="Leaderboard",
        )

    # Order of `inputs` mirrors leaderboard()'s positional parameters.
    inputs = [filter_input, high_ar_toggle, size_filter, access_filter, data_slice]
    outputs = [plot_output, table_output]

    # Populate the plot and table when the page first loads.
    demo.load(fn=leaderboard, inputs=inputs, outputs=outputs)

    # Any change to any control recomputes both outputs from all controls.
    for _control in inputs:
        _control.change(fn=leaderboard, inputs=inputs, outputs=outputs)


# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    # Serve on host "0.0.0.0" at port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)