Spaces:

adpinzonp
/

FrontierBench

Sleeping

App Files Files Community

adpinzonp committed on Aug 6, 2025

Commit

7d31c00

verified ·

1 Parent(s): a987529

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

app.py +269 -0

app.py ADDED Viewed

	@@ -0,0 +1,269 @@

+import pandas as pd
+def sheet_to_dataframe(sheet_url):
+    """
+    Convert a public Google Sheet into a pandas DataFrame.
+
+    The spreadsheet ID (and the tab ``gid``, when present) is extracted from
+    the URL and used to build a CSV export link, which is then read with
+    ``pd.read_csv``.
+
+    sheet_url: sheet URL ("https://docs.google.com/spreadsheets/d/ID/edit#gid=0").
+        The ``gid`` part is optional: share links such as ".../edit?usp=sharing"
+        are accepted and no tab is requested explicitly in that case
+        (NOTE(review): Google is expected to export the first tab then - confirm).
+    Returns: pandas DataFrame
+    Raises: ValueError if no spreadsheet ID can be found in ``sheet_url``.
+    """
+    import re
+    url = str(sheet_url)
+    id_match = re.search(r'/d/([a-zA-Z0-9-_]+)', url)
+    if not id_match:
+        raise ValueError("Invalid Google Sheets URL")
+    # Build the CSV link
+    csv_url = f"https://docs.google.com/spreadsheets/d/{id_match.group(1)}/export?format=csv"
+    # Only pin a specific tab when the URL names one; the first gid found wins.
+    gid_match = re.search(r'gid=([0-9]+)', url)
+    if gid_match:
+        csv_url += f"&gid={gid_match.group(1)}"
+    # Read the DataFrame
+    return pd.read_csv(csv_url)
+# ---------------- App code below ----------------
+import numpy as np
+import gradio as gr
+import plotly.graph_objects as go
+from sklearn.experimental import enable_iterative_imputer  # noqa: F401
+from sklearn.impute import IterativeImputer, SimpleImputer
+import warnings
+# Suppress every FutureWarning for the whole process.
+# NOTE(review): presumably aimed at noise from the experimental imputer / pandas - confirm.
+warnings.filterwarnings("ignore", category=FutureWarning)
+# Google Sheet that refetch_all() downloads on page load and on every "Reload" click.
+DEFAULT_SHEET_URL = "https://docs.google.com/spreadsheets/d/1ygw8nrqI-FdHzyQGczKR5n3t01d-9sxMB_KVoClhoAg/edit?gid=0#gid=0"
+def _parse_percent_value(v):
+    """
+    Turn one spreadsheet cell into a float, or NaN when it holds no number.
+
+    Numbers are returned as floats unchanged. Text is trimmed, "%" signs and
+    "," separators are removed, and the remainder is parsed with ``float``.
+    None, empty text, NA-like words, lone dashes and unparseable text all
+    yield NaN.
+    """
+    if v is None:
+        return np.nan
+    if isinstance(v, (int, float)):
+        # float() keeps NaN as NaN, so missing floats need no separate branch
+        return float(v)
+    text = str(v).strip()
+    # Blank cells and NA-like tokens (checked before any "%" is stripped)
+    if text == "" or text.lower() in ("na", "n/a", "null", "none"):
+        return np.nan
+    number = text.replace("%", "").replace(",", "").strip()
+    # A lone hyphen / en dash / em dash is used as a "no value" placeholder
+    if number in ("-", "–", "—"):
+        return np.nan
+    try:
+        return float(number)
+    except ValueError:
+        return np.nan
+def _split_columns(df):
+    """
+    Split the column names into the four leading fixed columns and the rest.
+
+    Returns: (fixed, benches) - two lists of column names; everything after
+        the fourth column is treated as a benchmark.
+    Raises: ValueError if the frame has fewer than four columns.
+    """
+    names = [*df.columns]
+    if len(names) >= 4:
+        return names[:4], names[4:]
+    raise ValueError("The sheet must have at least the first four columns: Model, Company, Input price per 1MT, Output price per 1MT")
+def _clean_benchmarks(df):
+    """
+    Parse every benchmark column of the sheet into numbers.
+
+    Works on a copy of ``df``: each benchmark cell goes through
+    ``_parse_percent_value`` (so "87.5%" becomes 87.5 and blanks become NaN);
+    the four fixed columns are left as they are.
+
+    Returns: (numeric copy of df, benchmark column names, fixed column names)
+    """
+    fixed_cols, bench_cols = _split_columns(df)
+    cleaned = df.copy()
+    for name in bench_cols:
+        column = cleaned[name]
+        cleaned[name] = column.apply(_parse_percent_value)
+    return cleaned, bench_cols, fixed_cols
+def _style_table(df_display, benches, cmap="RdYlGn", vmin=0.0, vmax=100.0, precision=1):
+    """
+    Render ``df_display`` as a styled HTML table.
+
+    Benchmark columns are printed as percentages with ``precision`` decimals
+    and shaded with the ``cmap`` gradient over the fixed range vmin..vmax;
+    missing values show as "N/A". The first four columns are set in semi-bold.
+
+    Returns: the HTML string produced by the pandas Styler.
+    """
+    percent_format = f"{{:.{precision}f}}%"
+    # Dark theme: sticky header row, collapsed borders, alternating row shades
+    table_css = [
+        {"selector": "th", "props": [("position", "sticky"), ("top", "0"), ("background", "#111"), ("color", "white"), ("z-index", "1")]},
+        {"selector": "table", "props": [("border-collapse", "collapse"), ("font-family", "Inter, Roboto, Arial, sans-serif")]},
+        {"selector": "td, th", "props": [("border", "1px solid #333"), ("padding", "6px 8px")]},
+        {"selector": "tbody tr:nth-child(odd)", "props": [("background-color", "#161616")]},
+        {"selector": "tbody tr:nth-child(even)", "props": [("background-color", "#0f0f0f")]},
+    ]
+    styler = df_display.style
+    styler = styler.format(dict.fromkeys(benches, percent_format), na_rep="N/A")
+    # axis=None: colour all benchmark cells against one common scale
+    styler = styler.background_gradient(axis=None, subset=benches, cmap=cmap, vmin=vmin, vmax=vmax)
+    styler = styler.set_table_styles(table_css)
+    styler = styler.set_properties(subset=df_display.columns[:4], **{"font-weight": "600"})
+    return styler.to_html()
+def _filter_rows(df_raw, df_num, benches, text_query, bench_choice, comparator, threshold):
+    """
+    Keep the rows that match the text filter and the benchmark threshold filter.
+
+    df_raw: sheet as fetched; its first two columns (Model, Company) are searched.
+    df_num: numeric scores, row-aligned with df_raw (same index).
+    benches: benchmark column names present in df_num.
+    text_query: case-insensitive substring, matched literally (not as a regex);
+        empty/None disables the text filter.
+    bench_choice: "Any" (at least one benchmark passes the threshold), a name
+        from benches, or anything else to skip the threshold filter.
+    comparator: "≥" keeps scores >= threshold; any other value means <=.
+    threshold: numeric cut-off. Missing scores never pass it.
+    Returns: (filtered df_raw, filtered df_num), both re-indexed from 0.
+    """
+    mask = pd.Series(True, index=df_raw.index)
+    query = str(text_query).strip().lower() if text_query else ""
+    if query:
+        # Search in Model + Company. Missing cells become "" (not the text "nan"),
+        # so a query like "nan" cannot match rows that merely lack a value.
+        model_col = df_raw.iloc[:, 0]
+        company_col = df_raw.iloc[:, 1]
+        model_txt = model_col.astype(str).where(model_col.notna(), "").str.lower()
+        company_txt = company_col.astype(str).where(company_col.notna(), "").str.lower()
+        haystack = model_txt + " " + company_txt
+        # regex=False: user input such as "c++" or "(" must not be parsed as a pattern
+        mask &= haystack.str.contains(query, regex=False, na=False)
+    at_least = comparator == "≥"
+    if bench_choice == "Any":
+        # With no benchmark columns there is nothing to compare; keep all rows
+        # instead of filtering every one of them out.
+        if len(benches) > 0:
+            bench_vals = df_num[benches]
+            hits = bench_vals.ge(threshold) if at_least else bench_vals.le(threshold)
+            mask &= hits.any(axis=1).fillna(False)
+    elif bench_choice and bench_choice in benches:
+        col_vals = df_num[bench_choice]
+        hits = col_vals.ge(threshold) if at_least else col_vals.le(threshold)
+        mask &= hits.fillna(False)
+    return df_raw.loc[mask].reset_index(drop=True), df_num.loc[mask].reset_index(drop=True)
+def _build_correlation_plot(df_num, benches):
+    """
+    Build a Plotly heatmap of the Pearson correlations between benchmarks.
+
+    df_num: frame holding the numeric benchmark columns.
+    benches: benchmark column names to correlate.
+    Returns: a plotly Figure; an empty titled figure when there are no
+        benchmark columns.
+    """
+    n_bench = len(benches)
+    if n_bench == 0:
+        placeholder = go.Figure()
+        placeholder.update_layout(title="No benchmark columns found")
+        return placeholder
+    scores = df_num[benches].astype(float)
+    if scores.shape[1] == 1:
+        # A single column is shown as a fixed 1x1 matrix
+        corr = pd.DataFrame([[1.0]], index=benches, columns=benches)
+    else:
+        corr = scores.corr(method="pearson")
+    # Same tick budget on both axes, clamped to the range 5..20
+    tick_count = max(5, min(20, n_bench))
+    heatmap = go.Heatmap(
+        z=corr.values,
+        x=list(corr.columns),
+        y=list(corr.index),
+        colorscale="RdYlGn",
+        zmin=-1,
+        zmax=1,
+        colorbar=dict(title="ρ"),
+        hoverongaps=False,
+    )
+    fig = go.Figure(data=heatmap)
+    fig.update_layout(
+        title="Correlation between benchmark variables",
+        xaxis_nticks=tick_count,
+        yaxis_nticks=tick_count,
+        margin=dict(l=60, r=20, t=60, b=60),
+        height=600,
+    )
+    return fig
+def fetch_and_prepare(url):
+    """
+    Download the sheet at ``url`` and parse its benchmark columns.
+
+    Returns: (raw frame, numeric frame, benchmark column names, fixed column names)
+    """
+    raw = sheet_to_dataframe(url)
+    return (raw, *_clean_benchmarks(raw))
+def _impute_benchmarks(bench_only):
+    """
+    Fill missing benchmark scores; columns with no observed value stay NaN.
+
+    Both imputers drop features that are entirely missing at fit time, which
+    would make the result narrower than the input. Only columns holding at
+    least one value are therefore imputed, and the frame is then re-expanded
+    to the original columns. An empty frame (no rows or no columns) is
+    returned as an unchanged copy.
+    """
+    if bench_only.empty:
+        return bench_only.copy()
+    observed = [c for c in bench_only.columns if bench_only[c].notna().any()]
+    if not observed:
+        return bench_only.copy()
+    if len(observed) > 1:
+        imputer = IterativeImputer(random_state=0, sample_posterior=False, max_iter=15, initial_strategy="mean")
+    else:
+        # A single usable column has nothing to regress on: fall back to its mean
+        imputer = SimpleImputer(strategy="mean")
+    filled = pd.DataFrame(
+        imputer.fit_transform(bench_only[observed]),
+        columns=observed,
+        index=bench_only.index,
+    )
+    return filled.reindex(columns=bench_only.columns)
+def refetch_all(t1_q, t1_bench, t1_op, t1_thr, t3_q, t3_bench, t3_op, t3_thr):
+    """
+    Re-download the default sheet and rebuild every view with the current filters.
+
+    t1_* / t3_*: current text query, benchmark choice, comparator and threshold
+        of the "Original table" and "Imputed table" tabs.
+    Returns: a 9-tuple in the order of ``outs_reload`` - tab 1 HTML,
+        correlation figure, tab 3 HTML, updates for both benchmark dropdowns,
+        then the raw frame, numeric frame, benchmark names and imputed frame
+        that are cached for live filtering.
+    """
+    # Always re-fetch from the default sheet
+    df_raw, df_num, benches, _ = fetch_and_prepare(DEFAULT_SHEET_URL)
+    # Correlation
+    fig_corr = _build_correlation_plot(df_num, benches)
+    # Tab 1 initial render (with current filters)
+    df1_raw_f, df1_num_f = _filter_rows(df_raw, df_num, benches, t1_q, t1_bench, t1_op, t1_thr)
+    html_tab1 = _style_table(pd.concat([df1_raw_f.iloc[:, :4], df1_num_f[benches]], axis=1), benches)
+    # Imputation for Tab 3
+    bench_imp = _impute_benchmarks(df_num[benches].astype(float))
+    # Tab 3 initial render (with current filters)
+    df3_raw_f, df3_num_f = _filter_rows(df_raw, bench_imp, benches, t3_q, t3_bench, t3_op, t3_thr)
+    html_tab3 = _style_table(pd.concat([df3_raw_f.iloc[:, :4], df3_num_f[benches]], axis=1), benches)
+    # Dropdown choices; a previous selection survives only if the column still exists
+    bench_options = ["Any"] + benches
+    # Return UI updates and persistent states
+    return (
+        html_tab1,           # t1_html
+        fig_corr,            # corr_plot
+        html_tab3,           # t3_html
+        gr.update(choices=bench_options, value=t1_bench if t1_bench in bench_options else "Any"),
+        gr.update(choices=bench_options, value=t3_bench if t3_bench in bench_options else "Any"),
+        df_raw,              # s_df_raw
+        df_num,              # s_df_num
+        benches,             # s_benches
+        bench_imp            # s_bench_imp
+    )
+def filter_tab1(s_df_raw, s_df_num, s_benches, text_query, bench_choice, comparator, threshold):
+    """
+    Re-render the "Original table" tab from the cached data with new filters.
+
+    s_df_raw, s_df_num, s_benches: cached raw frame, numeric frame and
+        benchmark names from the last fetch.
+    text_query, bench_choice, comparator, threshold: current filter settings.
+    Returns: HTML of the filtered table, or "" when no data is cached yet.
+    """
+    # The states are still None if the initial fetch has not completed or failed;
+    # show an empty table instead of raising inside the event handler.
+    if s_df_raw is None or s_df_num is None or s_benches is None:
+        return ""
+    df1_raw_f, df1_num_f = _filter_rows(s_df_raw, s_df_num, s_benches, text_query, bench_choice, comparator, threshold)
+    html_tab1 = _style_table(pd.concat([df1_raw_f.iloc[:, :4], df1_num_f[s_benches]], axis=1), s_benches)
+    return html_tab1
+def filter_tab3(s_df_raw, s_bench_imp, s_benches, text_query, bench_choice, comparator, threshold):
+    """
+    Re-render the "Imputed table" tab from the cached data with new filters.
+
+    s_df_raw, s_bench_imp, s_benches: cached raw frame, imputed benchmark
+        frame and benchmark names from the last fetch.
+    text_query, bench_choice, comparator, threshold: current filter settings.
+    Returns: HTML of the filtered table, or "" when no data is cached yet.
+    """
+    # The states are still None if the initial fetch has not completed or failed;
+    # show an empty table instead of raising inside the event handler.
+    if s_df_raw is None or s_bench_imp is None or s_benches is None:
+        return ""
+    df3_raw_f, df3_num_f = _filter_rows(s_df_raw, s_bench_imp, s_benches, text_query, bench_choice, comparator, threshold)
+    html_tab3 = _style_table(pd.concat([df3_raw_f.iloc[:, :4], df3_num_f[s_benches]], axis=1), s_benches)
+    return html_tab3
+with gr.Blocks(css="""
+/* Make the HTML tables scrollable horizontally if wide */
+.table-wrap { overflow-x: auto; }
+""") as demo:
+    gr.Markdown("## LLM Benchmarks — Live from Google Sheets")
+    with gr.Row():
+        reload_btn = gr.Button("Reload", variant="primary", scale=1)
+    # Cached results of the last fetch, so filter changes never hit the network
+    s_df_raw = gr.State()
+    s_df_num = gr.State()
+    s_benches = gr.State()
+    s_bench_imp = gr.State()
+    with gr.Tabs():
+        # Tab 1: scores exactly as parsed from the sheet
+        with gr.Tab("Original table"):
+            with gr.Row():
+                t1_q = gr.Textbox(label="Filter: Model/Company contains", placeholder="e.g., llama", scale=2)
+                t1_bench = gr.Dropdown(choices=["Any"], value="Any", label="Benchmark", scale=1)
+                t1_op = gr.Radio(choices=["≥", "≤"], value="≥", label="Comparator", scale=1)
+                t1_thr = gr.Slider(minimum=0, maximum=100, value=0, step=1, label="Threshold (%)", scale=1)
+            t1_html = gr.HTML(elem_classes=["table-wrap"])
+        # Tab 2: benchmark-to-benchmark correlation heatmap
+        with gr.Tab("Correlation matrix"):
+            corr_plot = gr.Plot()
+        # Tab 3: same layout as tab 1, but over the imputed scores
+        with gr.Tab("Imputed table"):
+            with gr.Row():
+                t3_q = gr.Textbox(label="Filter: Model/Company contains", placeholder="e.g., llama", scale=2)
+                t3_bench = gr.Dropdown(choices=["Any"], value="Any", label="Benchmark", scale=1)
+                t3_op = gr.Radio(choices=["≥", "≤"], value="≥", label="Comparator", scale=1)
+                t3_thr = gr.Slider(minimum=0, maximum=100, value=0, step=1, label="Threshold (%)", scale=1)
+            t3_html = gr.HTML(elem_classes=["table-wrap"])
+    # Page load and the Reload button share one handler: fetch the sheet and rebuild everything
+    args_reload = [t1_q, t1_bench, t1_op, t1_thr, t3_q, t3_bench, t3_op, t3_thr]
+    outs_reload = [t1_html, corr_plot, t3_html, t1_bench, t3_bench, s_df_raw, s_df_num, s_benches, s_bench_imp]
+    demo.load(refetch_all, inputs=args_reload, outputs=outs_reload)
+    reload_btn.click(refetch_all, inputs=args_reload, outputs=outs_reload)
+    # Live filtering from the cached states: any control of a tab re-renders that tab's table
+    tab1_inputs = [s_df_raw, s_df_num, s_benches, t1_q, t1_bench, t1_op, t1_thr]
+    for control in (t1_q, t1_bench, t1_op, t1_thr):
+        control.change(filter_tab1, inputs=tab1_inputs, outputs=[t1_html])
+    tab3_inputs = [s_df_raw, s_bench_imp, s_benches, t3_q, t3_bench, t3_op, t3_thr]
+    for control in (t3_q, t3_bench, t3_op, t3_thr):
+        control.change(filter_tab3, inputs=tab3_inputs, outputs=[t3_html])
+# Start the Gradio server only when the file is executed as a script
+if __name__ == "__main__":
+    demo.launch()