adpinzonp committed on
Commit
7d31c00
·
verified ·
1 Parent(s): a987529

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +269 -0
app.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
def sheet_to_dataframe(sheet_url):
    """Load one tab of a public Google Sheet into a pandas DataFrame.

    The spreadsheet ID is read from the ``/d/<ID>`` path segment of the URL.
    If the URL also carries ``gid=<digits>`` that tab is requested explicitly;
    share links without a gid (e.g. ``.../edit?usp=sharing``) are accepted too,
    in which case no gid is sent and the export endpoint picks its default tab.

    Args:
        sheet_url: Sheet URL such as
            "https://docs.google.com/spreadsheets/d/ID/edit#gid=0".

    Returns:
        pandas DataFrame parsed from the sheet's CSV export.

    Raises:
        ValueError: If no spreadsheet ID can be found in ``sheet_url``.
    """
    import re

    id_match = re.search(r'/d/([a-zA-Z0-9-_]+)', sheet_url)
    if not id_match:
        raise ValueError("Invalid Google Sheets URL")
    sheet_id = id_match.group(1)

    # Build the CSV export link; the gid is appended only when the URL names one.
    csv_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv"
    gid_match = re.search(r'gid=([0-9]+)', sheet_url)
    if gid_match:
        csv_url += f"&gid={gid_match.group(1)}"

    return pd.read_csv(csv_url)
21
+
22
+ # ---------------- App code below ----------------
23
+ import numpy as np
24
+ import gradio as gr
25
+ import plotly.graph_objects as go
26
+ from sklearn.experimental import enable_iterative_imputer # noqa: F401
27
+ from sklearn.impute import IterativeImputer, SimpleImputer
28
+ import warnings
29
+
30
# Suppress every FutureWarning for the whole process.
warnings.filterwarnings(action="ignore", category=FutureWarning)

# Sheet that is fetched on page load and each time "Reload" is pressed.
DEFAULT_SHEET_URL = "https://docs.google.com/spreadsheets/d/1ygw8nrqI-FdHzyQGczKR5n3t01d-9sxMB_KVoClhoAg/edit?gid=0#gid=0"
33
+
34
+
35
+ def _parse_percent_value(v):
36
+ if v is None or (isinstance(v, float) and np.isnan(v)):
37
+ return np.nan
38
+ if isinstance(v, (int, float)):
39
+ return float(v)
40
+ s = str(v).strip()
41
+ if s == "":
42
+ return np.nan
43
+ # Handle NA-like tokens
44
+ if s.lower() in {"na", "n/a", "null", "none"}:
45
+ return np.nan
46
+ # Remove percent sign
47
+ s = s.replace("%", "").replace(",", "").strip()
48
+ # Handle dashes
49
+ if s in {"-", "–", "—"}:
50
+ return np.nan
51
+ try:
52
+ return float(s)
53
+ except Exception:
54
+ return np.nan
55
+
56
+
57
+ def _split_columns(df):
58
+ """First 4 columns are fixed; rest are benchmarks."""
59
+ all_cols = list(df.columns)
60
+ if len(all_cols) < 4:
61
+ raise ValueError("The sheet must have at least the first four columns: Model, Company, Input price per 1MT, Output price per 1MT")
62
+ fixed = all_cols[:4]
63
+ benches = all_cols[4:]
64
+ return fixed, benches
65
+
66
+
67
def _clean_benchmarks(df):
    """Build a copy of ``df`` whose benchmark columns hold parsed floats.

    Each benchmark cell goes through ``_parse_percent_value``; the four fixed
    columns are copied as they are and the input frame is not modified.

    Returns:
        Tuple ``(numeric_df, benches, fixed)``.
    """
    fixed_cols, bench_cols = _split_columns(df)
    parsed = df.copy()
    for name in bench_cols:
        parsed[name] = parsed[name].apply(_parse_percent_value)
    return parsed, bench_cols, fixed_cols
74
+
75
+
76
+ def _style_table(df_display, benches, cmap="RdYlGn", vmin=0.0, vmax=100.0, precision=1):
77
+ """Return an HTML string of a pandas Styler with background gradients on benchmark columns."""
78
+ styler = (
79
+ df_display.style
80
+ .format({c: f"{{:.{precision}f}}%" for c in benches}, na_rep="N/A")
81
+ .background_gradient(axis=None, subset=benches, cmap=cmap, vmin=vmin, vmax=vmax)
82
+ .set_table_styles(
83
+ [
84
+ {"selector": "th", "props": [("position", "sticky"), ("top", "0"), ("background", "#111"), ("color", "white"), ("z-index", "1")]},
85
+ {"selector": "table", "props": [("border-collapse", "collapse"), ("font-family", "Inter, Roboto, Arial, sans-serif")]},
86
+ {"selector": "td, th", "props": [("border", "1px solid #333"), ("padding", "6px 8px")]},
87
+ {"selector": "tbody tr:nth-child(odd)", "props": [("background-color", "#161616")]},
88
+ {"selector": "tbody tr:nth-child(even)", "props": [("background-color", "#0f0f0f")]},
89
+ ]
90
+ )
91
+ .set_properties(subset=df_display.columns[:4], **{"font-weight": "600"})
92
+ )
93
+ return styler.to_html()
94
+
95
+
96
+ def _filter_rows(df_raw, df_num, benches, text_query, bench_choice, comparator, threshold):
97
+ mask = pd.Series(True, index=df_raw.index)
98
+ if text_query:
99
+ tq = str(text_query).strip().lower()
100
+ # Search in Model + Company
101
+ mc = (df_raw.iloc[:, 0].astype(str).str.lower().fillna("")
102
+ + " " +
103
+ df_raw.iloc[:, 1].astype(str).str.lower().fillna(""))
104
+ mask &= mc.str.contains(tq, na=False)
105
+
106
+ if bench_choice == "Any":
107
+ bench_vals = df_num[benches]
108
+ if comparator == "≥":
109
+ mask &= (bench_vals.ge(threshold)).any(axis=1).fillna(False)
110
+ else:
111
+ mask &= (bench_vals.le(threshold)).any(axis=1).fillna(False)
112
+ elif bench_choice and bench_choice in benches:
113
+ col_vals = df_num[bench_choice]
114
+ if comparator == "≥":
115
+ mask &= col_vals.ge(threshold).fillna(False)
116
+ else:
117
+ mask &= col_vals.le(threshold).fillna(False)
118
+
119
+ return df_raw.loc[mask].reset_index(drop=True), df_num.loc[mask].reset_index(drop=True)
120
+
121
+
122
def _build_correlation_plot(df_num, benches):
    """Build a Plotly heatmap of Pearson correlations between benchmarks.

    With no benchmark columns an empty, titled figure is returned. With one
    column the matrix is the trivial 1x1 identity.
    """
    if len(benches) == 0:
        empty_fig = go.Figure()
        empty_fig.update_layout(title="No benchmark columns found")
        return empty_fig

    scores = df_num[benches].astype(float)
    if scores.shape[1] > 1:
        corr = scores.corr(method="pearson")
    else:
        corr = pd.DataFrame([[1.0]], index=benches, columns=benches)

    heatmap = go.Heatmap(
        z=corr.values,
        x=list(corr.columns),
        y=list(corr.index),
        colorscale="RdYlGn",
        zmin=-1,
        zmax=1,
        colorbar=dict(title="ρ"),
        hoverongaps=False,
    )
    fig = go.Figure(data=heatmap)

    # Same tick budget on both axes: between 5 and 20, following the column count.
    tick_count = max(5, min(20, len(benches)))
    fig.update_layout(
        title="Correlation between benchmark variables",
        xaxis_nticks=tick_count,
        yaxis_nticks=tick_count,
        margin=dict(l=60, r=20, t=60, b=60),
        height=600,
    )
    return fig
154
+
155
+
156
def fetch_and_prepare(url):
    """Download the sheet at ``url`` and derive its numeric benchmark view.

    Returns:
        Tuple ``(df_raw, df_num, benches, fixed)``: the frame as downloaded,
        the copy with parsed benchmark columns, and the two column-name lists.
    """
    raw = sheet_to_dataframe(url)
    numeric, bench_cols, fixed_cols = _clean_benchmarks(raw)
    return (raw, numeric, bench_cols, fixed_cols)
160
+
161
+
162
def _impute_benchmarks(bench_only):
    """Fill missing benchmark scores; columns with no observed value stay NaN.

    Only columns containing at least one real score are handed to the imputer.
    scikit-learn's imputers drop features that have no observed values by
    default, which would otherwise return fewer columns than were passed in.
    The input's index and column order are preserved.
    """
    imputed = bench_only.copy()
    observed = [col for col in bench_only.columns if bench_only[col].notna().any()]
    if not observed:
        # Nothing to learn from (no benchmarks, no rows, or all cells missing).
        return imputed

    if len(observed) > 1:
        imputer = IterativeImputer(random_state=0, sample_posterior=False, max_iter=15, initial_strategy="mean")
    else:
        imputer = SimpleImputer(strategy="mean")

    filled = imputer.fit_transform(bench_only[observed])
    imputed[observed] = pd.DataFrame(filled, index=bench_only.index, columns=observed)
    return imputed


def refetch_all(t1_q, t1_bench, t1_op, t1_thr, t3_q, t3_bench, t3_op, t3_thr):
    """Re-download the default sheet and rebuild every view of the app.

    The ``t1_*`` and ``t3_*`` arguments are the current filter settings of the
    "Original table" and "Imputed table" tabs (text query, benchmark choice,
    comparator, threshold); they are applied to the freshly fetched data.

    Returns:
        A 9-tuple, in this order: HTML of the original table, the correlation
        figure, HTML of the imputed table, two dropdown updates carrying the
        benchmark choices (the current selection is kept when still valid,
        otherwise reset to "Any"), then the raw frame, the numeric frame, the
        benchmark list and the imputed benchmark frame for the cached states.
    """
    # Always re-fetch from the default sheet
    df_raw, df_num, benches, _ = fetch_and_prepare(DEFAULT_SHEET_URL)

    fig_corr = _build_correlation_plot(df_num, benches)

    # Imputed scores keep df_raw's index so the row mask built in
    # _filter_rows lines up with them.
    bench_imp = _impute_benchmarks(df_num[benches].astype(float))

    def render(scores, query, bench, op, thr):
        # Filter, then show the four fixed columns next to the chosen scores.
        raw_f, num_f = _filter_rows(df_raw, scores, benches, query, bench, op, thr)
        return _style_table(pd.concat([raw_f.iloc[:, :4], num_f[benches]], axis=1), benches)

    html_tab1 = render(df_num, t1_q, t1_bench, t1_op, t1_thr)
    html_tab3 = render(bench_imp, t3_q, t3_bench, t3_op, t3_thr)

    # Dropdown choices
    bench_options = ["Any"] + benches

    # Return UI updates and persistent states
    return (
        html_tab1,  # t1_html
        fig_corr,   # corr_plot
        html_tab3,  # t3_html
        gr.update(choices=bench_options, value=t1_bench if t1_bench in bench_options else "Any"),
        gr.update(choices=bench_options, value=t3_bench if t3_bench in bench_options else "Any"),
        df_raw,     # s_df_raw
        df_num,     # s_df_num
        benches,    # s_benches
        bench_imp,  # s_bench_imp
    )
201
+
202
+
203
def filter_tab1(s_df_raw, s_df_num, s_benches, text_query, bench_choice, comparator, threshold):
    """Re-render the "Original table" tab from cached data with new filters.

    Args:
        s_df_raw, s_df_num, s_benches: Cached raw frame, numeric frame and
            benchmark column list from the last fetch.
        text_query, bench_choice, comparator, threshold: Current filter
            settings, passed through to ``_filter_rows``.

    Returns:
        HTML for the filtered table, or an empty string when nothing has been
        fetched yet (the cached states are still None).
    """
    # A filter widget can fire before the first fetch has filled the states.
    if s_df_raw is None or s_df_num is None or s_benches is None:
        return ""
    df1_raw_f, df1_num_f = _filter_rows(s_df_raw, s_df_num, s_benches, text_query, bench_choice, comparator, threshold)
    html_tab1 = _style_table(pd.concat([df1_raw_f.iloc[:, :4], df1_num_f[s_benches]], axis=1), s_benches)
    return html_tab1
207
+
208
+
209
def filter_tab3(s_df_raw, s_bench_imp, s_benches, text_query, bench_choice, comparator, threshold):
    """Re-render the "Imputed table" tab from cached data with new filters.

    Args:
        s_df_raw, s_bench_imp, s_benches: Cached raw frame, imputed benchmark
            frame and benchmark column list from the last fetch.
        text_query, bench_choice, comparator, threshold: Current filter
            settings, passed through to ``_filter_rows``.

    Returns:
        HTML for the filtered table, or an empty string when nothing has been
        fetched yet (the cached states are still None).
    """
    # A filter widget can fire before the first fetch has filled the states.
    if s_df_raw is None or s_bench_imp is None or s_benches is None:
        return ""
    df3_raw_f, df3_num_f = _filter_rows(s_df_raw, s_bench_imp, s_benches, text_query, bench_choice, comparator, threshold)
    html_tab3 = _style_table(pd.concat([df3_raw_f.iloc[:, :4], df3_num_f[s_benches]], axis=1), s_benches)
    return html_tab3
213
+
214
+
215
with gr.Blocks(css="""
/* Make the HTML tables scrollable horizontally if wide */
.table-wrap { overflow-x: auto; }
""") as demo:
    gr.Markdown("## LLM Benchmarks — Live from Google Sheets")

    with gr.Row():
        reload_btn = gr.Button("Reload", variant="primary", scale=1)

    # Cached results of the last fetch, so filtering does not hit the network.
    s_df_raw = gr.State()
    s_df_num = gr.State()
    s_benches = gr.State()
    s_bench_imp = gr.State()

    with gr.Tabs():
        with gr.Tab("Original table"):
            with gr.Row():
                t1_q = gr.Textbox(label="Filter: Model/Company contains", placeholder="e.g., llama", scale=2)
                t1_bench = gr.Dropdown(choices=["Any"], value="Any", label="Benchmark", scale=1)
                t1_op = gr.Radio(choices=["≥", "≤"], value="≥", label="Comparator", scale=1)
                t1_thr = gr.Slider(minimum=0, maximum=100, value=0, step=1, label="Threshold (%)", scale=1)
            t1_html = gr.HTML(elem_classes=["table-wrap"])

        with gr.Tab("Correlation matrix"):
            corr_plot = gr.Plot()

        with gr.Tab("Imputed table"):
            with gr.Row():
                t3_q = gr.Textbox(label="Filter: Model/Company contains", placeholder="e.g., llama", scale=2)
                t3_bench = gr.Dropdown(choices=["Any"], value="Any", label="Benchmark", scale=1)
                t3_op = gr.Radio(choices=["≥", "≤"], value="≥", label="Comparator", scale=1)
                t3_thr = gr.Slider(minimum=0, maximum=100, value=0, step=1, label="Threshold (%)", scale=1)
            t3_html = gr.HTML(elem_classes=["table-wrap"])

    # Page load and the Reload button share one handler: fetch the sheet again
    # and rebuild both tables, the plot, the dropdown choices and the states.
    args_reload = [t1_q, t1_bench, t1_op, t1_thr, t3_q, t3_bench, t3_op, t3_thr]
    outs_reload = [t1_html, corr_plot, t3_html, t1_bench, t3_bench, s_df_raw, s_df_num, s_benches, s_bench_imp]

    demo.load(refetch_all, inputs=args_reload, outputs=outs_reload)
    reload_btn.click(refetch_all, inputs=args_reload, outputs=outs_reload)

    # Any change to a tab's filter widgets re-renders that tab from the
    # cached states only.
    t1_inputs = [s_df_raw, s_df_num, s_benches, t1_q, t1_bench, t1_op, t1_thr]
    for _control in (t1_q, t1_bench, t1_op, t1_thr):
        _control.change(filter_tab1, inputs=t1_inputs, outputs=[t1_html])

    t3_inputs = [s_df_raw, s_bench_imp, s_benches, t3_q, t3_bench, t3_op, t3_thr]
    for _control in (t3_q, t3_bench, t3_op, t3_thr):
        _control.change(filter_tab3, inputs=t3_inputs, outputs=[t3_html])

if __name__ == "__main__":
    demo.launch()