Spaces:
Running
Running
Commit
·
9702a67
1
Parent(s):
616bf01
support dark mode
Browse files
app.py
CHANGED
|
@@ -7,18 +7,17 @@ from dotenv import load_dotenv
|
|
| 7 |
from matplotlib.colors import LinearSegmentedColormap
|
| 8 |
import plotly.express as px
|
| 9 |
import plotly.graph_objects as go
|
| 10 |
-
from sklearn.linear_model import LinearRegression
|
| 11 |
import numpy as np
|
| 12 |
from huggingface_hub import HfApi
|
| 13 |
from huggingface_hub.hf_api import HTTPError
|
| 14 |
from huggingface_hub.utils import GatedRepoError
|
| 15 |
from gradio_rangeslider import RangeSlider
|
| 16 |
import datetime
|
| 17 |
-
|
| 18 |
|
| 19 |
load_dotenv()
|
| 20 |
webhook_url = os.environ.get("WEBHOOK_URL")
|
| 21 |
-
|
| 22 |
file_name_list = [
|
| 23 |
"14b",
|
| 24 |
"9b",
|
|
@@ -27,19 +26,16 @@ file_name_list = [
|
|
| 27 |
"1b5",
|
| 28 |
"other",
|
| 29 |
]
|
| 30 |
-
|
| 31 |
sheet_name_list = [
|
| 32 |
"cr",
|
| 33 |
"bpc",
|
| 34 |
"bpb",
|
| 35 |
]
|
| 36 |
-
|
| 37 |
metric_list = [
|
| 38 |
"Compression Rate (%)",
|
| 39 |
"Bits Per Character (BPC)",
|
| 40 |
"Bits Per Byte (BPB)",
|
| 41 |
]
|
| 42 |
-
|
| 43 |
model_size_list = [
|
| 44 |
"~14B",
|
| 45 |
"~9B",
|
|
@@ -48,13 +44,11 @@ model_size_list = [
|
|
| 48 |
"~1.5B",
|
| 49 |
"Other",
|
| 50 |
]
|
| 51 |
-
|
| 52 |
metric_to_sheet = {
|
| 53 |
"Compression Rate (%)": "cr",
|
| 54 |
"Bits Per Character (BPC)": "bpc",
|
| 55 |
"Bits Per Byte (BPB)": "bpb",
|
| 56 |
}
|
| 57 |
-
|
| 58 |
model_size_to_file_name = {
|
| 59 |
"~14B": "14b",
|
| 60 |
"~9B": "9b",
|
|
@@ -68,27 +62,21 @@ def read_about_md():
|
|
| 68 |
with open('about.md', 'r', encoding='utf-8') as f:
|
| 69 |
return f.read()
|
| 70 |
|
| 71 |
-
|
| 72 |
def rename_columns(df):
|
| 73 |
df.columns = [col.rsplit("_", maxsplit=1)[0] for col in df.columns]
|
| 74 |
return df
|
| 75 |
|
| 76 |
-
|
| 77 |
def get_folders_matching_format(directory):
|
| 78 |
pattern = re.compile(r"^\d{4}-\d{2}$")
|
| 79 |
folders = []
|
| 80 |
-
|
| 81 |
if not os.path.exists(directory):
|
| 82 |
return folders
|
| 83 |
-
|
| 84 |
for item in os.listdir(directory):
|
| 85 |
full_path = os.path.join(directory, item)
|
| 86 |
if os.path.isdir(full_path) and pattern.match(item):
|
| 87 |
folders.append(full_path)
|
| 88 |
-
|
| 89 |
return folders
|
| 90 |
|
| 91 |
-
|
| 92 |
def get_unique_column_names(data=None):
|
| 93 |
return [
|
| 94 |
"ao3_\u200benglish",
|
|
@@ -100,74 +88,63 @@ def get_unique_column_names(data=None):
|
|
| 100 |
"github_\u200bpython",
|
| 101 |
]
|
| 102 |
|
| 103 |
-
|
| 104 |
def color_cell(value):
|
| 105 |
return "background-color: #fffdd0" if pd.notna(value) else "default"
|
| 106 |
|
|
|
|
|
|
|
| 107 |
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
size_range:
|
| 115 |
-
midpoint: float = 0.5,
|
| 116 |
-
sort_by: str = "Average (lower=better)",
|
| 117 |
-
ascending: bool = True,
|
| 118 |
-
):
|
| 119 |
-
print(
|
| 120 |
-
f"Updating - time: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}, period: {period}, models: {models_size}, metric: {metric}, visible_columns: {visible_columns}, color_columns: {color_columns}, size_range: {size_range}, sort_by: {sort_by}, ascending: {ascending}\n"
|
| 121 |
-
)
|
| 122 |
|
| 123 |
if not models_size:
|
| 124 |
return "No data available for the selected models and period."
|
| 125 |
-
|
| 126 |
-
|
| 127 |
target_period_data = all_data[period]
|
| 128 |
target_file_name = [model_size_to_file_name[model] for model in models_size]
|
| 129 |
sheet_name = metric_to_sheet[metric]
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
combined_data = pd.concat(
|
| 133 |
-
[df.dropna(axis=1, how="all") for df in [target_period_data[file_name][sheet_name] for file_name in target_file_name]], axis=0
|
| 134 |
-
)
|
| 135 |
if len(combined_data) == 0:
|
| 136 |
return "No data available for the selected models and period."
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
# Filter models based on the size range
|
| 140 |
combined_data = combined_data[combined_data["Parameters Count (B)"].between(size_range[0], size_range[1])]
|
| 141 |
combined_data.reset_index(drop=True, inplace=True)
|
|
|
|
| 142 |
if len(combined_data) == 0:
|
| 143 |
return "No data available for the selected models and period."
|
| 144 |
-
|
| 145 |
-
|
| 146 |
combined_data["Name"] = combined_data["Name"].apply(lambda x: x.replace(".pth", ""))
|
| 147 |
-
|
| 148 |
ordered_columns = get_unique_column_names()
|
| 149 |
relevant_columns = [col for col in ordered_columns if col in visible_columns and col not in ["Name", "Parameters Count (B)", "Average (The lower the better)"]]
|
| 150 |
|
| 151 |
-
if len(combined_data) > 0:
|
| 152 |
combined_data["Average (The lower the better)"] = round(combined_data[relevant_columns].mean(axis=1), 3)
|
| 153 |
-
|
| 154 |
-
combined_data = combined_data.rename(columns={"Average (The lower the better)": "Average (lower=better)"})
|
| 155 |
sorted_data = combined_data.sort_values(by=sort_by, ascending=ascending)
|
| 156 |
-
|
| 157 |
-
filtered_data = sorted_data[
|
| 158 |
filtered_data.columns = [col.replace("_", " ") for col in filtered_data.columns]
|
| 159 |
-
|
| 160 |
formatter = {col: "{:.3f}" for col in filtered_data.columns if filtered_data[col].dtype in ["float64", "float32"]}
|
| 161 |
-
|
| 162 |
-
#
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
for column in filtered_data.columns:
|
| 168 |
-
if column in ["Name", "Params (B)"]:
|
| 169 |
-
|
| 170 |
-
col_values = filtered_data[column]
|
| 171 |
if len(col_values) > 1:
|
| 172 |
sorted_values = np.sort(col_values)
|
| 173 |
vmin[column] = sorted_values.min()
|
|
@@ -175,99 +152,64 @@ def update_table(
|
|
| 175 |
idx = int(len(sorted_values) * midpoint)
|
| 176 |
vmid[column] = sorted_values[idx]
|
| 177 |
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
def normalize(x):
|
| 183 |
-
if x
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
else:
|
| 186 |
-
return 0.5 + 0.5 * (x -
|
| 187 |
-
|
| 188 |
normed = series.apply(normalize)
|
| 189 |
-
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
target_color_columns = []
|
| 193 |
-
if "Average" in color_columns:
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
for column in target_color_columns:
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
vmax=vmax[column],
|
| 206 |
-
vmid=vmid[column],
|
| 207 |
-
subset=[column],
|
| 208 |
-
)
|
| 209 |
-
|
| 210 |
-
# return styler
|
| 211 |
styler = styler.hide(axis="index")
|
| 212 |
-
|
| 213 |
widths = [300, 150, 150, 100, 100, 100, 100, 100, 100, 100, 100]
|
|
|
|
| 214 |
table_styles = []
|
| 215 |
-
|
|
|
|
| 216 |
for i, w in enumerate(widths):
|
| 217 |
-
table_styles.append(
|
| 218 |
-
{
|
| 219 |
-
"selector": "th",
|
| 220 |
-
"props": [
|
| 221 |
-
("background-color", "#f5f5f5"),
|
| 222 |
-
("padding", "8px"),
|
| 223 |
-
("font-weight", "bold"),
|
| 224 |
-
],
|
| 225 |
-
}
|
| 226 |
-
)
|
| 227 |
-
table_styles.append(
|
| 228 |
-
{
|
| 229 |
-
"selector": f"th.col{i}",
|
| 230 |
-
"props": [
|
| 231 |
-
("min-width", f"{w}px"),
|
| 232 |
-
("max-width", f"{w}px"),
|
| 233 |
-
("text-align", "center"),
|
| 234 |
-
("border", "1px solid #dddddd"),
|
| 235 |
-
],
|
| 236 |
-
}
|
| 237 |
-
)
|
| 238 |
-
table_styles.append(
|
| 239 |
-
{
|
| 240 |
-
"selector": f"td.col{i}",
|
| 241 |
-
"props": [
|
| 242 |
-
("min-width", f"{w}px"),
|
| 243 |
-
("max-width", f"{w}px"),
|
| 244 |
-
("text-align", "center"),
|
| 245 |
-
("border", "1px solid #dddddd"),
|
| 246 |
-
],
|
| 247 |
-
}
|
| 248 |
-
)
|
| 249 |
-
|
| 250 |
-
table_styles.append(
|
| 251 |
-
{
|
| 252 |
-
"selector": "table",
|
| 253 |
-
"props": [
|
| 254 |
-
("border-collapse", "collapse"),
|
| 255 |
-
("border", "1px solid #dddddd"),
|
| 256 |
-
],
|
| 257 |
-
}
|
| 258 |
-
)
|
| 259 |
-
|
| 260 |
styler = styler.set_table_styles(table_styles)
|
| 261 |
-
|
| 262 |
-
html_output = styler.to_html()
|
| 263 |
-
return html_output
|
| 264 |
-
|
| 265 |
|
| 266 |
def create_world_languages_gdp_chart():
|
| 267 |
languages = ["English", "Chinese", "Spanish", "Japanese", "German", "French", "Arabic", "Italian", "Portuguese", "Korean", "Other"]
|
| 268 |
shares = [27, 18, 8, 6, 5, 4, 3, 2, 2, 2, 23]
|
| 269 |
colors = ["#FF7F7F", "#FFA07A", "#FFDB58", "#90EE90", "#98FB98", "#87CEFA", "#B0C4DE", "#DDA0DD", "#D8BFD8", "#F0E68C", "#E0FFFF"]
|
| 270 |
-
|
| 271 |
fig = go.Figure(
|
| 272 |
data=[
|
| 273 |
go.Pie(
|
|
@@ -282,7 +224,6 @@ def create_world_languages_gdp_chart():
|
|
| 282 |
)
|
| 283 |
]
|
| 284 |
)
|
| 285 |
-
|
| 286 |
fig.update_layout(
|
| 287 |
title={
|
| 288 |
"text": "World Languages by Share of Global GDP",
|
|
@@ -297,10 +238,8 @@ def create_world_languages_gdp_chart():
|
|
| 297 |
height=500,
|
| 298 |
margin=dict(t=80, b=20, l=20, r=20),
|
| 299 |
)
|
| 300 |
-
|
| 301 |
return fig
|
| 302 |
|
| 303 |
-
|
| 304 |
def check_model_exists(model_id):
|
| 305 |
api = HfApi()
|
| 306 |
try:
|
|
@@ -314,11 +253,9 @@ def check_model_exists(model_id):
|
|
| 314 |
else:
|
| 315 |
return "Error: " + str(e)
|
| 316 |
|
| 317 |
-
|
| 318 |
def submit_model(name):
|
| 319 |
if "Exists" not in check_model_exists(name):
|
| 320 |
return f"# ERROR: Model {name} does not exist on Hugging Face!"
|
| 321 |
-
|
| 322 |
try:
|
| 323 |
response = requests.post(webhook_url, json={"content": name})
|
| 324 |
if response.status_code == 200:
|
|
@@ -334,131 +271,20 @@ def submit_model(name):
|
|
| 334 |
except Exception as e:
|
| 335 |
print(e)
|
| 336 |
return "ERROR: Unexpected error. Please try again later."
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
# def create_scaling_plot(all_data, period):
|
| 340 |
-
# selected_columns = ["Name", "Parameters Count (B)", "Average (The lower the better)"]
|
| 341 |
-
# target_data = all_data[period]
|
| 342 |
-
# new_df = pd.DataFrame()
|
| 343 |
-
|
| 344 |
-
# for size in target_data.keys():
|
| 345 |
-
# new_df = pd.concat([new_df, target_data[size]["cr"].loc[:, selected_columns].dropna(axis=1, how="all")], axis=0)
|
| 346 |
-
|
| 347 |
-
# new_df.rename(columns={"Parameters Count (B)": "Params(B)", "Average (The lower the better)": "Compression Rate (%)"}, inplace=True)
|
| 348 |
-
|
| 349 |
-
# new_df["Log Params(B)"] = np.log(new_df["Params(B)"])
|
| 350 |
-
# new_df["Log Compression Rate (%)"] = np.log(new_df["Compression Rate (%)"])
|
| 351 |
-
|
| 352 |
-
# fig = px.scatter(
|
| 353 |
-
# new_df,
|
| 354 |
-
# x="Log Params(B)",
|
| 355 |
-
# y="Log Compression Rate (%)",
|
| 356 |
-
# title="Compression Rate Scaling Law",
|
| 357 |
-
# hover_name="Name",
|
| 358 |
-
# custom_data=["Params(B)", "Compression Rate (%)"],
|
| 359 |
-
# )
|
| 360 |
-
|
| 361 |
-
# fig.update_traces(
|
| 362 |
-
# hovertemplate="<b>%{hovertext}</b><br>Params(B): %{customdata[0]:.2f} B<br>Compression Rate (%): %{customdata[1]:.2f}<extra></extra>"
|
| 363 |
-
# )
|
| 364 |
-
# fig.update_layout(
|
| 365 |
-
# width=800, # 设置图像宽度
|
| 366 |
-
# height=600, # 设置图像高度
|
| 367 |
-
# title={"text": "Compression Rate Scaling Law", "x": 0.5, "xanchor": "center", "yanchor": "top"},
|
| 368 |
-
# showlegend=True,
|
| 369 |
-
# xaxis={"showgrid": True, "zeroline": False, "type": "linear", "title": "Params(B)"}, # 确保坐标轴类型正确
|
| 370 |
-
# yaxis={"showgrid": True, "zeroline": False, "type": "linear", "title": "Compression Rate (%)", "autorange": "reversed"},
|
| 371 |
-
# )
|
| 372 |
-
|
| 373 |
-
# names_to_connect_dict = {
|
| 374 |
-
# "2024-05": ["Meta-Llama-3-8B", "stablelm-3b-4e1t", "Qwen2-1.5B", "TinyLlama-1.1B-intermediate-step-1431k-3T", "Mistral-Nemo-Base-2407"],
|
| 375 |
-
# "2024-06": ["Meta-Llama-3-8B", "stablelm-3b-4e1t", "Qwen2-1.5B", "TinyLlama-1.1B-intermediate-step-1431k-3T", "Mistral-Nemo-Base-2407"],
|
| 376 |
-
# "2024-07": ["Meta-Llama-3.1-8B", "stablelm-3b-4e1t", "Qwen2-1.5B", "TinyLlama-1.1B-intermediate-step-1431k-3T", "Mistral-Nemo-Base-2407"],
|
| 377 |
-
# "2024-08": [
|
| 378 |
-
# "Meta-Llama-3.1-8B",
|
| 379 |
-
# "Rene-v0.1-1.3b-pytorch",
|
| 380 |
-
# "stablelm-3b-4e1t",
|
| 381 |
-
# "Qwen2-1.5B",
|
| 382 |
-
# "TinyLlama-1.1B-intermediate-step-1431k-3T",
|
| 383 |
-
# "Mistral-Nemo-Base-2407",
|
| 384 |
-
# ],
|
| 385 |
-
# "2025-01": ["Qwen2.5-1.5B"],
|
| 386 |
-
# }
|
| 387 |
-
|
| 388 |
-
# names_to_connect = names_to_connect_dict.get(period, names_to_connect_dict["2024-08"])
|
| 389 |
-
|
| 390 |
-
# connection_points = new_df[new_df["Name"].isin(names_to_connect)]
|
| 391 |
-
# print(connection_points)
|
| 392 |
-
|
| 393 |
-
# new_df["Color"] = new_df["Name"].apply(lambda name: "#39C5BB" if name in names_to_connect else "#636efa")
|
| 394 |
-
|
| 395 |
-
# fig.update_traces(marker=dict(color=new_df["Color"]))
|
| 396 |
-
|
| 397 |
-
# X = connection_points["Log Params(B)"].values.reshape(-1, 1)
|
| 398 |
-
# y = connection_points["Log Compression Rate (%)"].values
|
| 399 |
-
# model = LinearRegression().fit(X, y)
|
| 400 |
-
|
| 401 |
-
# x_min = connection_points["Log Params(B)"].min()
|
| 402 |
-
# x_max = connection_points["Log Params(B)"].max()
|
| 403 |
-
# extended_x = np.linspace(x_min, x_max * 1.5, 100)
|
| 404 |
-
# extended_x_original = np.exp(extended_x)
|
| 405 |
-
# trend_line_y = model.predict(extended_x.reshape(-1, 1))
|
| 406 |
-
# trend_line_y_original = np.exp(trend_line_y)
|
| 407 |
-
|
| 408 |
-
# trend_line = go.Scatter(
|
| 409 |
-
# x=extended_x,
|
| 410 |
-
# y=trend_line_y,
|
| 411 |
-
# mode="lines",
|
| 412 |
-
# line=dict(color="skyblue", dash="dash"),
|
| 413 |
-
# name="Trend Line",
|
| 414 |
-
# hovertemplate="<b>Params(B):</b> %{customdata[0]:.2f}<br>" + "<b>Compression Rate (%):</b> %{customdata[1]:.2f}<extra></extra>",
|
| 415 |
-
# customdata=np.stack((extended_x_original, trend_line_y_original), axis=-1),
|
| 416 |
-
# )
|
| 417 |
-
|
| 418 |
-
# fig.add_trace(trend_line)
|
| 419 |
-
|
| 420 |
-
# x_min = new_df["Params(B)"].min()
|
| 421 |
-
# x_max = new_df["Params(B)"].max()
|
| 422 |
-
# x_tick_vals = np.geomspace(x_min, x_max, num=5)
|
| 423 |
-
# x_tick_text = [f"{val:.1f}" for val in x_tick_vals]
|
| 424 |
-
|
| 425 |
-
# y_min = new_df["Compression Rate (%)"].min()
|
| 426 |
-
# y_max = new_df["Compression Rate (%)"].max()
|
| 427 |
-
# y_tick_vals = np.geomspace(y_min, y_max, num=5)
|
| 428 |
-
# y_tick_text = [f"{val:.1f}" for val in y_tick_vals]
|
| 429 |
-
|
| 430 |
-
# fig.update_xaxes(tickvals=np.log(x_tick_vals), ticktext=x_tick_text, title="Params(B)")
|
| 431 |
-
# fig.update_yaxes(tickvals=np.log(y_tick_vals), ticktext=y_tick_text, title="Compression Rate (%)", autorange="reversed")
|
| 432 |
-
|
| 433 |
-
# fig.update_layout(xaxis=dict(showgrid=True, zeroline=False), yaxis=dict(showgrid=True, zeroline=False))
|
| 434 |
-
|
| 435 |
-
# fig.update_traces(marker=dict(size=12))
|
| 436 |
-
|
| 437 |
-
# print(fig.layout)
|
| 438 |
-
|
| 439 |
-
# return fig
|
| 440 |
-
|
| 441 |
-
|
| 442 |
def create_scaling_plot(all_data, period):
|
| 443 |
selected_columns = ["Name", "Parameters Count (B)", "Average (The lower the better)"]
|
| 444 |
target_data = all_data[period]
|
| 445 |
new_df = pd.DataFrame()
|
| 446 |
-
|
| 447 |
for size in target_data.keys():
|
| 448 |
new_df = pd.concat([new_df, target_data[size]["cr"].loc[:, selected_columns].dropna(axis=1, how="all")], axis=0)
|
| 449 |
-
|
| 450 |
x_values = new_df["Parameters Count (B)"].astype(float).tolist()
|
| 451 |
y_values = new_df["Average (The lower the better)"].astype(float).tolist()
|
| 452 |
names = new_df["Name"].tolist()
|
| 453 |
-
|
| 454 |
x_min, x_max = np.log10(min(x_values)), np.log10(max(x_values))
|
| 455 |
y_min, y_max = np.log10(min(y_values)), np.log10(max(y_values))
|
| 456 |
-
|
| 457 |
x_dtick = (x_max - x_min) / 4
|
| 458 |
y_dtick = (y_max - y_min) / 4
|
| 459 |
-
|
| 460 |
fig = go.Figure()
|
| 461 |
-
|
| 462 |
fig.add_trace(
|
| 463 |
go.Scatter(
|
| 464 |
x=x_values,
|
|
@@ -473,7 +299,6 @@ def create_scaling_plot(all_data, period):
|
|
| 473 |
),
|
| 474 |
)
|
| 475 |
)
|
| 476 |
-
|
| 477 |
fig.update_layout(
|
| 478 |
title={"text": "Compression Rate Scaling Law", "x": 0.5, "xanchor": "center", "yanchor": "top"},
|
| 479 |
width=800,
|
|
@@ -499,10 +324,8 @@ def create_scaling_plot(all_data, period):
|
|
| 499 |
autorange="reversed",
|
| 500 |
),
|
| 501 |
)
|
| 502 |
-
|
| 503 |
return fig
|
| 504 |
|
| 505 |
-
|
| 506 |
def read_all_data(folder_name):
|
| 507 |
all_data = {}
|
| 508 |
time_list = []
|
|
@@ -517,37 +340,18 @@ def read_all_data(folder_name):
|
|
| 517 |
for sheet_name in sheet_name_list:
|
| 518 |
final_file_name = os.path.join(folder, file_name)
|
| 519 |
all_data[folder_name][file_name][sheet_name] = rename_columns(pd.read_excel(final_file_name + ".xlsx", sheet_name=sheet_name))
|
| 520 |
-
|
| 521 |
return all_data, time_list
|
| 522 |
|
| 523 |
-
|
| 524 |
-
# def read_mutilange_data(folder_path='mutilang_data'):
|
| 525 |
-
# mutilange_data = {}
|
| 526 |
-
# excel_files = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith('.xlsx')]
|
| 527 |
-
# time_list = [file.split('.')[0] for file in excel_files]
|
| 528 |
-
# time_list = [x.split('\\')[-1] for x in time_list]
|
| 529 |
-
# for file_name in excel_files:
|
| 530 |
-
# if mutilange_data.get(file_name) is None:
|
| 531 |
-
# mutilange_data[file_name] = {}
|
| 532 |
-
# for sheet_name in sheet_name_list:
|
| 533 |
-
# mutilange_data[file_name][sheet_name] = rename_columns(
|
| 534 |
-
# pd.read_excel(file_name, sheet_name=sheet_name))
|
| 535 |
-
# return mutilange_data, time_list
|
| 536 |
-
|
| 537 |
-
|
| 538 |
all_data, time_list = read_all_data("data")
|
| 539 |
-
# muti_lang_data, muti_lang_time_list = read_mutilange_data()
|
| 540 |
-
|
| 541 |
time_list.sort()
|
| 542 |
last_period = time_list[-1]
|
| 543 |
-
|
| 544 |
initial_fig = create_scaling_plot(all_data, last_period)
|
| 545 |
initial_metric = metric_list[0]
|
| 546 |
initial_columns = get_unique_column_names(all_data)
|
| 547 |
initial_colors = ["Average", "Individual Tests"]
|
| 548 |
initial_size_range = [0, 40]
|
|
|
|
| 549 |
initial_data = update_table(last_period, model_size_list, initial_metric, initial_columns, initial_colors, initial_size_range)
|
| 550 |
-
|
| 551 |
css = """
|
| 552 |
.gradio-container {
|
| 553 |
max-width: 95% !important;
|
|
@@ -566,11 +370,11 @@ table {
|
|
| 566 |
width: 100% !important;
|
| 567 |
}
|
| 568 |
"""
|
| 569 |
-
|
| 570 |
TITLE_HTML = '<h1 style="text-align:center"><span style="font-size:1.3em">🏆 LLM Compression Leaderboard</span></h1>'
|
| 571 |
SUBTITLE_HTML = "<h1 style='text-align:center'><span style='font-size:0.8em'>Welcome to Uncheatable Eval LLM Compression Leaderboard, where fancy fine-tuning and cheating won't work 🚫; only compute 💻, data 📊, and real innovation 🔥 can prevail!</span></h1>"
|
| 572 |
-
|
| 573 |
-
|
|
|
|
| 574 |
gr.HTML(TITLE_HTML)
|
| 575 |
gr.HTML(SUBTITLE_HTML)
|
| 576 |
with gr.Tabs() as tabs:
|
|
@@ -585,62 +389,32 @@ with gr.Blocks(css=css) as demo:
|
|
| 585 |
midpoint_slider = gr.Slider(minimum=0.1, maximum=0.9, value=0.5, step=0.01, label="Color Gradient Midpoint")
|
| 586 |
color_selector = gr.CheckboxGroup(label="Colored Columns", choices=["Average", "Individual Tests"], value=initial_colors)
|
| 587 |
colfilter = gr.CheckboxGroup(label="Data Source", choices=get_unique_column_names(all_data), value=initial_columns)
|
| 588 |
-
|
| 589 |
table = gr.HTML(initial_data)
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
)
|
| 596 |
-
model_selector.change(
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
)
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider],
|
| 604 |
-
outputs=table,
|
| 605 |
-
)
|
| 606 |
-
colfilter.change(
|
| 607 |
-
update_table,
|
| 608 |
-
inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider],
|
| 609 |
-
outputs=table,
|
| 610 |
-
)
|
| 611 |
-
color_selector.change(
|
| 612 |
-
update_table,
|
| 613 |
-
inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider],
|
| 614 |
-
outputs=table,
|
| 615 |
-
)
|
| 616 |
-
size_range_slider.change(
|
| 617 |
-
update_table,
|
| 618 |
-
inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider],
|
| 619 |
-
outputs=table,
|
| 620 |
-
)
|
| 621 |
-
midpoint_slider.change(
|
| 622 |
-
update_table,
|
| 623 |
-
inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider],
|
| 624 |
-
outputs=table,
|
| 625 |
-
)
|
| 626 |
-
|
| 627 |
with gr.Tab("🌍 MultiLang"):
|
| 628 |
gr.Markdown("## Coming soon...")
|
| 629 |
-
world_languages_plot = gr.Plot(create_world_languages_gdp_chart())
|
| 630 |
-
|
| 631 |
with gr.Tab("📈 Scaling Law"):
|
| 632 |
period_selector_2 = gr.Dropdown(label="Period", choices=time_list, value=last_period)
|
| 633 |
-
|
| 634 |
def update_plot(period):
|
| 635 |
new_fig = create_scaling_plot(all_data, period)
|
| 636 |
return new_fig
|
| 637 |
-
|
| 638 |
plot = gr.Plot(initial_fig)
|
| 639 |
period_selector_2.change(update_plot, inputs=period_selector_2, outputs=plot)
|
| 640 |
-
|
| 641 |
with gr.Tab("ℹ️ About"):
|
| 642 |
gr.Markdown(read_about_md())
|
| 643 |
-
|
| 644 |
with gr.Tab("🚀 Submit"):
|
| 645 |
with gr.Group():
|
| 646 |
with gr.Row():
|
|
@@ -648,5 +422,4 @@ with gr.Blocks(css=css) as demo:
|
|
| 648 |
submit = gr.Button("Submit", variant="primary", scale=0)
|
| 649 |
output = gr.Markdown("# Enter a public HF repo id, then hit Submit to add it to the evaluation queue.")
|
| 650 |
submit.click(fn=submit_model, inputs=model_name, outputs=output)
|
| 651 |
-
|
| 652 |
-
demo.launch(share=False)
|
|
|
|
| 7 |
from matplotlib.colors import LinearSegmentedColormap
|
| 8 |
import plotly.express as px
|
| 9 |
import plotly.graph_objects as go
|
| 10 |
+
# from sklearn.linear_model import LinearRegression
|
| 11 |
import numpy as np
|
| 12 |
from huggingface_hub import HfApi
|
| 13 |
from huggingface_hub.hf_api import HTTPError
|
| 14 |
from huggingface_hub.utils import GatedRepoError
|
| 15 |
from gradio_rangeslider import RangeSlider
|
| 16 |
import datetime
|
| 17 |
+
from gradio.themes.utils.colors import slate
|
| 18 |
|
| 19 |
load_dotenv()
|
| 20 |
webhook_url = os.environ.get("WEBHOOK_URL")
|
|
|
|
| 21 |
file_name_list = [
|
| 22 |
"14b",
|
| 23 |
"9b",
|
|
|
|
| 26 |
"1b5",
|
| 27 |
"other",
|
| 28 |
]
|
|
|
|
| 29 |
sheet_name_list = [
|
| 30 |
"cr",
|
| 31 |
"bpc",
|
| 32 |
"bpb",
|
| 33 |
]
|
|
|
|
| 34 |
metric_list = [
|
| 35 |
"Compression Rate (%)",
|
| 36 |
"Bits Per Character (BPC)",
|
| 37 |
"Bits Per Byte (BPB)",
|
| 38 |
]
|
|
|
|
| 39 |
model_size_list = [
|
| 40 |
"~14B",
|
| 41 |
"~9B",
|
|
|
|
| 44 |
"~1.5B",
|
| 45 |
"Other",
|
| 46 |
]
|
|
|
|
| 47 |
metric_to_sheet = {
|
| 48 |
"Compression Rate (%)": "cr",
|
| 49 |
"Bits Per Character (BPC)": "bpc",
|
| 50 |
"Bits Per Byte (BPB)": "bpb",
|
| 51 |
}
|
|
|
|
| 52 |
model_size_to_file_name = {
|
| 53 |
"~14B": "14b",
|
| 54 |
"~9B": "9b",
|
|
|
|
| 62 |
with open('about.md', 'r', encoding='utf-8') as f:
|
| 63 |
return f.read()
|
| 64 |
|
|
|
|
| 65 |
def rename_columns(df):
|
| 66 |
df.columns = [col.rsplit("_", maxsplit=1)[0] for col in df.columns]
|
| 67 |
return df
|
| 68 |
|
|
|
|
| 69 |
def get_folders_matching_format(directory):
|
| 70 |
pattern = re.compile(r"^\d{4}-\d{2}$")
|
| 71 |
folders = []
|
|
|
|
| 72 |
if not os.path.exists(directory):
|
| 73 |
return folders
|
|
|
|
| 74 |
for item in os.listdir(directory):
|
| 75 |
full_path = os.path.join(directory, item)
|
| 76 |
if os.path.isdir(full_path) and pattern.match(item):
|
| 77 |
folders.append(full_path)
|
|
|
|
| 78 |
return folders
|
| 79 |
|
|
|
|
| 80 |
def get_unique_column_names(data=None):
|
| 81 |
return [
|
| 82 |
"ao3_\u200benglish",
|
|
|
|
| 88 |
"github_\u200bpython",
|
| 89 |
]
|
| 90 |
|
|
|
|
| 91 |
def color_cell(value):
|
| 92 |
return "background-color: #fffdd0" if pd.notna(value) else "default"
|
| 93 |
|
| 94 |
+
# def color_cell_themed(value):
|
| 95 |
+
# return "background-color: rgba(255, 253, 208, 1.0)" if pd.notna(value) else "default"
|
| 96 |
|
| 97 |
+
# --- 核心改动点 1: 修改 update_table 函数 ---
|
| 98 |
+
# 添加 request: gr.Request = None 参数来接收主题模式信息
|
| 99 |
+
# 默认值为 None 是为了处理初始加载
|
| 100 |
+
def update_table(period: str, models_size: list, metric: str, visible_columns: list, color_columns: list, size_range: list, midpoint: float = 0.5, sort_by: str = "Average (lower=better)", ascending: bool = True, request: gr.Request = None):
|
| 101 |
+
# 打印日志并检查当前模式
|
| 102 |
+
is_dark_mode = request.is_dark if request else False
|
| 103 |
+
print(f"Updating - time: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}, period: {period}, models: {models_size}, metric: {metric}, visible_columns: {visible_columns}, color_columns: {color_columns}, size_range: {size_range}, sort_by: {sort_by}, ascending: {ascending}, is_dark: {is_dark_mode}\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
if not models_size:
|
| 106 |
return "No data available for the selected models and period."
|
| 107 |
+
|
|
|
|
| 108 |
target_period_data = all_data[period]
|
| 109 |
target_file_name = [model_size_to_file_name[model] for model in models_size]
|
| 110 |
sheet_name = metric_to_sheet[metric]
|
| 111 |
+
combined_data = pd.concat([df.dropna(axis=1, how="all") for df in [target_period_data[file_name][sheet_name] for file_name in target_file_name]], axis=0)
|
| 112 |
+
|
|
|
|
|
|
|
|
|
|
| 113 |
if len(combined_data) == 0:
|
| 114 |
return "No data available for the selected models and period."
|
| 115 |
+
|
|
|
|
|
|
|
| 116 |
combined_data = combined_data[combined_data["Parameters Count (B)"].between(size_range[0], size_range[1])]
|
| 117 |
combined_data.reset_index(drop=True, inplace=True)
|
| 118 |
+
|
| 119 |
if len(combined_data) == 0:
|
| 120 |
return "No data available for the selected models and period."
|
| 121 |
+
|
|
|
|
| 122 |
combined_data["Name"] = combined_data["Name"].apply(lambda x: x.replace(".pth", ""))
|
|
|
|
| 123 |
ordered_columns = get_unique_column_names()
|
| 124 |
relevant_columns = [col for col in ordered_columns if col in visible_columns and col not in ["Name", "Parameters Count (B)", "Average (The lower the better)"]]
|
| 125 |
|
| 126 |
+
if len(combined_data) > 0 and relevant_columns:
|
| 127 |
combined_data["Average (The lower the better)"] = round(combined_data[relevant_columns].mean(axis=1), 3)
|
| 128 |
+
|
| 129 |
+
combined_data = combined_data.rename(columns={"Parameters Count (B)": "Params (B)", "Average (The lower the better)": "Average (lower=better)"})
|
| 130 |
sorted_data = combined_data.sort_values(by=sort_by, ascending=ascending)
|
| 131 |
+
visible_columns_final = ["Name", "Params (B)", "Average (lower=better)"] + relevant_columns
|
| 132 |
+
filtered_data = sorted_data[visible_columns_final]
|
| 133 |
filtered_data.columns = [col.replace("_", " ") for col in filtered_data.columns]
|
|
|
|
| 134 |
formatter = {col: "{:.3f}" for col in filtered_data.columns if filtered_data[col].dtype in ["float64", "float32"]}
|
| 135 |
+
|
| 136 |
+
# --- 核心改动点 2: 根据主题模式选择不同的配色方案 ---
|
| 137 |
+
if is_dark_mode:
|
| 138 |
+
# 夜间模式配色 (绿 -> 深灰 -> 红)
|
| 139 |
+
colors = ["#2ca02c", "#2b2b2b", "#d62728"]
|
| 140 |
+
else:
|
| 141 |
+
# 日间模式配色 (绿 -> 白 -> 红)
|
| 142 |
+
colors = ["#63be7b", "#ffffff", "#f8696b"]
|
| 143 |
+
|
| 144 |
+
vmin, vmax, vmid = {}, {}, {}
|
| 145 |
for column in filtered_data.columns:
|
| 146 |
+
if column in ["Name", "Params (B)"]: continue
|
| 147 |
+
col_values = filtered_data[column].dropna()
|
|
|
|
| 148 |
if len(col_values) > 1:
|
| 149 |
sorted_values = np.sort(col_values)
|
| 150 |
vmin[column] = sorted_values.min()
|
|
|
|
| 152 |
idx = int(len(sorted_values) * midpoint)
|
| 153 |
vmid[column] = sorted_values[idx]
|
| 154 |
|
| 155 |
+
# --- 核心改动点 3: 修改样式函数以包含固定的黑色字体 ---
|
| 156 |
+
def custom_background_gradient(series, cmap, vmin_val, vmax_val, vmid_val):
|
| 157 |
+
if len(series) == 0: return series
|
|
|
|
| 158 |
def normalize(x):
|
| 159 |
+
if pd.isna(x): return 0.5 # Neutral for NaN
|
| 160 |
+
if vmid_val == vmin_val and x <= vmid_val: return 0.0
|
| 161 |
+
if vmid_val == vmax_val and x >= vmid_val: return 1.0
|
| 162 |
+
if vmid_val == vmin_val or vmid_val == vmax_val: return 0.5
|
| 163 |
+
if x <= vmid_val:
|
| 164 |
+
return 0.5 * (x - vmin_val) / (vmid_val - vmin_val)
|
| 165 |
else:
|
| 166 |
+
return 0.5 + 0.5 * (x - vmid_val) / (vmax_val - vmid_val)
|
|
|
|
| 167 |
normed = series.apply(normalize)
|
| 168 |
+
cmap_colors = [cmap(x) for x in normed]
|
| 169 |
+
# 在返回的CSS中同时设置 background-color 和 color
|
| 170 |
+
return [
|
| 171 |
+
"background-color: rgba({}, {}, {}, {}); color: black;".format(*[int(255 * c) for c in color[:3]], color[3])
|
| 172 |
+
for color in cmap_colors
|
| 173 |
+
]
|
| 174 |
|
| 175 |
target_color_columns = []
|
| 176 |
+
if "Average" in color_columns: target_color_columns.append("Average (lower=better)")
|
| 177 |
+
if "Individual Tests" in color_columns: target_color_columns.extend([col for col in filtered_data.columns if col not in ["Name", "Params (B)", "Average (lower=better)"]])
|
| 178 |
+
|
| 179 |
+
def color_params_column_dynamic(value):
|
| 180 |
+
if not pd.notna(value):
|
| 181 |
+
return "default"
|
| 182 |
+
|
| 183 |
+
# 2. 根据 is_dark_mode 返回不同的颜色
|
| 184 |
+
if is_dark_mode:
|
| 185 |
+
# 为夜间模式选择一个柔和、不刺眼的暗金色
|
| 186 |
+
# 字体颜色也设置为浅色以保证对比度
|
| 187 |
+
return "background-color: #4b4936; color: #f0f0f0;"
|
| 188 |
+
else:
|
| 189 |
+
# 为日间模式使用明亮的奶油色,字体为黑色
|
| 190 |
+
return "background-color: #fffdd0; color: black;"
|
| 191 |
+
|
| 192 |
+
styler = filtered_data.style.format(formatter).map(color_params_column_dynamic, subset=["Params (B)"])
|
| 193 |
for column in target_color_columns:
|
| 194 |
+
if column in vmin:
|
| 195 |
+
custom_cmap = LinearSegmentedColormap.from_list("custom_cmap", colors)
|
| 196 |
+
styler = styler.apply(custom_background_gradient, cmap=custom_cmap, vmin_val=vmin[column], vmax_val=vmax[column], vmid_val=vmid[column], subset=[column])
|
| 197 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
styler = styler.hide(axis="index")
|
|
|
|
| 199 |
widths = [300, 150, 150, 100, 100, 100, 100, 100, 100, 100, 100]
|
| 200 |
+
|
| 201 |
table_styles = []
|
| 202 |
+
table_styles.append({"selector": "th", "props": [("background-color", "var(--background-fill-secondary)"), ("color", "var(--body-text-color)"), ("padding", "8px"), ("font-weight", "bold")]})
|
| 203 |
+
table_styles.append({"selector": "table", "props": [("border-collapse", "collapse"), ("border", f"1px solid var(--border-color-primary)")]})
|
| 204 |
for i, w in enumerate(widths):
|
| 205 |
+
table_styles.append({"selector": f"th.col{i}, td.col{i}", "props": [("min-width", f"{w}px"), ("max-width", f"{w}px"), ("text-align", "center"), ("border", f"1px solid var(--border-color-primary)")]})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
styler = styler.set_table_styles(table_styles)
|
| 207 |
+
return styler.to_html()
|
|
|
|
|
|
|
|
|
|
| 208 |
|
| 209 |
def create_world_languages_gdp_chart():
|
| 210 |
languages = ["English", "Chinese", "Spanish", "Japanese", "German", "French", "Arabic", "Italian", "Portuguese", "Korean", "Other"]
|
| 211 |
shares = [27, 18, 8, 6, 5, 4, 3, 2, 2, 2, 23]
|
| 212 |
colors = ["#FF7F7F", "#FFA07A", "#FFDB58", "#90EE90", "#98FB98", "#87CEFA", "#B0C4DE", "#DDA0DD", "#D8BFD8", "#F0E68C", "#E0FFFF"]
|
|
|
|
| 213 |
fig = go.Figure(
|
| 214 |
data=[
|
| 215 |
go.Pie(
|
|
|
|
| 224 |
)
|
| 225 |
]
|
| 226 |
)
|
|
|
|
| 227 |
fig.update_layout(
|
| 228 |
title={
|
| 229 |
"text": "World Languages by Share of Global GDP",
|
|
|
|
| 238 |
height=500,
|
| 239 |
margin=dict(t=80, b=20, l=20, r=20),
|
| 240 |
)
|
|
|
|
| 241 |
return fig
|
| 242 |
|
|
|
|
| 243 |
def check_model_exists(model_id):
|
| 244 |
api = HfApi()
|
| 245 |
try:
|
|
|
|
| 253 |
else:
|
| 254 |
return "Error: " + str(e)
|
| 255 |
|
|
|
|
| 256 |
def submit_model(name):
|
| 257 |
if "Exists" not in check_model_exists(name):
|
| 258 |
return f"# ERROR: Model {name} does not exist on Hugging Face!"
|
|
|
|
| 259 |
try:
|
| 260 |
response = requests.post(webhook_url, json={"content": name})
|
| 261 |
if response.status_code == 200:
|
|
|
|
| 271 |
except Exception as e:
|
| 272 |
print(e)
|
| 273 |
return "ERROR: Unexpected error. Please try again later."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
def create_scaling_plot(all_data, period):
|
| 275 |
selected_columns = ["Name", "Parameters Count (B)", "Average (The lower the better)"]
|
| 276 |
target_data = all_data[period]
|
| 277 |
new_df = pd.DataFrame()
|
|
|
|
| 278 |
for size in target_data.keys():
|
| 279 |
new_df = pd.concat([new_df, target_data[size]["cr"].loc[:, selected_columns].dropna(axis=1, how="all")], axis=0)
|
|
|
|
| 280 |
x_values = new_df["Parameters Count (B)"].astype(float).tolist()
|
| 281 |
y_values = new_df["Average (The lower the better)"].astype(float).tolist()
|
| 282 |
names = new_df["Name"].tolist()
|
|
|
|
| 283 |
x_min, x_max = np.log10(min(x_values)), np.log10(max(x_values))
|
| 284 |
y_min, y_max = np.log10(min(y_values)), np.log10(max(y_values))
|
|
|
|
| 285 |
x_dtick = (x_max - x_min) / 4
|
| 286 |
y_dtick = (y_max - y_min) / 4
|
|
|
|
| 287 |
fig = go.Figure()
|
|
|
|
| 288 |
fig.add_trace(
|
| 289 |
go.Scatter(
|
| 290 |
x=x_values,
|
|
|
|
| 299 |
),
|
| 300 |
)
|
| 301 |
)
|
|
|
|
| 302 |
fig.update_layout(
|
| 303 |
title={"text": "Compression Rate Scaling Law", "x": 0.5, "xanchor": "center", "yanchor": "top"},
|
| 304 |
width=800,
|
|
|
|
| 324 |
autorange="reversed",
|
| 325 |
),
|
| 326 |
)
|
|
|
|
| 327 |
return fig
|
| 328 |
|
|
|
|
| 329 |
def read_all_data(folder_name):
|
| 330 |
all_data = {}
|
| 331 |
time_list = []
|
|
|
|
| 340 |
for sheet_name in sheet_name_list:
|
| 341 |
final_file_name = os.path.join(folder, file_name)
|
| 342 |
all_data[folder_name][file_name][sheet_name] = rename_columns(pd.read_excel(final_file_name + ".xlsx", sheet_name=sheet_name))
|
|
|
|
| 343 |
return all_data, time_list
|
| 344 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
all_data, time_list = read_all_data("data")
|
|
|
|
|
|
|
| 346 |
time_list.sort()
|
| 347 |
last_period = time_list[-1]
|
|
|
|
| 348 |
initial_fig = create_scaling_plot(all_data, last_period)
|
| 349 |
initial_metric = metric_list[0]
|
| 350 |
initial_columns = get_unique_column_names(all_data)
|
| 351 |
initial_colors = ["Average", "Individual Tests"]
|
| 352 |
initial_size_range = [0, 40]
|
| 353 |
+
# On the initial call to update_table, the `request` argument will be its default, None
|
| 354 |
initial_data = update_table(last_period, model_size_list, initial_metric, initial_columns, initial_colors, initial_size_range)
|
|
|
|
| 355 |
css = """
|
| 356 |
.gradio-container {
|
| 357 |
max-width: 95% !important;
|
|
|
|
| 370 |
width: 100% !important;
|
| 371 |
}
|
| 372 |
"""
|
|
|
|
| 373 |
TITLE_HTML = '<h1 style="text-align:center"><span style="font-size:1.3em">🏆 LLM Compression Leaderboard</span></h1>'
|
| 374 |
SUBTITLE_HTML = "<h1 style='text-align:center'><span style='font-size:0.8em'>Welcome to Uncheatable Eval LLM Compression Leaderboard, where fancy fine-tuning and cheating won't work 🚫; only compute 💻, data 📊, and real innovation 🔥 can prevail!</span></h1>"
|
| 375 |
+
# theme = gr.themes.Default(primary_hue=slate, secondary_hue=slate)
|
| 376 |
+
theme = gr.themes.Default()
|
| 377 |
+
with gr.Blocks(theme=theme, css=css) as demo:
|
| 378 |
gr.HTML(TITLE_HTML)
|
| 379 |
gr.HTML(SUBTITLE_HTML)
|
| 380 |
with gr.Tabs() as tabs:
|
|
|
|
| 389 |
midpoint_slider = gr.Slider(minimum=0.1, maximum=0.9, value=0.5, step=0.01, label="Color Gradient Midpoint")
|
| 390 |
color_selector = gr.CheckboxGroup(label="Colored Columns", choices=["Average", "Individual Tests"], value=initial_colors)
|
| 391 |
colfilter = gr.CheckboxGroup(label="Data Source", choices=get_unique_column_names(all_data), value=initial_columns)
|
|
|
|
| 392 |
table = gr.HTML(initial_data)
|
| 393 |
+
|
| 394 |
+
# --- Core change 4: update all .change() events, adding gr.Request() ---
|
| 395 |
+
# Define a shared inputs list to avoid repetition
|
| 396 |
+
shared_inputs = [period_selector, model_selector, metric_selector, colfilter, color_selector, size_range_slider, midpoint_slider]
|
| 397 |
+
|
| 398 |
+
period_selector.change(update_table, inputs=shared_inputs, outputs=table)
|
| 399 |
+
model_selector.change(update_table, inputs=shared_inputs, outputs=table)
|
| 400 |
+
metric_selector.change(update_table, inputs=shared_inputs, outputs=table)
|
| 401 |
+
colfilter.change(update_table, inputs=shared_inputs, outputs=table)
|
| 402 |
+
color_selector.change(update_table, inputs=shared_inputs, outputs=table)
|
| 403 |
+
size_range_slider.change(update_table, inputs=shared_inputs, outputs=table)
|
| 404 |
+
midpoint_slider.change(update_table, inputs=shared_inputs, outputs=table)
|
| 405 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 406 |
with gr.Tab("🌍 MultiLang"):
|
| 407 |
gr.Markdown("## Coming soon...")
|
| 408 |
+
# world_languages_plot = gr.Plot(create_world_languages_gdp_chart())
|
|
|
|
| 409 |
with gr.Tab("📈 Scaling Law"):
|
| 410 |
period_selector_2 = gr.Dropdown(label="Period", choices=time_list, value=last_period)
|
|
|
|
| 411 |
def update_plot(period):
    """Rebuild the scaling-law figure for the newly selected period."""
    # Simple delegator: the cached `all_data` dict holds every period's sheets.
    return create_scaling_plot(all_data, period)
|
|
|
|
| 414 |
plot = gr.Plot(initial_fig)
|
| 415 |
period_selector_2.change(update_plot, inputs=period_selector_2, outputs=plot)
|
|
|
|
| 416 |
with gr.Tab("ℹ️ About"):
|
| 417 |
gr.Markdown(read_about_md())
|
|
|
|
| 418 |
with gr.Tab("🚀 Submit"):
|
| 419 |
with gr.Group():
|
| 420 |
with gr.Row():
|
|
|
|
| 422 |
submit = gr.Button("Submit", variant="primary", scale=0)
|
| 423 |
output = gr.Markdown("# Enter a public HF repo id, then hit Submit to add it to the evaluation queue.")
|
| 424 |
submit.click(fn=submit_model, inputs=model_name, outputs=output)
|
| 425 |
+
demo.launch(share=False)
|
|
|