|
|
import pandas as pd |
|
|
from dash import html, dcc |
|
|
from dash_iconify import DashIconify |
|
|
import dash_mantine_components as dmc |
|
|
import base64 |
|
|
import countryflag |
|
|
|
|
|
# Shared inline style for pill-shaped action buttons/links (dark background,
# white bold text). Applied via the `style=` prop on Dash components.
button_style = {
    "display": "inline-block",
    "marginBottom": "10px",
    "marginRight": "15px",
    "marginTop": "30px",
    "padding": "6px 16px",
    "backgroundColor": "#082030",  # site-wide dark navy accent color
    "color": "white",
    "borderRadius": "6px",
    "textDecoration": "none",
    "fontWeight": "bold",
    "fontSize": "14px",
}
|
|
|
|
|
# Maps a Hugging Face author/organization handle to the relative path of its
# logo image. Consumed by build_metadata (author chips); authors missing here
# fall back to an emoji icon instead of an image.
company_icon_map = {
    "google": "../assets/icons/google.png",
    "distilbert": "../assets/images/hf.svg",
    "sentence-transformers": "../assets/images/hf.svg",
    "facebook": "../assets/icons/meta.png",
    "openai": "../assets/icons/openai.png",
    "amazon": "../assets/icons/amazon.png",
    "microsoft": "../assets/icons/microsoft.png",
}
|
|
|
|
|
# Emoji shown when countryflag.getflag() cannot resolve a "country" value
# (e.g. pseudo-countries like "User"). Keys are the cleaned country labels.
# NOTE(review): these values appear to be mojibake (encoding-corrupted emoji,
# likely originally a person/building/package glyph) — verify the file's
# encoding and restore the intended emoji.
country_emoji_fallback = {
    "User": "π€",
    "Organization": "π’",
    "Model": "π¦",
}
|
|
|
|
|
# For each supported leaderboard group-by column, the metadata columns that are
# collected per entry (for hover chips and the CSV download). Used by
# get_top_n_leaderboard via meta_cols_map.get(group_col, []).
meta_cols_map = {
    "org_country_single": ["org_country_single", "total_downloads"],
    "author": [
        "org_country_single",
        "author",
        "total_downloads",
    ],
    "derived_author": [
        "org_country_single",
        "derived_author",
        "total_downloads",
    ],
    "model": [
        "org_country_single",
        "author",
        "derived_author",
        "merged_modality",
        "total_downloads",
    ],
}
|
|
|
|
|
|
|
|
|
|
|
def chip(text, bg_color="#F0F0F0"):
    """Render *text* as a small rounded pill (inline html.Span).

    Args:
        text: Content displayed inside the chip.
        bg_color: CSS background color of the pill.

    Returns:
        dash html.Span styled as a chip.
    """
    pill_style = {
        "backgroundColor": bg_color,
        "padding": "4px 10px",
        "borderRadius": "12px",
        "margin": "2px",
        "display": "inline-flex",
        "alignItems": "center",
        "fontSize": "14px",
    }
    return html.Span(text, style=pill_style)
|
|
|
|
|
|
|
|
|
|
|
def progress_bar(percent, bar_color="#AC482A"):
    """Render a horizontal progress bar with a centered percentage label.

    Args:
        percent: Fill width, expected as a number in [0, 100].
        bar_color: CSS color of the filled portion.

    Returns:
        dash html.Div: grey track containing the colored fill and an
        absolutely-positioned label ("<percent>.1f%").
    """
    track_style = {
        "position": "relative",
        "backgroundColor": "#E0E0E0",
        "borderRadius": "8px",
        "height": "20px",
        "width": "100%",
        "overflow": "hidden",
    }
    # Colored fill; width animates via the CSS transition.
    fill = html.Div(
        style={
            "backgroundColor": bar_color,
            "width": f"{percent}%",
            "height": "100%",
            "borderRadius": "8px",
            "transition": "width 0.5s",
        }
    )
    # Centered text overlay showing the numeric value.
    label = html.Div(
        f"{percent:.1f}%",
        style={
            "position": "absolute",
            "top": 0,
            "left": "50%",
            "transform": "translateX(-50%)",
            "color": "black",
            "fontWeight": "bold",
            "fontSize": "12px",
            "lineHeight": "20px",
            "textAlign": "center",
        },
    )
    return html.Div(style=track_style, children=[fill, label])
|
|
|
|
|
|
|
|
|
|
|
def df_to_download_link(df, filename):
    """Render a right-aligned download icon that serves *df* as a CSV.

    The CSV is base64-embedded in a data URI so no server round-trip is
    needed when the user clicks the link.

    Args:
        df: DataFrame to serialize (index omitted).
        filename: Base name used for both the anchor's element id and the
            downloaded file name ("<filename>.csv").

    Returns:
        dash html.Div wrapping the html.A download anchor.
    """
    csv_string = df.to_csv(index=False)
    b64 = base64.b64encode(csv_string.encode()).decode()
    return html.Div(
        html.A(
            children=dmc.ActionIcon(
                DashIconify(icon="mdi:download", width=24),
                size="lg",
                color="#082030",
            ),
            # Bug fix: `filename` was previously ignored — the id and the
            # download attribute were hard-coded, so every table downloaded
            # under the same name and element ids collided across links.
            id=f"download-{filename}",
            download=f"{filename}.csv",
            href=f"data:text/csv;base64,{b64}",
            target="_blank",
            title="Download CSV",
            style={
                "padding": "6px 12px",
                "display": "inline-flex",
                "alignItems": "center",
                "justifyContent": "center",
            },
        ),
        style={"textAlign": "right"},
    )
|
|
|
|
|
|
|
|
|
|
|
def get_metadata_popover_content(icon, name, meta_type):
    """Build the hovercard text shown for a metadata chip.

    Args:
        icon: Chip icon (currently unused; kept for call-site compatibility).
        name: Value displayed on the chip.
        meta_type: One of "country", "author", "downloads", "modality",
            or anything else (falls through).

    Returns:
        A "<Label>: <name>" string for known meta types, otherwise *name*
        unchanged.
    """
    labels = {
        "country": "Country",
        "author": "Author/Organization",
        "downloads": "Total downloads",
        "modality": "Modality",
    }
    if meta_type in labels:
        return f"{labels[meta_type]}: {name}"
    return name
|
|
|
|
|
|
|
|
|
|
|
def chip_with_hovercard(text, bg_color="#F0F0F0", meta_type=None, icon=None):
    """Render a text chip whose hover reveals a labelled metadata card.

    Args:
        text: Content displayed inside the chip.
        bg_color: CSS background color of the pill.
        meta_type: Metadata category forwarded to get_metadata_popover_content.
        icon: Icon forwarded to get_metadata_popover_content.

    Returns:
        dmc.HoverCard wrapping the chip and its dropdown text.
    """
    pill_style = {
        "backgroundColor": bg_color,
        "padding": "4px 10px",
        "borderRadius": "12px",
        "margin": "2px",
        "display": "inline-flex",
        "alignItems": "center",
        "fontSize": "14px",
        "cursor": "pointer",
        "transition": "background-color 0.15s",
    }
    target = dmc.HoverCardTarget(
        html.Span(
            text,
            style=pill_style,
            className="chip-hover-darken",
        )
    )
    dropdown = dmc.HoverCardDropdown(
        dmc.Text(get_metadata_popover_content(icon, text, meta_type), size="sm")
    )
    return dmc.HoverCard(
        width="auto",
        shadow="md",
        position="top",
        children=[target, dropdown],
    )
|
|
|
|
|
|
|
|
|
|
|
def render_chips(metadata_list, chip_color):
    """Render a wrapping flex row of metadata chips.

    Args:
        metadata_list: Iterable of (icon, name, meta_type) tuples. An icon
            that is a string ending in an image extension is rendered as an
            <img> chip; any other icon is inlined as text (e.g. emoji).
        chip_color: Background color applied to every chip.

    Returns:
        dash html.Div containing one hovercard chip per metadata tuple.
    """
    image_suffixes = (".png", ".jpg", ".jpeg", ".svg")

    def _image_chip(icon, name, meta_type):
        # Chip whose icon is an image file, wrapped in its own hovercard.
        label = html.Span(
            [
                html.Img(
                    src=icon,
                    style={"height": "18px", "marginRight": "6px"},
                ),
                name,
            ],
            style={
                "backgroundColor": chip_color,
                "padding": "4px 10px",
                "borderRadius": "12px",
                "margin": "2px",
                "display": "inline-flex",
                "alignItems": "left",
                "fontSize": "14px",
                "cursor": "pointer",
            },
        )
        return dmc.HoverCard(
            width=220,
            shadow="md",
            position="top",
            children=[
                dmc.HoverCardTarget(label),
                dmc.HoverCardDropdown(
                    dmc.Text(
                        get_metadata_popover_content(icon, name, meta_type),
                        size="sm",
                    )
                ),
            ],
        )

    chips = []
    for icon, name, meta_type in metadata_list:
        if isinstance(icon, str) and icon.endswith(image_suffixes):
            chips.append(_image_chip(icon, name, meta_type))
        else:
            # Non-image icon: prepend it to the chip label as text.
            chips.append(
                chip_with_hovercard(f"{icon} {name}", chip_color, meta_type, icon)
            )
    return html.Div(
        chips, style={"display": "flex", "flexWrap": "wrap", "justifyContent": "left"}
    )
|
|
|
|
|
|
|
|
def render_table_content(
    df, download_df, chip_color, bar_color="#AC482A", filename="data"
):
    """Render a leaderboard table (Rank / Name / Metadata / % of Total) with a CSV download link.

    Args:
        df: DataFrame with columns "Name", "Metadata" (list of
            (icon, name, meta_type) tuples for render_chips) and "% of total".
        download_df: Flat DataFrame served behind the download icon.
        chip_color: Background color for the metadata chips.
        bar_color: Fill color of the percentage progress bars.
        filename: Base name forwarded to df_to_download_link.

    Returns:
        dash html.Div containing the download link above a horizontally
        scrollable table.
    """
    return html.Div(
        [
            # Right-aligned CSV download icon above the table.
            df_to_download_link(download_df, filename),
            # Scroll wrapper so the wide (minWidth 980px) table works on mobile.
            html.Div(
                html.Table(
                    [
                        html.Thead(
                            html.Tr(
                                [
                                    html.Th(
                                        "Rank",
                                        className="rank-col",
                                        style={
                                            "backgroundColor": "#F0F0F0",
                                            "textAlign": "left",
                                        },
                                    ),
                                    html.Th(
                                        "Name",
                                        className="name-col",
                                        style={
                                            "backgroundColor": "#F0F0F0",
                                            "textAlign": "left",
                                        },
                                    ),
                                    html.Th(
                                        "Metadata",
                                        className="metadata-col",
                                        style={
                                            "backgroundColor": "#F0F0F0",
                                            "textAlign": "left",
                                            "marginRight": "10px",
                                        },
                                    ),
                                    html.Th(
                                        "% of Total",
                                        className="percent-col",
                                        style={
                                            "backgroundColor": "#F0F0F0",
                                            "textAlign": "left",
                                        },
                                    ),
                                ]
                            )
                        ),
                        html.Tbody(
                            [
                                html.Tr(
                                    [
                                        # Rank = positional index + 1; assumes df has a
                                        # default RangeIndex — TODO confirm at call sites.
                                        html.Td(idx + 1, style={"textAlign": "center"}),
                                        html.Td(row["Name"], className="name-cell", style={"textAlign": "left"}),
                                        html.Td(render_chips(row["Metadata"], chip_color), className="metadata-cell", style={"textAlign": "left", "whiteSpace": "normal", "wordBreak": "break-word"}),
                                        html.Td(
                                            progress_bar(row["% of total"], bar_color),
                                            className="percent-cell",
                                            style={"textAlign": "center", "minWidth": "180px", "padding": "8px"},
                                        ),
                                    ]
                                )
                                for idx, row in df.iterrows()
                            ]
                        ),
                    ],
                    style={"borderCollapse": "collapse", "width": "100%", "minWidth": "980px", "tableLayout": "auto"},
                    className="leaderboard-table",
                ),
                className="leaderboard-scroll-wrapper",
                style={"overflowX": "auto", "-webkit-overflow-scrolling": "touch", "width": "100%"},
            ),
        ]
    )
|
|
|
|
|
|
|
|
|
|
|
def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggle=True):
    """
    Get top N entries for a leaderboard.

    Aggregates downloads by `group_col`, keeps the top N by total downloads,
    and builds (a) a display DataFrame whose "Metadata" column holds chip
    tuples for render_chips, and (b) a flat DataFrame for CSV download.

    Args:
        filtered_df: Pandas DataFrame (already filtered by time from DuckDB query)
        group_col: Column to group by
        top_n: Number of top entries to return
        derived_author_toggle: If True, attribute to model uploader (derived_author); if False, attribute to original model creator (author)

    Returns:
        tuple: (display_df, download_df)
    """

    # Sum downloads/percent per group and keep the top_n rows by downloads.
    top = (
        filtered_df.groupby(group_col)[["total_downloads", "percent_of_total"]]
        .sum()
        .nlargest(top_n, columns="total_downloads")
        .reset_index()
        .rename(
            columns={
                group_col: "Name",
                "total_downloads": "Total Value",
                "percent_of_total": "% of total",
            }
        )
    )

    # Downloadable copy with cleaned-up numeric formatting.
    download_top = top.copy()
    download_top["Total Value"] = download_top["Total Value"].astype(int)
    download_top["% of total"] = download_top["% of total"].round(2)

    # Which metadata columns to collect depends on the group-by column.
    meta_cols = meta_cols_map.get(group_col, [])

    # Per-name metadata: name -> {col: [unique values]}.
    meta_map = {}
    download_map = {}

    for name in top["Name"]:
        name_data = filtered_df[filtered_df[group_col] == name]
        meta_map[name] = {}
        download_map[name] = {}
        for col in meta_cols:
            if col in name_data.columns:
                unique_vals = name_data[col].unique()
                meta_map[name][col] = list(unique_vals)
                download_map[name][col] = list(unique_vals)

    def build_metadata(nm):
        # Build the list of (icon, label, meta_type) chip tuples for one entry.
        meta = meta_map.get(nm, {})
        chips = []

        # Country chips: try a real flag emoji, otherwise fall back.
        for c in meta.get("org_country_single", []):
            if c == "United States of America":
                c = "USA"
            if c == "user":
                c = "User"
            try:
                flag_emoji = countryflag.getflag(c)
                # countryflag returns the input (or empty) when unresolved.
                if not flag_emoji or flag_emoji == c:
                    flag_emoji = country_emoji_fallback.get(c, "π")
            except Exception:
                flag_emoji = country_emoji_fallback.get(c, "π")
            chips.append((flag_emoji, c, "country"))

        # Author chips: logo image if known, otherwise an emoji.
        author_key = "derived_author" if derived_author_toggle else "author"
        for a in meta.get(author_key, []):
            icon = company_icon_map.get(a, "")
            if icon == "":
                # NOTE(review): "merged_country_groups_single" is never added to
                # meta_map (it is absent from meta_cols_map for these group
                # columns), so this condition always uses the ["User"] default
                # and the org-branch icon appears unreachable — confirm intent.
                if meta.get("merged_country_groups_single", ["User"])[0] != "User":
                    icon = "π’"
                else:
                    icon = "π€"
            chips.append((icon, a, "author"))

        # Modality chips (only present for group_col == "model").
        for m in meta.get("merged_modality", []):
            if pd.notna(m):
                chips.append(("", m, "modality"))

        # Download-count chip with human-readable formatting.
        for d in meta.get("total_downloads", []):
            formatted_downloads = format_large_number(d)
            chips.append(("β¬οΈ", formatted_downloads, "downloads"))

        return chips

    def build_download_metadata(nm):
        # Flatten each metadata column to a comma-joined string for the CSV.
        meta = download_map.get(nm, {})
        download_info = {}

        for col in meta_cols:
            if col not in meta or not meta[col]:
                continue

            vals = meta.get(col, [])
            if vals:
                download_info[col] = ", ".join(str(v) for v in vals if pd.notna(v))
            else:
                download_info[col] = ""

        return download_info

    # Build chip metadata before renaming so lookups key on the raw names.
    top["Metadata"] = top["Name"].astype(object).apply(build_metadata)

    # Cosmetic rename for the pseudo-author "user".
    top["Name"] = top["Name"].replace("user", "User")

    # Append the flattened metadata columns to the download table.
    # Both frames share a fresh RangeIndex, so axis=1 concat aligns row-wise.
    download_info_list = [build_download_metadata(nm) for nm in download_top["Name"]]
    download_info_df = pd.DataFrame(download_info_list)
    download_top = pd.concat([download_top, download_info_df], axis=1)

    return top[["Name", "Metadata", "% of total"]], download_top
|
|
|
|
|
|
|
|
def get_top_n_from_duckdb(
    con, group_col, top_n=10, time_filter=None, view="all_downloads"
):
    """
    Query DuckDB directly to get top N entries with minimal data transfer.

    Builds and runs an aggregation query against `view`, normalizing country
    labels and computing each group's share of total downloads.

    Args:
        con: DuckDB connection object
        group_col: Column to group by
        top_n: Number of top entries
        time_filter: Optional tuple of (start_timestamp, end_timestamp)

    Returns:
        Pandas DataFrame with only the rows needed for top N
        (empty DataFrame on query error).
    """

    # Optional time-range filter; timestamps arrive as Unix seconds.
    # NOTE(review): the query is assembled with f-strings — `group_col`,
    # `view`, and `top_n` must come from trusted code, never user input.
    time_clause = ""
    if time_filter:
        start = pd.to_datetime(time_filter[0], unit="s")
        end = pd.to_datetime(time_filter[1], unit="s")
        time_clause = f"WHERE time >= '{start}' AND time <= '{end}'"

    # Normalize country labels when grouping by country.
    # NOTE(review): this CASE handles 'Online?' but the inline CASEs inside the
    # queries below do not — confirm whether that asymmetry is intentional.
    if group_col == "org_country_single":
        group_expr = """CASE
            WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
            WHEN org_country_single IN ('International', 'Online', 'Online?') THEN 'International/Online'
            ELSE org_country_single
        END"""
    else:
        group_expr = group_col

    # derived_author needs an extra lookup: attribute each author to the
    # country of their highest-download row, instead of ANY_VALUE.
    if group_col == "derived_author":
        query = f"""
        WITH base_data AS (
            SELECT
                {group_expr} AS group_key,
                CASE
                    WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
                    WHEN org_country_single IN ('International', 'Online') THEN 'International/Online'
                    ELSE org_country_single
                END AS org_country_single,
                author,
                derived_author,
                merged_country_groups_single,
                merged_modality,
                downloads,
                model
            FROM {view}
            {time_clause}
        ),

        -- Create a lookup table for derived_author -> country
        author_country_lookup AS (
            SELECT DISTINCT
                author,
                FIRST_VALUE(org_country_single) OVER (PARTITION BY author ORDER BY downloads DESC) AS author_country
            FROM base_data
            WHERE author IS NOT NULL
        ),

        total_downloads_cte AS (
            SELECT SUM(downloads) AS total_downloads_all
            FROM base_data
        ),

        top_items AS (
            SELECT
                b.group_key AS name,
                SUM(b.downloads) AS total_downloads,
                ROUND(SUM(b.downloads) * 100.0 / t.total_downloads_all, 2) AS percent_of_total,
                COALESCE(acl.author_country, ANY_VALUE(b.org_country_single)) AS org_country_single,
                ANY_VALUE(b.author) AS author,
                ANY_VALUE(b.derived_author) AS derived_author,
                ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single,
                ANY_VALUE(b.merged_modality) AS merged_modality,
                ANY_VALUE(b.model) AS model
            FROM base_data b
            CROSS JOIN total_downloads_cte t
            LEFT JOIN author_country_lookup acl ON b.group_key = acl.author
            GROUP BY b.group_key, acl.author_country, t.total_downloads_all
        )

        SELECT *
        FROM top_items
        ORDER BY total_downloads DESC
        LIMIT {top_n};
        """
    else:
        # Generic path: one aggregation pass, ANY_VALUE for metadata columns.
        query = f"""
        WITH base_data AS (
            SELECT
                {group_expr} AS group_key,
                CASE
                    WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
                    WHEN org_country_single IN ('International', 'Online') THEN 'International/Online'
                    ELSE org_country_single
                END AS org_country_single,
                author,
                derived_author,
                merged_country_groups_single,
                merged_modality,
                downloads,
                model
            FROM {view}
            {time_clause}
        ),

        total_downloads_cte AS (
            SELECT SUM(downloads) AS total_downloads_all
            FROM base_data
        ),

        top_items AS (
            SELECT
                b.group_key AS name,
                SUM(b.downloads) AS total_downloads,
                ROUND(SUM(b.downloads) * 100.0 / t.total_downloads_all, 2) AS percent_of_total,
                ANY_VALUE(b.org_country_single) AS org_country_single,
                ANY_VALUE(b.author) AS author,
                ANY_VALUE(b.derived_author) AS derived_author,
                ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single,
                ANY_VALUE(b.merged_modality) AS merged_modality,
                ANY_VALUE(b.model) AS model
            FROM base_data b
            CROSS JOIN total_downloads_cte t
            GROUP BY b.group_key, t.total_downloads_all
        )

        SELECT *
        FROM top_items
        ORDER BY total_downloads DESC
        LIMIT {top_n};
        """

    # Best-effort execution: callers get an empty frame on failure.
    try:
        return con.execute(query).fetchdf()
    except Exception as e:
        print(f"Error querying DuckDB: {e}")
        return pd.DataFrame()
|
|
|
|
|
|
|
|
def format_large_number(n):
    """Shorten large numbers, e.g. 5,000,000 -> '5.0 million'.

    Args:
        n: Non-negative number of downloads.

    Returns:
        str: one decimal place with a unit suffix for values >= 1000,
        otherwise the integer value as a string.
    """
    scales = (
        (1_000_000_000, " billion"),
        (1_000_000, " million"),
        (1_000, "k"),
    )
    for divisor, suffix in scales:
        if n >= divisor:
            return f"{n / divisor:.1f}{suffix}"
    return str(int(n))
|
|
|