|
|
import pandas as pd |
|
|
from dash import html, dcc |
|
|
from dash_iconify import DashIconify |
|
|
import dash_mantine_components as dmc |
|
|
import base64 |
|
|
import countryflag |
|
|
|
|
|
button_style = { |
|
|
"display": "inline-block", |
|
|
"marginBottom": "10px", |
|
|
"marginRight": "15px", |
|
|
"marginTop": "30px", |
|
|
"padding": "6px 16px", |
|
|
"backgroundColor": "#082030", |
|
|
"color": "white", |
|
|
"borderRadius": "6px", |
|
|
"textDecoration": "none", |
|
|
"fontWeight": "bold", |
|
|
"fontSize": "14px", |
|
|
} |
|
|
|
|
|
company_icon_map = { |
|
|
"google": "../assets/icons/google.png", |
|
|
"distilbert": "../assets/icons/hugging-face.png", |
|
|
"sentence-transformers": "../assets/icons/hugging-face.png", |
|
|
"facebook": "../assets/icons/meta.png", |
|
|
"openai": "../assets/icons/openai.png", |
|
|
} |
|
|
|
|
|
country_emoji_fallback = { |
|
|
"User": "π€", |
|
|
"Organization": "π’", |
|
|
"Model": "π¦", |
|
|
} |
|
|
|
|
|
meta_cols_map = { |
|
|
"org_country_single": ["org_country_single"], |
|
|
"author": ["org_country_single", "author", "merged_country_groups_single"], |
|
|
"derived_author": ["org_country_single", "derived_author", "merged_country_groups_single"], |
|
|
"model": [ |
|
|
"org_country_single", |
|
|
"author", |
|
|
"derived_author", |
|
|
"merged_country_groups_single", |
|
|
"merged_modality", |
|
|
"total_downloads", |
|
|
], |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
def chip(text, bg_color="#F0F0F0"): |
|
|
return html.Span( |
|
|
text, |
|
|
style={ |
|
|
"backgroundColor": bg_color, |
|
|
"padding": "4px 10px", |
|
|
"borderRadius": "12px", |
|
|
"margin": "2px", |
|
|
"display": "inline-flex", |
|
|
"alignItems": "center", |
|
|
"fontSize": "14px", |
|
|
}, |
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
def progress_bar(percent, bar_color="#AC482A"): |
|
|
return html.Div( |
|
|
style={ |
|
|
"position": "relative", |
|
|
"backgroundColor": "#E0E0E0", |
|
|
"borderRadius": "8px", |
|
|
"height": "20px", |
|
|
"width": "100%", |
|
|
"overflow": "hidden", |
|
|
}, |
|
|
children=[ |
|
|
html.Div( |
|
|
style={ |
|
|
"backgroundColor": bar_color, |
|
|
"width": f"{percent}%", |
|
|
"height": "100%", |
|
|
"borderRadius": "8px", |
|
|
"transition": "width 0.5s", |
|
|
} |
|
|
), |
|
|
html.Div( |
|
|
f"{percent:.1f}%", |
|
|
style={ |
|
|
"position": "absolute", |
|
|
"top": 0, |
|
|
"left": "50%", |
|
|
"transform": "translateX(-50%)", |
|
|
"color": "black", |
|
|
"fontWeight": "bold", |
|
|
"fontSize": "12px", |
|
|
"lineHeight": "20px", |
|
|
"textAlign": "center", |
|
|
}, |
|
|
), |
|
|
], |
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
def df_to_download_link(df, filename): |
|
|
csv_string = df.to_csv(index=False) |
|
|
b64 = base64.b64encode(csv_string.encode()).decode() |
|
|
return html.Div( |
|
|
html.A( |
|
|
children=dmc.ActionIcon( |
|
|
DashIconify(icon="mdi:download", width=24), |
|
|
size="lg", |
|
|
color="#082030", |
|
|
), |
|
|
id=f"download-{filename}", |
|
|
download=f"{filename}.csv", |
|
|
href=f"data:text/csv;base64,{b64}", |
|
|
target="_blank", |
|
|
title="Download CSV", |
|
|
style={ |
|
|
"padding": "6px 12px", |
|
|
"display": "inline-flex", |
|
|
"alignItems": "center", |
|
|
"justifyContent": "center", |
|
|
}, |
|
|
), |
|
|
style={"textAlign": "right"}, |
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
def get_metadata_popover_content(icon, name, meta_type): |
|
|
popover_texts = { |
|
|
"country": f"Country: {name}", |
|
|
"author": f"Author/Organization: {name}", |
|
|
"downloads": f"Total downloads: {name}", |
|
|
"modality": f"Modality: {name}", |
|
|
"parameters": f"Estimated parameters: {name}", |
|
|
} |
|
|
return popover_texts.get(meta_type, name) |
|
|
|
|
|
|
|
|
|
|
|
def chip_with_hovercard(text, bg_color="#F0F0F0", meta_type=None, icon=None): |
|
|
hovercard_content = get_metadata_popover_content(icon, text, meta_type) |
|
|
return dmc.HoverCard( |
|
|
width=220, |
|
|
shadow="md", |
|
|
position="top", |
|
|
children=[ |
|
|
dmc.HoverCardTarget( |
|
|
html.Span( |
|
|
text, |
|
|
style={ |
|
|
"backgroundColor": bg_color, |
|
|
"padding": "4px 10px", |
|
|
"borderRadius": "12px", |
|
|
"margin": "2px", |
|
|
"display": "inline-flex", |
|
|
"alignItems": "center", |
|
|
"fontSize": "14px", |
|
|
"cursor": "pointer", |
|
|
}, |
|
|
) |
|
|
), |
|
|
dmc.HoverCardDropdown( |
|
|
dmc.Text(hovercard_content, size="sm") |
|
|
), |
|
|
], |
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
def render_chips(metadata_list, chip_color): |
|
|
chips = [] |
|
|
for icon, name, meta_type in metadata_list: |
|
|
if isinstance(icon, str) and icon.endswith((".png", ".jpg", ".jpeg", ".svg")): |
|
|
chips.append( |
|
|
dmc.HoverCard( |
|
|
width=220, |
|
|
shadow="md", |
|
|
position="top", |
|
|
children=[ |
|
|
dmc.HoverCardTarget( |
|
|
html.Span( |
|
|
[ |
|
|
html.Img( |
|
|
src=icon, style={"height": "18px", "marginRight": "6px"} |
|
|
), |
|
|
name, |
|
|
], |
|
|
style={ |
|
|
"backgroundColor": chip_color, |
|
|
"padding": "4px 10px", |
|
|
"borderRadius": "12px", |
|
|
"margin": "2px", |
|
|
"display": "inline-flex", |
|
|
"alignItems": "left", |
|
|
"fontSize": "14px", |
|
|
"cursor": "pointer", |
|
|
}, |
|
|
) |
|
|
), |
|
|
dmc.HoverCardDropdown( |
|
|
dmc.Text(get_metadata_popover_content(icon, name, meta_type), size="sm") |
|
|
), |
|
|
], |
|
|
) |
|
|
) |
|
|
else: |
|
|
chips.append(chip_with_hovercard(f"{icon} {name}", chip_color, meta_type, icon)) |
|
|
return html.Div( |
|
|
chips, style={"display": "flex", "flexWrap": "wrap", "justifyContent": "left"} |
|
|
) |
|
|
|
|
|
|
|
|
def render_table_content( |
|
|
df, download_df, chip_color, bar_color="#AC482A", filename="data" |
|
|
): |
|
|
return html.Div( |
|
|
[ |
|
|
|
|
|
df_to_download_link(download_df, filename), |
|
|
html.Table( |
|
|
[ |
|
|
html.Thead( |
|
|
html.Tr( |
|
|
[ |
|
|
html.Th( |
|
|
"Rank", |
|
|
style={ |
|
|
"backgroundColor": "#F0F0F0", |
|
|
"textAlign": "left", |
|
|
}, |
|
|
), |
|
|
html.Th( |
|
|
"Name", |
|
|
style={ |
|
|
"backgroundColor": "#F0F0F0", |
|
|
"textAlign": "left", |
|
|
}, |
|
|
), |
|
|
html.Th( |
|
|
"Metadata", |
|
|
style={ |
|
|
"backgroundColor": "#F0F0F0", |
|
|
"textAlign": "left", |
|
|
"marginRight": "10px", |
|
|
}, |
|
|
), |
|
|
html.Th( |
|
|
"% of Total", |
|
|
style={ |
|
|
"backgroundColor": "#F0F0F0", |
|
|
"textAlign": "left", |
|
|
}, |
|
|
), |
|
|
] |
|
|
) |
|
|
), |
|
|
html.Tbody( |
|
|
[ |
|
|
html.Tr( |
|
|
[ |
|
|
html.Td(idx + 1, style={"textAlign": "center"}), |
|
|
html.Td(row["Name"], style={"textAlign": "left"}), |
|
|
html.Td(render_chips(row["Metadata"], chip_color)), |
|
|
html.Td( |
|
|
progress_bar(row["% of total"], bar_color), |
|
|
style={"textAlign": "center"}, |
|
|
), |
|
|
] |
|
|
) |
|
|
for idx, row in df.iterrows() |
|
|
] |
|
|
), |
|
|
], |
|
|
style={"borderCollapse": "collapse", "width": "100%"}, |
|
|
), |
|
|
] |
|
|
) |
|
|
|
|
|
|
|
|
def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggle=True): |
|
|
""" |
|
|
Get top N entries for a leaderboard |
|
|
|
|
|
Args: |
|
|
filtered_df: Pandas DataFrame (already filtered by time from DuckDB query) |
|
|
group_col: Column to group by |
|
|
top_n: Number of top entries to return |
|
|
derived_author_toggle: Whether to use derived_author or author column |
|
|
|
|
|
Returns: |
|
|
tuple: (display_df, download_df) |
|
|
""" |
|
|
|
|
|
|
|
|
top = ( |
|
|
filtered_df.groupby(group_col)[["total_downloads", "percent_of_total"]] |
|
|
.sum() |
|
|
.nlargest(top_n, columns="total_downloads") |
|
|
.reset_index() |
|
|
.rename(columns={group_col: "Name", "total_downloads": "Total Value", "percent_of_total": "% of total"}) |
|
|
) |
|
|
|
|
|
|
|
|
download_top = top.copy() |
|
|
download_top["Total Value"] = download_top["Total Value"].astype(int) |
|
|
download_top["% of total"] = download_top["% of total"].round(2) |
|
|
|
|
|
|
|
|
top["Name"] = top["Name"].replace("User", "user") |
|
|
|
|
|
|
|
|
meta_cols = meta_cols_map.get(group_col, []) |
|
|
|
|
|
|
|
|
meta_map = {} |
|
|
download_map = {} |
|
|
|
|
|
for name in top["Name"]: |
|
|
name_data = filtered_df[filtered_df[group_col] == name] |
|
|
meta_map[name] = {} |
|
|
download_map[name] = {} |
|
|
for col in meta_cols: |
|
|
if col in name_data.columns: |
|
|
unique_vals = name_data[col].unique() |
|
|
meta_map[name][col] = list(unique_vals) |
|
|
download_map[name][col] = list(unique_vals) |
|
|
|
|
|
|
|
|
def build_metadata(nm): |
|
|
meta = meta_map.get(nm, {}) |
|
|
chips = [] |
|
|
|
|
|
|
|
|
for c in meta.get("org_country_single", []): |
|
|
if c == "United States of America": |
|
|
c = "USA" |
|
|
if c == "user": |
|
|
c = "User" |
|
|
|
|
|
try: |
|
|
flag_emoji = countryflag.getflag(c) |
|
|
|
|
|
if not flag_emoji or flag_emoji == c: |
|
|
flag_emoji = country_emoji_fallback.get(c, "π") |
|
|
except Exception: |
|
|
flag_emoji = country_emoji_fallback.get(c, "π") |
|
|
chips.append((flag_emoji, c, "country")) |
|
|
|
|
|
|
|
|
author_key = "derived_author" if derived_author_toggle else "author" |
|
|
for a in meta.get(author_key, []): |
|
|
icon = company_icon_map.get(a, "") |
|
|
if icon == "": |
|
|
if meta.get("merged_country_groups_single", ["User"])[0] != "User": |
|
|
icon = "π’" |
|
|
else: |
|
|
icon = "π€" |
|
|
chips.append((icon, a, "author")) |
|
|
|
|
|
|
|
|
total_downloads = sum( |
|
|
d for d in meta.get("total_downloads", []) if pd.notna(d) |
|
|
) |
|
|
if total_downloads: |
|
|
chips.append(("β¬οΈ", f"{int(total_downloads):,}", "downloads")) |
|
|
|
|
|
|
|
|
for m in meta.get("merged_modality", []): |
|
|
if pd.notna(m): |
|
|
chips.append(("", m, "modality")) |
|
|
|
|
|
return chips |
|
|
|
|
|
|
|
|
def build_download_metadata(nm): |
|
|
meta = download_map.get(nm, {}) |
|
|
download_info = {} |
|
|
|
|
|
for col in meta_cols: |
|
|
if col not in meta or not meta[col]: |
|
|
continue |
|
|
|
|
|
vals = meta.get(col, []) |
|
|
if vals: |
|
|
download_info[col] = ", ".join(str(v) for v in vals if pd.notna(v)) |
|
|
else: |
|
|
download_info[col] = "" |
|
|
|
|
|
return download_info |
|
|
|
|
|
|
|
|
top["Metadata"] = top["Name"].astype(object).apply(build_metadata) |
|
|
|
|
|
|
|
|
download_info_list = [build_download_metadata(nm) for nm in download_top["Name"]] |
|
|
download_info_df = pd.DataFrame(download_info_list) |
|
|
download_top = pd.concat([download_top, download_info_df], axis=1) |
|
|
|
|
|
return top[["Name", "Metadata", "% of total"]], download_top |
|
|
|
|
|
|
|
|
def get_top_n_from_duckdb(con, group_col, top_n=10, time_filter=None, view="all_downloads"): |
|
|
""" |
|
|
Query DuckDB directly to get top N entries with minimal data transfer |
|
|
|
|
|
Args: |
|
|
con: DuckDB connection object |
|
|
group_col: Column to group by |
|
|
top_n: Number of top entries |
|
|
time_filter: Optional tuple of (start_timestamp, end_timestamp) |
|
|
|
|
|
Returns: |
|
|
Pandas DataFrame with only the rows needed for top N |
|
|
""" |
|
|
|
|
|
time_clause = "" |
|
|
if time_filter: |
|
|
start = pd.to_datetime(time_filter[0], unit="s") |
|
|
end = pd.to_datetime(time_filter[1], unit="s") |
|
|
time_clause = f"WHERE time >= '{start}' AND time <= '{end}'" |
|
|
|
|
|
|
|
|
query = f""" |
|
|
WITH base_data AS ( |
|
|
SELECT |
|
|
{group_col}, |
|
|
CASE |
|
|
WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America' |
|
|
WHEN org_country_single IN ('International', 'Online') THEN 'International/Online' |
|
|
ELSE org_country_single |
|
|
END AS org_country_single, |
|
|
author, |
|
|
derived_author, |
|
|
merged_country_groups_single, |
|
|
merged_modality, |
|
|
downloads, |
|
|
model |
|
|
FROM {view} |
|
|
{time_clause} |
|
|
), |
|
|
|
|
|
-- Compute the total downloads for all rows in the time range |
|
|
total_downloads_cte AS ( |
|
|
SELECT SUM(downloads) AS total_downloads_all |
|
|
FROM base_data |
|
|
), |
|
|
|
|
|
-- Compute per-group totals and their percentage of all downloads |
|
|
top_items AS ( |
|
|
SELECT |
|
|
b.{group_col} AS name, |
|
|
SUM(b.downloads) AS total_downloads, |
|
|
ROUND(SUM(b.downloads) * 100.0 / t.total_downloads_all, 2) AS percent_of_total, |
|
|
-- Pick first non-null metadata values for reference |
|
|
ANY_VALUE(b.org_country_single) AS org_country_single, |
|
|
ANY_VALUE(b.author) AS author, |
|
|
ANY_VALUE(b.derived_author) AS derived_author, |
|
|
ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single, |
|
|
ANY_VALUE(b.merged_modality) AS merged_modality, |
|
|
ANY_VALUE(b.model) AS model |
|
|
FROM base_data b |
|
|
CROSS JOIN total_downloads_cte t |
|
|
GROUP BY b.{group_col}, t.total_downloads_all |
|
|
) |
|
|
|
|
|
SELECT * |
|
|
FROM top_items |
|
|
ORDER BY total_downloads DESC |
|
|
LIMIT {top_n}; |
|
|
""" |
|
|
|
|
|
try: |
|
|
return con.execute(query).fetchdf() |
|
|
except Exception as e: |
|
|
print(f"Error querying DuckDB: {e}") |
|
|
return pd.DataFrame() |