Add derived author and text
Browse files
- app.py +43 -26
- graphs/leaderboard.py +11 -7
app.py
CHANGED
|
@@ -174,6 +174,7 @@ app.layout = dmc.MantineProvider(
|
|
| 174 |
},
|
| 175 |
children=[
|
| 176 |
dcc.Store(id="selected-view", data="all_downloads"),
|
|
|
|
| 177 |
html.Div(
|
| 178 |
[
|
| 179 |
# Header
|
|
@@ -184,7 +185,7 @@ app.layout = dmc.MantineProvider(
|
|
| 184 |
html.Div(
|
| 185 |
[
|
| 186 |
html.Div(
|
| 187 |
-
children="
|
| 188 |
style={
|
| 189 |
"fontSize": 22,
|
| 190 |
"fontWeight": "700",
|
|
@@ -192,7 +193,7 @@ app.layout = dmc.MantineProvider(
|
|
| 192 |
},
|
| 193 |
),
|
| 194 |
html.Div(
|
| 195 |
-
children="
|
| 196 |
style={
|
| 197 |
"fontSize": 13,
|
| 198 |
"marginTop": 6,
|
|
@@ -309,7 +310,7 @@ app.layout = dmc.MantineProvider(
|
|
| 309 |
# Intro / description below header (kept but styled to match layout)
|
| 310 |
# Title
|
| 311 |
html.Div(
|
| 312 |
-
children="Model Leaderboard",
|
| 313 |
style={
|
| 314 |
"fontSize": 40,
|
| 315 |
"fontWeight": "700",
|
|
@@ -319,7 +320,7 @@ app.layout = dmc.MantineProvider(
|
|
| 319 |
},
|
| 320 |
),
|
| 321 |
html.Div(
|
| 322 |
-
children="
|
| 323 |
style={
|
| 324 |
"fontSize": 14,
|
| 325 |
"marginTop": 18,
|
|
@@ -335,7 +336,7 @@ app.layout = dmc.MantineProvider(
|
|
| 335 |
html.Div(
|
| 336 |
[
|
| 337 |
html.Div(
|
| 338 |
-
"Select
|
| 339 |
style={
|
| 340 |
"fontWeight": "700",
|
| 341 |
"marginBottom": 8,
|
|
@@ -360,7 +361,7 @@ app.layout = dmc.MantineProvider(
|
|
| 360 |
mb=10,
|
| 361 |
),
|
| 362 |
html.Div(
|
| 363 |
-
"Choose whether to view all downloads or only
|
| 364 |
style={
|
| 365 |
"fontSize": 13,
|
| 366 |
"color": "#555",
|
|
@@ -371,7 +372,7 @@ app.layout = dmc.MantineProvider(
|
|
| 371 |
html.Div(
|
| 372 |
[
|
| 373 |
html.Div(
|
| 374 |
-
"Select
|
| 375 |
style={
|
| 376 |
"fontWeight": "700",
|
| 377 |
"marginBottom": 8,
|
|
@@ -379,13 +380,14 @@ app.layout = dmc.MantineProvider(
|
|
| 379 |
},
|
| 380 |
),
|
| 381 |
dmc.Switch(
|
|
|
|
| 382 |
color="#AC482A",
|
| 383 |
label="Derived Authors",
|
| 384 |
checked=True,
|
| 385 |
mb=10,
|
| 386 |
),
|
| 387 |
html.Div(
|
| 388 |
-
"
|
| 389 |
style={
|
| 390 |
"fontSize": 13,
|
| 391 |
"color": "#555",
|
|
@@ -418,7 +420,7 @@ app.layout = dmc.MantineProvider(
|
|
| 418 |
),
|
| 419 |
time_slider,
|
| 420 |
html.Div(
|
| 421 |
-
"Adjust the time range to filter leaderboard results by model
|
| 422 |
style={
|
| 423 |
"fontSize": 13,
|
| 424 |
"color": "#555",
|
|
@@ -453,12 +455,10 @@ app.layout = dmc.MantineProvider(
|
|
| 453 |
html.Span("All Downloads", style={"fontWeight": "600", "color": "#AC482A"}),
|
| 454 |
" and ",
|
| 455 |
html.Span("Filtered Downloads", style={"fontWeight": "600", "color": "#AC482A"}),
|
| 456 |
-
" to compare
|
| 457 |
-
"You can also toggle
|
| 458 |
-
html.Span("
|
| 459 |
-
"
|
| 460 |
-
html.Span("Relative", style={"fontWeight": "600", "color": "#AC482A"}),
|
| 461 |
-
" to see raw counts or percentages."
|
| 462 |
],
|
| 463 |
style={
|
| 464 |
"fontSize": 13,
|
|
@@ -672,12 +672,12 @@ def _get_filtered_top_n_from_duckdb(slider_value, group_col, top_n, view="all_do
|
|
| 672 |
SELECT
|
| 673 |
{group_col},
|
| 674 |
CASE
|
| 675 |
-
WHEN org_country_single
|
| 676 |
-
WHEN org_country_single
|
| 677 |
-
WHEN org_country_single = 'Online' THEN 'International/Online'
|
| 678 |
ELSE org_country_single
|
| 679 |
END AS org_country_single,
|
| 680 |
author,
|
|
|
|
| 681 |
merged_country_groups_single,
|
| 682 |
merged_modality,
|
| 683 |
downloads,
|
|
@@ -701,6 +701,7 @@ def _get_filtered_top_n_from_duckdb(slider_value, group_col, top_n, view="all_do
|
|
| 701 |
-- Pick first non-null metadata values for reference
|
| 702 |
ANY_VALUE(b.org_country_single) AS org_country_single,
|
| 703 |
ANY_VALUE(b.author) AS author,
|
|
|
|
| 704 |
ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single,
|
| 705 |
ANY_VALUE(b.merged_modality) AS merged_modality,
|
| 706 |
ANY_VALUE(b.model) AS model
|
|
@@ -727,6 +728,7 @@ def _leaderboard_callback_logic(
|
|
| 727 |
default_label="▼ Show Top 50",
|
| 728 |
chip_color="#F0F9FF",
|
| 729 |
view="all_downloads",
|
|
|
|
| 730 |
):
|
| 731 |
# Normalize label on first load
|
| 732 |
if current_label is None:
|
|
@@ -746,8 +748,8 @@ def _leaderboard_callback_logic(
|
|
| 746 |
# Get filtered and aggregated data directly from DuckDB
|
| 747 |
df_filtered = _get_filtered_top_n_from_duckdb(slider_value, group_col, top_n, view=view)
|
| 748 |
|
| 749 |
-
# Process the already-filtered data
|
| 750 |
-
df, download_df = get_top_n_leaderboard(df_filtered, group_col, top_n)
|
| 751 |
return render_table_content(
|
| 752 |
df, download_df, chip_color=chip_color, filename=filename
|
| 753 |
), new_label
|
|
@@ -756,6 +758,15 @@ def _leaderboard_callback_logic(
|
|
| 756 |
# -- end helpers --
|
| 757 |
|
| 758 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 759 |
# Callbacks for interactivity (modularized)
|
| 760 |
@app.callback(
|
| 761 |
Output("top_countries-table", "children"),
|
|
@@ -763,9 +774,10 @@ def _leaderboard_callback_logic(
|
|
| 763 |
Input("top_countries-toggle", "n_clicks"),
|
| 764 |
Input("time-slider", "value"),
|
| 765 |
Input("selected-view", "data"),
|
|
|
|
| 766 |
State("top_countries-toggle", "children"),
|
| 767 |
)
|
| 768 |
-
def update_top_countries(n_clicks, slider_value, selected_view, current_label):
|
| 769 |
return _leaderboard_callback_logic(
|
| 770 |
n_clicks,
|
| 771 |
slider_value,
|
|
@@ -775,6 +787,7 @@ def update_top_countries(n_clicks, slider_value, selected_view, current_label):
|
|
| 775 |
default_label="▼ Show Top 50",
|
| 776 |
chip_color="#F0F9FF",
|
| 777 |
view=selected_view,
|
|
|
|
| 778 |
)
|
| 779 |
|
| 780 |
|
|
@@ -784,18 +797,22 @@ def update_top_countries(n_clicks, slider_value, selected_view, current_label):
|
|
| 784 |
Input("top_developers-toggle", "n_clicks"),
|
| 785 |
Input("time-slider", "value"),
|
| 786 |
Input("selected-view", "data"),
|
|
|
|
| 787 |
State("top_developers-toggle", "children"),
|
| 788 |
)
|
| 789 |
-
def update_top_developers(n_clicks, slider_value, selected_view, current_label):
|
|
|
|
|
|
|
| 790 |
return _leaderboard_callback_logic(
|
| 791 |
n_clicks,
|
| 792 |
slider_value,
|
| 793 |
current_label,
|
| 794 |
-
group_col=
|
| 795 |
filename="top_developers",
|
| 796 |
default_label="▼ Show Top 50",
|
| 797 |
chip_color="#F0F9FF",
|
| 798 |
view=selected_view,
|
|
|
|
| 799 |
)
|
| 800 |
|
| 801 |
|
|
@@ -805,9 +822,10 @@ def update_top_developers(n_clicks, slider_value, selected_view, current_label):
|
|
| 805 |
Input("top_models-toggle", "n_clicks"),
|
| 806 |
Input("time-slider", "value"),
|
| 807 |
Input("selected-view", "data"),
|
|
|
|
| 808 |
State("top_models-toggle", "children"),
|
| 809 |
)
|
| 810 |
-
def update_top_models(n_clicks, slider_value, selected_view, current_label):
|
| 811 |
return _leaderboard_callback_logic(
|
| 812 |
n_clicks,
|
| 813 |
slider_value,
|
|
@@ -817,6 +835,7 @@ def update_top_models(n_clicks, slider_value, selected_view, current_label):
|
|
| 817 |
default_label="▼ Show More",
|
| 818 |
chip_color="#F0F9FF",
|
| 819 |
view=selected_view,
|
|
|
|
| 820 |
)
|
| 821 |
|
| 822 |
|
|
@@ -840,5 +859,3 @@ def update_selected_view(seg_value):
|
|
| 840 |
# Run the app
|
| 841 |
if __name__ == "__main__":
|
| 842 |
app.run(debug=True)
|
| 843 |
-
if __name__ == "__main__":
|
| 844 |
-
app.run(debug=True)
|
|
|
|
| 174 |
},
|
| 175 |
children=[
|
| 176 |
dcc.Store(id="selected-view", data="all_downloads"),
|
| 177 |
+
dcc.Store(id="derived-author-toggle", data=True), # Store for toggle state
|
| 178 |
html.Div(
|
| 179 |
[
|
| 180 |
# Header
|
|
|
|
| 185 |
html.Div(
|
| 186 |
[
|
| 187 |
html.Div(
|
| 188 |
+
children="Economies of Open Intelligence",
|
| 189 |
style={
|
| 190 |
"fontSize": 22,
|
| 191 |
"fontWeight": "700",
|
|
|
|
| 193 |
},
|
| 194 |
),
|
| 195 |
html.Div(
|
| 196 |
+
children="Tracing Power & Participation in the Model Ecosystem",
|
| 197 |
style={
|
| 198 |
"fontSize": 13,
|
| 199 |
"marginTop": 6,
|
|
|
|
| 310 |
# Intro / description below header (kept but styled to match layout)
|
| 311 |
# Title
|
| 312 |
html.Div(
|
| 313 |
+
children="The Open Model Leaderboard",
|
| 314 |
style={
|
| 315 |
"fontSize": 40,
|
| 316 |
"fontWeight": "700",
|
|
|
|
| 320 |
},
|
| 321 |
),
|
| 322 |
html.Div(
|
| 323 |
+
children="This leaderboard assesses concentrations of power in the open model ecosystem across three hierarchies: countries, developers, and models. Explore how downloads are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face.",
|
| 324 |
style={
|
| 325 |
"fontSize": 14,
|
| 326 |
"marginTop": 18,
|
|
|
|
| 336 |
html.Div(
|
| 337 |
[
|
| 338 |
html.Div(
|
| 339 |
+
"Select Download View",
|
| 340 |
style={
|
| 341 |
"fontWeight": "700",
|
| 342 |
"marginBottom": 8,
|
|
|
|
| 361 |
mb=10,
|
| 362 |
),
|
| 363 |
html.Div(
|
| 364 |
+
"Choose whether to view all downloads or only those within one year of the model's creation date.",
|
| 365 |
style={
|
| 366 |
"fontSize": 13,
|
| 367 |
"color": "#555",
|
|
|
|
| 372 |
html.Div(
|
| 373 |
[
|
| 374 |
html.Div(
|
| 375 |
+
"Select Author Type",
|
| 376 |
style={
|
| 377 |
"fontWeight": "700",
|
| 378 |
"marginBottom": 8,
|
|
|
|
| 380 |
},
|
| 381 |
),
|
| 382 |
dmc.Switch(
|
| 383 |
+
id="derived-author-switch", # <-- add id
|
| 384 |
color="#AC482A",
|
| 385 |
label="Derived Authors",
|
| 386 |
checked=True,
|
| 387 |
mb=10,
|
| 388 |
),
|
| 389 |
html.Div(
|
| 390 |
+
"Toggle between viewing downloads by original authors or derived authors (those who forked or adapted models).",
|
| 391 |
style={
|
| 392 |
"fontSize": 13,
|
| 393 |
"color": "#555",
|
|
|
|
| 420 |
),
|
| 421 |
time_slider,
|
| 422 |
html.Div(
|
| 423 |
+
"Adjust the time range to filter leaderboard results by model download times.",
|
| 424 |
style={
|
| 425 |
"fontSize": 13,
|
| 426 |
"color": "#555",
|
|
|
|
| 455 |
html.Span("All Downloads", style={"fontWeight": "600", "color": "#AC482A"}),
|
| 456 |
" and ",
|
| 457 |
html.Span("Filtered Downloads", style={"fontWeight": "600", "color": "#AC482A"}),
|
| 458 |
+
" to compare overall popularity versus early interest after model release. ",
|
| 459 |
+
"You can also toggle ON ",
|
| 460 |
+
html.Span("Derived Authors", style={"fontWeight": "600", "color": "#AC482A"}),
|
| 461 |
+
" to see how derivative works contribute to developer influence.",
|
|
|
|
|
|
|
| 462 |
],
|
| 463 |
style={
|
| 464 |
"fontSize": 13,
|
|
|
|
| 672 |
SELECT
|
| 673 |
{group_col},
|
| 674 |
CASE
|
| 675 |
+
WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
|
| 676 |
+
WHEN org_country_single IN ('International', 'Online') THEN 'International/Online'
|
|
|
|
| 677 |
ELSE org_country_single
|
| 678 |
END AS org_country_single,
|
| 679 |
author,
|
| 680 |
+
derived_author,
|
| 681 |
merged_country_groups_single,
|
| 682 |
merged_modality,
|
| 683 |
downloads,
|
|
|
|
| 701 |
-- Pick first non-null metadata values for reference
|
| 702 |
ANY_VALUE(b.org_country_single) AS org_country_single,
|
| 703 |
ANY_VALUE(b.author) AS author,
|
| 704 |
+
ANY_VALUE(b.derived_author) AS derived_author,
|
| 705 |
ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single,
|
| 706 |
ANY_VALUE(b.merged_modality) AS merged_modality,
|
| 707 |
ANY_VALUE(b.model) AS model
|
|
|
|
| 728 |
default_label="▼ Show Top 50",
|
| 729 |
chip_color="#F0F9FF",
|
| 730 |
view="all_downloads",
|
| 731 |
+
derived_author_toggle=True,
|
| 732 |
):
|
| 733 |
# Normalize label on first load
|
| 734 |
if current_label is None:
|
|
|
|
| 748 |
# Get filtered and aggregated data directly from DuckDB
|
| 749 |
df_filtered = _get_filtered_top_n_from_duckdb(slider_value, group_col, top_n, view=view)
|
| 750 |
|
| 751 |
+
# Process the already-filtered data - pass derived_author_toggle
|
| 752 |
+
df, download_df = get_top_n_leaderboard(df_filtered, group_col, top_n, derived_author_toggle=derived_author_toggle)
|
| 753 |
return render_table_content(
|
| 754 |
df, download_df, chip_color=chip_color, filename=filename
|
| 755 |
), new_label
|
|
|
|
| 758 |
# -- end helpers --
|
| 759 |
|
| 760 |
|
| 761 |
+
# --- Callback to store derived author toggle state ---
|
| 762 |
+
@app.callback(
|
| 763 |
+
Output("derived-author-toggle", "data"),
|
| 764 |
+
Input("derived-author-switch", "checked"),
|
| 765 |
+
)
|
| 766 |
+
def update_derived_author_toggle(checked):
|
| 767 |
+
return checked
|
| 768 |
+
|
| 769 |
+
|
| 770 |
# Callbacks for interactivity (modularized)
|
| 771 |
@app.callback(
|
| 772 |
Output("top_countries-table", "children"),
|
|
|
|
| 774 |
Input("top_countries-toggle", "n_clicks"),
|
| 775 |
Input("time-slider", "value"),
|
| 776 |
Input("selected-view", "data"),
|
| 777 |
+
Input("derived-author-toggle", "data"),
|
| 778 |
State("top_countries-toggle", "children"),
|
| 779 |
)
|
| 780 |
+
def update_top_countries(n_clicks, slider_value, selected_view, derived_author_toggle, current_label):
|
| 781 |
return _leaderboard_callback_logic(
|
| 782 |
n_clicks,
|
| 783 |
slider_value,
|
|
|
|
| 787 |
default_label="▼ Show Top 50",
|
| 788 |
chip_color="#F0F9FF",
|
| 789 |
view=selected_view,
|
| 790 |
+
derived_author_toggle=derived_author_toggle,
|
| 791 |
)
|
| 792 |
|
| 793 |
|
|
|
|
| 797 |
Input("top_developers-toggle", "n_clicks"),
|
| 798 |
Input("time-slider", "value"),
|
| 799 |
Input("selected-view", "data"),
|
| 800 |
+
Input("derived-author-toggle", "data"),
|
| 801 |
State("top_developers-toggle", "children"),
|
| 802 |
)
|
| 803 |
+
def update_top_developers(n_clicks, slider_value, selected_view, derived_author_toggle, current_label):
|
| 804 |
+
# Use derived_author if toggle is True, else author
|
| 805 |
+
group_col = "derived_author" if derived_author_toggle else "author"
|
| 806 |
return _leaderboard_callback_logic(
|
| 807 |
n_clicks,
|
| 808 |
slider_value,
|
| 809 |
current_label,
|
| 810 |
+
group_col=group_col,
|
| 811 |
filename="top_developers",
|
| 812 |
default_label="▼ Show Top 50",
|
| 813 |
chip_color="#F0F9FF",
|
| 814 |
view=selected_view,
|
| 815 |
+
derived_author_toggle=derived_author_toggle,
|
| 816 |
)
|
| 817 |
|
| 818 |
|
|
|
|
| 822 |
Input("top_models-toggle", "n_clicks"),
|
| 823 |
Input("time-slider", "value"),
|
| 824 |
Input("selected-view", "data"),
|
| 825 |
+
Input("derived-author-toggle", "data"),
|
| 826 |
State("top_models-toggle", "children"),
|
| 827 |
)
|
| 828 |
+
def update_top_models(n_clicks, slider_value, selected_view, derived_author_toggle, current_label):
|
| 829 |
return _leaderboard_callback_logic(
|
| 830 |
n_clicks,
|
| 831 |
slider_value,
|
|
|
|
| 835 |
default_label="▼ Show More",
|
| 836 |
chip_color="#F0F9FF",
|
| 837 |
view=selected_view,
|
| 838 |
+
derived_author_toggle=derived_author_toggle,
|
| 839 |
)
|
| 840 |
|
| 841 |
|
|
|
|
| 859 |
# Run the app
|
| 860 |
if __name__ == "__main__":
|
| 861 |
app.run(debug=True)
|
|
|
|
|
|
graphs/leaderboard.py
CHANGED
|
@@ -36,9 +36,11 @@ country_emoji_fallback = {
|
|
| 36 |
meta_cols_map = {
|
| 37 |
"org_country_single": ["org_country_single"],
|
| 38 |
"author": ["org_country_single", "author", "merged_country_groups_single"],
|
|
|
|
| 39 |
"model": [
|
| 40 |
"org_country_single",
|
| 41 |
"author",
|
|
|
|
| 42 |
"merged_country_groups_single",
|
| 43 |
"merged_modality",
|
| 44 |
"total_downloads",
|
|
@@ -281,7 +283,7 @@ def render_table_content(
|
|
| 281 |
)
|
| 282 |
|
| 283 |
# Function to get top N leaderboard (now accepts pandas DataFrame from DuckDB query)
|
| 284 |
-
def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
|
| 285 |
"""
|
| 286 |
Get top N entries for a leaderboard
|
| 287 |
|
|
@@ -289,6 +291,7 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
|
|
| 289 |
filtered_df: Pandas DataFrame (already filtered by time from DuckDB query)
|
| 290 |
group_col: Column to group by
|
| 291 |
top_n: Number of top entries to return
|
|
|
|
| 292 |
|
| 293 |
Returns:
|
| 294 |
tuple: (display_df, download_df)
|
|
@@ -322,7 +325,6 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
|
|
| 322 |
name_data = filtered_df[filtered_df[group_col] == name]
|
| 323 |
meta_map[name] = {}
|
| 324 |
download_map[name] = {}
|
| 325 |
-
|
| 326 |
for col in meta_cols:
|
| 327 |
if col in name_data.columns:
|
| 328 |
unique_vals = name_data[col].unique()
|
|
@@ -350,8 +352,9 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
|
|
| 350 |
flag_emoji = country_emoji_fallback.get(c, "🌍")
|
| 351 |
chips.append((flag_emoji, c, "country"))
|
| 352 |
|
| 353 |
-
# Author
|
| 354 |
-
|
|
|
|
| 355 |
icon = company_icon_map.get(a, "")
|
| 356 |
if icon == "":
|
| 357 |
if meta.get("merged_country_groups_single", ["User"])[0] != "User":
|
|
@@ -428,12 +431,12 @@ def get_top_n_from_duckdb(con, group_col, top_n=10, time_filter=None, view="all_
|
|
| 428 |
SELECT
|
| 429 |
{group_col},
|
| 430 |
CASE
|
| 431 |
-
WHEN org_country_single
|
| 432 |
-
WHEN org_country_single
|
| 433 |
-
WHEN org_country_single = 'Online' THEN 'International/Online'
|
| 434 |
ELSE org_country_single
|
| 435 |
END AS org_country_single,
|
| 436 |
author,
|
|
|
|
| 437 |
merged_country_groups_single,
|
| 438 |
merged_modality,
|
| 439 |
downloads,
|
|
@@ -457,6 +460,7 @@ def get_top_n_from_duckdb(con, group_col, top_n=10, time_filter=None, view="all_
|
|
| 457 |
-- Pick first non-null metadata values for reference
|
| 458 |
ANY_VALUE(b.org_country_single) AS org_country_single,
|
| 459 |
ANY_VALUE(b.author) AS author,
|
|
|
|
| 460 |
ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single,
|
| 461 |
ANY_VALUE(b.merged_modality) AS merged_modality,
|
| 462 |
ANY_VALUE(b.model) AS model
|
|
|
|
| 36 |
meta_cols_map = {
|
| 37 |
"org_country_single": ["org_country_single"],
|
| 38 |
"author": ["org_country_single", "author", "merged_country_groups_single"],
|
| 39 |
+
"derived_author": ["org_country_single", "derived_author", "merged_country_groups_single"],
|
| 40 |
"model": [
|
| 41 |
"org_country_single",
|
| 42 |
"author",
|
| 43 |
+
"derived_author",
|
| 44 |
"merged_country_groups_single",
|
| 45 |
"merged_modality",
|
| 46 |
"total_downloads",
|
|
|
|
| 283 |
)
|
| 284 |
|
| 285 |
# Function to get top N leaderboard (now accepts pandas DataFrame from DuckDB query)
|
| 286 |
+
def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggle=True):
|
| 287 |
"""
|
| 288 |
Get top N entries for a leaderboard
|
| 289 |
|
|
|
|
| 291 |
filtered_df: Pandas DataFrame (already filtered by time from DuckDB query)
|
| 292 |
group_col: Column to group by
|
| 293 |
top_n: Number of top entries to return
|
| 294 |
+
derived_author_toggle: Whether to use derived_author or author column
|
| 295 |
|
| 296 |
Returns:
|
| 297 |
tuple: (display_df, download_df)
|
|
|
|
| 325 |
name_data = filtered_df[filtered_df[group_col] == name]
|
| 326 |
meta_map[name] = {}
|
| 327 |
download_map[name] = {}
|
|
|
|
| 328 |
for col in meta_cols:
|
| 329 |
if col in name_data.columns:
|
| 330 |
unique_vals = name_data[col].unique()
|
|
|
|
| 352 |
flag_emoji = country_emoji_fallback.get(c, "🌍")
|
| 353 |
chips.append((flag_emoji, c, "country"))
|
| 354 |
|
| 355 |
+
# Author - use derived_author_toggle to determine which column
|
| 356 |
+
author_key = "derived_author" if derived_author_toggle else "author"
|
| 357 |
+
for a in meta.get(author_key, []):
|
| 358 |
icon = company_icon_map.get(a, "")
|
| 359 |
if icon == "":
|
| 360 |
if meta.get("merged_country_groups_single", ["User"])[0] != "User":
|
|
|
|
| 431 |
SELECT
|
| 432 |
{group_col},
|
| 433 |
CASE
|
| 434 |
+
WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
|
| 435 |
+
WHEN org_country_single IN ('International', 'Online') THEN 'International/Online'
|
|
|
|
| 436 |
ELSE org_country_single
|
| 437 |
END AS org_country_single,
|
| 438 |
author,
|
| 439 |
+
derived_author,
|
| 440 |
merged_country_groups_single,
|
| 441 |
merged_modality,
|
| 442 |
downloads,
|
|
|
|
| 460 |
-- Pick first non-null metadata values for reference
|
| 461 |
ANY_VALUE(b.org_country_single) AS org_country_single,
|
| 462 |
ANY_VALUE(b.author) AS author,
|
| 463 |
+
ANY_VALUE(b.derived_author) AS derived_author,
|
| 464 |
ANY_VALUE(b.merged_country_groups_single) AS merged_country_groups_single,
|
| 465 |
ANY_VALUE(b.merged_modality) AS merged_modality,
|
| 466 |
ANY_VALUE(b.model) AS model
|