emsesc committed on
Commit
cede300
·
1 Parent(s): 65d3762

add mobile responsiveness

Browse files
app.py CHANGED
@@ -1,5 +1,4 @@
1
  from dash import Dash, html, dcc, Input, Output, State
2
- from dash import Dash, html, dcc, Input, Output, State
3
  import pandas as pd
4
  import dash_mantine_components as dmc
5
  import duckdb
@@ -199,7 +198,7 @@ app.layout = dmc.MantineProvider(
199
  dcc.Store(id="selected-view", data="all_downloads"),
200
  dcc.Store(
201
  id="model-attribution-type", data="uploader"
202
- ), # Store for attribution type
203
  html.Div(
204
  [
205
  # Header
@@ -353,6 +352,7 @@ app.layout = dmc.MantineProvider(
353
  "gap": "24px",
354
  "backgroundColor": "#082030", # restored dark background
355
  },
 
356
  ),
357
  # Title row with "live" button
358
  html.Div(
@@ -376,10 +376,23 @@ app.layout = dmc.MantineProvider(
376
  "marginTop": "20px",
377
  "marginBottom": "20px",
378
  },
 
379
  ),
380
  # Intro / description below header (kept but styled to match layout)
381
  html.Div(
382
- children="This leaderboard assesses concentrations of power in the open model ecosystem across three hierarchies: countries, developers, and models. Explore how downloads are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face.",
 
 
 
 
 
 
 
 
 
 
 
 
383
  style={
384
  "fontSize": 14,
385
  "marginTop": 18,
@@ -388,6 +401,7 @@ app.layout = dmc.MantineProvider(
388
  "marginRight": 100,
389
  "textAlign": "center",
390
  },
 
391
  ),
392
  # Main content (filters + tabs)
393
  html.Div(
@@ -397,7 +411,7 @@ app.layout = dmc.MantineProvider(
397
  html.Div(
398
  html.Span(
399
  [
400
- "Select Download View",
401
  dmc.HoverCard(
402
  width=260,
403
  shadow="md",
@@ -479,7 +493,7 @@ app.layout = dmc.MantineProvider(
479
  html.Div(
480
  [
481
  html.Div(
482
- "Select Model Attribution",
483
  style={
484
  "fontWeight": "700",
485
  "marginBottom": 8,
@@ -528,7 +542,7 @@ app.layout = dmc.MantineProvider(
528
  html.Div(
529
  [
530
  html.Div(
531
- "Select Time Range",
532
  style={
533
  "fontWeight": "700",
534
  "marginBottom": 8,
@@ -537,7 +551,7 @@ app.layout = dmc.MantineProvider(
537
  ),
538
  time_slider,
539
  html.Div(
540
- "Adjust the time range to filter leaderboard results by model download times.",
541
  style={
542
  "fontSize": 13,
543
  "color": "#555",
@@ -643,6 +657,7 @@ app.layout = dmc.MantineProvider(
643
  "backgroundColor": "#FFFBF9",
644
  "borderRadius": "18px",
645
  },
 
646
  ),
647
  html.Div(
648
  [
@@ -677,11 +692,14 @@ app.layout = dmc.MantineProvider(
677
  "textAlign": "left",
678
  },
679
  ),
680
- dcc.Loading(
681
- id="loading-countries",
682
- type="circle",
683
- color="#AC482A",
684
- children=html.Div(id="top_countries-table"),
 
 
 
685
  ),
686
  html.Button(
687
  id="top_countries-toggle",
@@ -718,13 +736,16 @@ app.layout = dmc.MantineProvider(
718
  "textAlign": "left",
719
  },
720
  ),
721
- dcc.Loading(
722
- id="loading-developers",
723
- type="circle",
724
- color="#AC482A",
725
- children=html.Div(
726
- id="top_developers-table"
 
 
727
  ),
 
728
  ),
729
  html.Button(
730
  id="top_developers-toggle",
@@ -761,11 +782,14 @@ app.layout = dmc.MantineProvider(
761
  "textAlign": "left",
762
  },
763
  ),
764
- dcc.Loading(
765
- id="loading-models",
766
- type="circle",
767
- color="#AC482A",
768
- children=html.Div(id="top_models-table"),
 
 
 
769
  ),
770
  html.Button(
771
  id="top_models-toggle",
@@ -782,10 +806,11 @@ app.layout = dmc.MantineProvider(
782
  "borderRadius": "18px",
783
  "padding": "32px",
784
  "marginTop": "12px",
785
- "marginBottom": "12px", # reduced from 64px
786
  "marginLeft": "50px",
787
  "marginRight": "50px",
788
  },
 
789
  ),
790
  ],
791
  style={
@@ -818,7 +843,7 @@ def _get_filtered_top_n_from_duckdb(
818
  if group_col == "org_country_single":
819
  group_expr = """CASE
820
  WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
821
- WHEN org_country_single IN ('International', 'Online') THEN 'International/Online'
822
  ELSE org_country_single
823
  END"""
824
  else:
@@ -833,7 +858,7 @@ def _get_filtered_top_n_from_duckdb(
833
  {group_expr} AS group_key,
834
  CASE
835
  WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
836
- WHEN org_country_single IN ('International', 'Online') THEN 'International/Online'
837
  ELSE org_country_single
838
  END AS org_country_single,
839
  author,
 
1
  from dash import Dash, html, dcc, Input, Output, State
 
2
  import pandas as pd
3
  import dash_mantine_components as dmc
4
  import duckdb
 
198
  dcc.Store(id="selected-view", data="all_downloads"),
199
  dcc.Store(
200
  id="model-attribution-type", data="uploader"
201
+ ),
202
  html.Div(
203
  [
204
  # Header
 
352
  "gap": "24px",
353
  "backgroundColor": "#082030", # restored dark background
354
  },
355
+ className="responsive-header", # <-- add class
356
  ),
357
  # Title row with "live" button
358
  html.Div(
 
376
  "marginTop": "20px",
377
  "marginBottom": "20px",
378
  },
379
+ className="responsive-title-row", # <-- add class
380
  ),
381
  # Intro / description below header (kept but styled to match layout)
382
  html.Div(
383
+ children=[
384
+ "This leaderboard assesses concentrations of power in the open model ecosystem through ranking user downloads across three groups: countries, developers, and models. Explore how user downloads of models are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face. This dashboard accompanies the paper titled ",
385
+ html.A(
386
+ "Economies of Open Intelligence: Tracing Power & Participation in the Model Ecosystem.",
387
+ href="https://www.google.com/", # <-- update to actual paper link if available
388
+ target="_blank",
389
+ style={
390
+ "color": "#AC482A",
391
+ "fontWeight": "700",
392
+ "textDecoration": "underline",
393
+ },
394
+ ),
395
+ ],
396
  style={
397
  "fontSize": 14,
398
  "marginTop": 18,
 
401
  "marginRight": 100,
402
  "textAlign": "center",
403
  },
404
+ className="responsive-intro", # <-- add class
405
  ),
406
  # Main content (filters + tabs)
407
  html.Div(
 
411
  html.Div(
412
  html.Span(
413
  [
414
+ "Download View",
415
  dmc.HoverCard(
416
  width=260,
417
  shadow="md",
 
493
  html.Div(
494
  [
495
  html.Div(
496
+ "Model Attribution",
497
  style={
498
  "fontWeight": "700",
499
  "marginBottom": 8,
 
542
  html.Div(
543
  [
544
  html.Div(
545
+ "Download Date Range",
546
  style={
547
  "fontWeight": "700",
548
  "marginBottom": 8,
 
551
  ),
552
  time_slider,
553
  html.Div(
554
+ "Adjust the time range to filter leaderboard results by when models were downloaded by users.",
555
  style={
556
  "fontSize": 13,
557
  "color": "#555",
 
657
  "backgroundColor": "#FFFBF9",
658
  "borderRadius": "18px",
659
  },
660
+ className="responsive-main-content", # <-- add class
661
  ),
662
  html.Div(
663
  [
 
692
  "textAlign": "left",
693
  },
694
  ),
695
+ html.Div(
696
+ dcc.Loading(
697
+ id="loading-countries",
698
+ type="circle",
699
+ color="#AC482A",
700
+ children=html.Div(id="top_countries-table"),
701
+ ),
702
+ className="responsive-table-wrapper", # <-- add wrapper for scroll
703
  ),
704
  html.Button(
705
  id="top_countries-toggle",
 
736
  "textAlign": "left",
737
  },
738
  ),
739
+ html.Div(
740
+ dcc.Loading(
741
+ id="loading-developers",
742
+ type="circle",
743
+ color="#AC482A",
744
+ children=html.Div(
745
+ id="top_developers-table"
746
+ ),
747
  ),
748
+ className="responsive-table-wrapper",
749
  ),
750
  html.Button(
751
  id="top_developers-toggle",
 
782
  "textAlign": "left",
783
  },
784
  ),
785
+ html.Div(
786
+ dcc.Loading(
787
+ id="loading-models",
788
+ type="circle",
789
+ color="#AC482A",
790
+ children=html.Div(id="top_models-table"),
791
+ ),
792
+ className="responsive-table-wrapper",
793
  ),
794
  html.Button(
795
  id="top_models-toggle",
 
806
  "borderRadius": "18px",
807
  "padding": "32px",
808
  "marginTop": "12px",
809
+ "marginBottom": "12px",
810
  "marginLeft": "50px",
811
  "marginRight": "50px",
812
  },
813
+ className="responsive-tabs", # <-- add class
814
  ),
815
  ],
816
  style={
 
843
  if group_col == "org_country_single":
844
  group_expr = """CASE
845
  WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
846
+ WHEN org_country_single IN ('International', 'Online', 'Online?') THEN 'International/Online'
847
  ELSE org_country_single
848
  END"""
849
  else:
 
858
  {group_expr} AS group_key,
859
  CASE
860
  WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
861
+ WHEN org_country_single IN ('International', 'Online', 'Online?') THEN 'International/Online'
862
  ELSE org_country_single
863
  END AS org_country_single,
864
  author,
assets/icons/amazon.png ADDED

Git LFS Details

  • SHA256: bab0a666f2d83d097e0034b324d47ea74d859e948bb15c03d2b83bbd4ffde47d
  • Pointer size: 130 Bytes
  • Size of remote file: 31.9 kB
assets/icons/microsoft.png ADDED

Git LFS Details

  • SHA256: 667f0dd915fa82d64e646d95fe63fb8a23f79416888319edf2c057a8f52b89d5
  • Pointer size: 128 Bytes
  • Size of remote file: 640 Bytes
assets/styles.css CHANGED
@@ -123,3 +123,39 @@ button[id^="download-"]:focus {
123
  transform: scale(1.06);
124
  outline: none;
125
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  transform: scale(1.06);
124
  outline: none;
125
  }
126
+
127
+ @media (max-width: 1150px) {
128
+ .responsive-main-content {
129
+ flex-direction: column !important;
130
+ gap: 16px !important;
131
+ padding: 18px !important;
132
+ margin-left: 0 !important;
133
+ margin-right: 0 !important;
134
+ }
135
+ .responsive-main-content > div {
136
+ min-width: 0 !important;
137
+ width: 100% !important;
138
+ }
139
+ .responsive-header {
140
+ flex-direction: column !important;
141
+ gap: 12px !important;
142
+ padding: 12px !important;
143
+ }
144
+ .responsive-title-row {
145
+ margin-left: 0 !important;
146
+ margin-right: 0 !important;
147
+ font-size: 28px !important;
148
+ }
149
+ .responsive-intro {
150
+ margin-left: 12px !important;
151
+ margin-right: 12px !important;
152
+ }
153
+ .responsive-tabs {
154
+ margin-left: 0 !important;
155
+ margin-right: 0 !important;
156
+ padding: 12px !important;
157
+ }
158
+ .responsive-table-wrapper {
159
+ overflow-x: auto;
160
+ }
161
+ }
graphs/leaderboard.py CHANGED
@@ -25,6 +25,8 @@ company_icon_map = {
25
  "sentence-transformers": "../assets/images/hf.svg",
26
  "facebook": "../assets/icons/meta.png",
27
  "openai": "../assets/icons/openai.png",
 
 
28
  }
29
 
30
  country_emoji_fallback = {
@@ -34,14 +36,21 @@ country_emoji_fallback = {
34
  }
35
 
36
  meta_cols_map = {
37
- "org_country_single": ["org_country_single"],
38
- "author": ["org_country_single", "author", "merged_country_groups_single"],
39
- "derived_author": ["org_country_single", "derived_author", "merged_country_groups_single"],
 
 
 
 
 
 
 
 
40
  "model": [
41
  "org_country_single",
42
  "author",
43
  "derived_author",
44
- "merged_country_groups_single",
45
  "merged_modality",
46
  "total_downloads",
47
  ],
@@ -137,7 +146,6 @@ def get_metadata_popover_content(icon, name, meta_type):
137
  "author": f"Author/Organization: {name}",
138
  "downloads": f"Total downloads: {name}",
139
  "modality": f"Modality: {name}",
140
- "parameters": f"Estimated parameters: {name}",
141
  }
142
  return popover_texts.get(meta_type, name)
143
 
@@ -165,9 +173,7 @@ def chip_with_hovercard(text, bg_color="#F0F0F0", meta_type=None, icon=None):
165
  },
166
  )
167
  ),
168
- dmc.HoverCardDropdown(
169
- dmc.Text(hovercard_content, size="sm")
170
- ),
171
  ],
172
  )
173
 
@@ -187,7 +193,8 @@ def render_chips(metadata_list, chip_color):
187
  html.Span(
188
  [
189
  html.Img(
190
- src=icon, style={"height": "18px", "marginRight": "6px"}
 
191
  ),
192
  name,
193
  ],
@@ -204,13 +211,18 @@ def render_chips(metadata_list, chip_color):
204
  )
205
  ),
206
  dmc.HoverCardDropdown(
207
- dmc.Text(get_metadata_popover_content(icon, name, meta_type), size="sm")
 
 
 
208
  ),
209
  ],
210
  )
211
  )
212
  else:
213
- chips.append(chip_with_hovercard(f"{icon} {name}", chip_color, meta_type, icon))
 
 
214
  return html.Div(
215
  chips, style={"display": "flex", "flexWrap": "wrap", "justifyContent": "left"}
216
  )
@@ -282,6 +294,7 @@ def render_table_content(
282
  ]
283
  )
284
 
 
285
  # Function to get top N leaderboard (now accepts pandas DataFrame from DuckDB query)
286
  def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggle=True):
287
  """
@@ -303,7 +316,13 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggl
303
  .sum()
304
  .nlargest(top_n, columns="total_downloads")
305
  .reset_index()
306
- .rename(columns={group_col: "Name", "total_downloads": "Total Value", "percent_of_total": "% of total"})
 
 
 
 
 
 
307
  )
308
 
309
  # Create a downloadable version of the leaderboard
@@ -316,11 +335,11 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggl
316
 
317
  # All relevant metadata columns
318
  meta_cols = meta_cols_map.get(group_col, [])
319
-
320
  # Collect all metadata per top n for each category (country, author, model)
321
  meta_map = {}
322
  download_map = {}
323
-
324
  for name in top["Name"]:
325
  name_data = filtered_df[filtered_df[group_col] == name]
326
  meta_map[name] = {}
@@ -335,23 +354,22 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggl
335
  def build_metadata(nm):
336
  meta = meta_map.get(nm, {})
337
  chips = []
338
-
339
  # Countries
340
  for c in meta.get("org_country_single", []):
341
  if c == "United States of America":
342
  c = "USA"
343
  if c == "user":
344
  c = "User"
345
- # Try countryflag.getflag(), fallback to dictionary if fails
346
  try:
347
  flag_emoji = countryflag.getflag(c)
348
- # If countryflag returns empty or None, fallback
349
  if not flag_emoji or flag_emoji == c:
350
  flag_emoji = country_emoji_fallback.get(c, "🌍")
351
  except Exception:
352
  flag_emoji = country_emoji_fallback.get(c, "🌍")
353
  chips.append((flag_emoji, c, "country"))
354
-
 
355
  # Author - use derived_author_toggle to determine which column
356
  author_key = "derived_author" if derived_author_toggle else "author"
357
  for a in meta.get(author_key, []):
@@ -362,41 +380,39 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggl
362
  else:
363
  icon = "👀"
364
  chips.append((icon, a, "author"))
365
-
366
- # Downloads
367
- total_downloads = sum(
368
- d for d in meta.get("total_downloads", []) if pd.notna(d)
369
- )
370
- if total_downloads:
371
- chips.append(("⬇️", f"{int(total_downloads):,}", "downloads"))
372
 
373
  # Modality
374
  for m in meta.get("merged_modality", []):
375
  if pd.notna(m):
376
  chips.append(("", m, "modality"))
377
-
 
 
 
 
 
378
  return chips
379
 
380
  # Function to create downloadable dataframe metadata
381
  def build_download_metadata(nm):
382
  meta = download_map.get(nm, {})
383
  download_info = {}
384
-
385
  for col in meta_cols:
386
  if col not in meta or not meta[col]:
387
  continue
388
-
389
  vals = meta.get(col, [])
390
  if vals:
391
  download_info[col] = ", ".join(str(v) for v in vals if pd.notna(v))
392
  else:
393
  download_info[col] = ""
394
-
395
  return download_info
396
 
397
  # Apply metadata builder to top dataframe
398
  top["Metadata"] = top["Name"].astype(object).apply(build_metadata)
399
-
400
  # Build download dataframe with metadata
401
  download_info_list = [build_download_metadata(nm) for nm in download_top["Name"]]
402
  download_info_df = pd.DataFrame(download_info_list)
@@ -405,16 +421,18 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggl
405
  return top[["Name", "Metadata", "% of total"]], download_top
406
 
407
 
408
- def get_top_n_from_duckdb(con, group_col, top_n=10, time_filter=None, view="all_downloads"):
 
 
409
  """
410
  Query DuckDB directly to get top N entries with minimal data transfer
411
-
412
  Args:
413
  con: DuckDB connection object
414
  group_col: Column to group by
415
  top_n: Number of top entries
416
  time_filter: Optional tuple of (start_timestamp, end_timestamp)
417
-
418
  Returns:
419
  Pandas DataFrame with only the rows needed for top N
420
  """
@@ -429,7 +447,7 @@ def get_top_n_from_duckdb(con, group_col, top_n=10, time_filter=None, view="all_
429
  if group_col == "org_country_single":
430
  group_expr = """CASE
431
  WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
432
- WHEN org_country_single IN ('International', 'Online') THEN 'International/Online'
433
  ELSE org_country_single
434
  END"""
435
  else:
@@ -543,4 +561,16 @@ def get_top_n_from_duckdb(con, group_col, top_n=10, time_filter=None, view="all_
543
  return con.execute(query).fetchdf()
544
  except Exception as e:
545
  print(f"Error querying DuckDB: {e}")
546
- return pd.DataFrame()
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  "sentence-transformers": "../assets/images/hf.svg",
26
  "facebook": "../assets/icons/meta.png",
27
  "openai": "../assets/icons/openai.png",
28
+ "amazon": "../assets/icons/amazon.png",
29
+ "microsoft": "../assets/icons/microsoft.png",
30
  }
31
 
32
  country_emoji_fallback = {
 
36
  }
37
 
38
  meta_cols_map = {
39
+ "org_country_single": ["org_country_single", "total_downloads"],
40
+ "author": [
41
+ "org_country_single",
42
+ "author",
43
+ "total_downloads",
44
+ ],
45
+ "derived_author": [
46
+ "org_country_single",
47
+ "derived_author",
48
+ "total_downloads",
49
+ ],
50
  "model": [
51
  "org_country_single",
52
  "author",
53
  "derived_author",
 
54
  "merged_modality",
55
  "total_downloads",
56
  ],
 
146
  "author": f"Author/Organization: {name}",
147
  "downloads": f"Total downloads: {name}",
148
  "modality": f"Modality: {name}",
 
149
  }
150
  return popover_texts.get(meta_type, name)
151
 
 
173
  },
174
  )
175
  ),
176
+ dmc.HoverCardDropdown(dmc.Text(hovercard_content, size="sm")),
 
 
177
  ],
178
  )
179
 
 
193
  html.Span(
194
  [
195
  html.Img(
196
+ src=icon,
197
+ style={"height": "18px", "marginRight": "6px"},
198
  ),
199
  name,
200
  ],
 
211
  )
212
  ),
213
  dmc.HoverCardDropdown(
214
+ dmc.Text(
215
+ get_metadata_popover_content(icon, name, meta_type),
216
+ size="sm",
217
+ )
218
  ),
219
  ],
220
  )
221
  )
222
  else:
223
+ chips.append(
224
+ chip_with_hovercard(f"{icon} {name}", chip_color, meta_type, icon)
225
+ )
226
  return html.Div(
227
  chips, style={"display": "flex", "flexWrap": "wrap", "justifyContent": "left"}
228
  )
 
294
  ]
295
  )
296
 
297
+
298
  # Function to get top N leaderboard (now accepts pandas DataFrame from DuckDB query)
299
  def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggle=True):
300
  """
 
316
  .sum()
317
  .nlargest(top_n, columns="total_downloads")
318
  .reset_index()
319
+ .rename(
320
+ columns={
321
+ group_col: "Name",
322
+ "total_downloads": "Total Value",
323
+ "percent_of_total": "% of total",
324
+ }
325
+ )
326
  )
327
 
328
  # Create a downloadable version of the leaderboard
 
335
 
336
  # All relevant metadata columns
337
  meta_cols = meta_cols_map.get(group_col, [])
338
+
339
  # Collect all metadata per top n for each category (country, author, model)
340
  meta_map = {}
341
  download_map = {}
342
+
343
  for name in top["Name"]:
344
  name_data = filtered_df[filtered_df[group_col] == name]
345
  meta_map[name] = {}
 
354
  def build_metadata(nm):
355
  meta = meta_map.get(nm, {})
356
  chips = []
357
+
358
  # Countries
359
  for c in meta.get("org_country_single", []):
360
  if c == "United States of America":
361
  c = "USA"
362
  if c == "user":
363
  c = "User"
 
364
  try:
365
  flag_emoji = countryflag.getflag(c)
 
366
  if not flag_emoji or flag_emoji == c:
367
  flag_emoji = country_emoji_fallback.get(c, "🌍")
368
  except Exception:
369
  flag_emoji = country_emoji_fallback.get(c, "🌍")
370
  chips.append((flag_emoji, c, "country"))
371
+ # Add downloads chip for country (only once)
372
+
373
  # Author - use derived_author_toggle to determine which column
374
  author_key = "derived_author" if derived_author_toggle else "author"
375
  for a in meta.get(author_key, []):
 
380
  else:
381
  icon = "👀"
382
  chips.append((icon, a, "author"))
 
 
 
 
 
 
 
383
 
384
  # Modality
385
  for m in meta.get("merged_modality", []):
386
  if pd.notna(m):
387
  chips.append(("", m, "modality"))
388
+
389
+ # Total downloads
390
+ for d in meta.get("total_downloads", []):
391
+ formatted_downloads = format_large_number(d)
392
+ chips.append(("⬇️", formatted_downloads, "downloads"))
393
+
394
  return chips
395
 
396
  # Function to create downloadable dataframe metadata
397
  def build_download_metadata(nm):
398
  meta = download_map.get(nm, {})
399
  download_info = {}
400
+
401
  for col in meta_cols:
402
  if col not in meta or not meta[col]:
403
  continue
404
+
405
  vals = meta.get(col, [])
406
  if vals:
407
  download_info[col] = ", ".join(str(v) for v in vals if pd.notna(v))
408
  else:
409
  download_info[col] = ""
410
+
411
  return download_info
412
 
413
  # Apply metadata builder to top dataframe
414
  top["Metadata"] = top["Name"].astype(object).apply(build_metadata)
415
+
416
  # Build download dataframe with metadata
417
  download_info_list = [build_download_metadata(nm) for nm in download_top["Name"]]
418
  download_info_df = pd.DataFrame(download_info_list)
 
421
  return top[["Name", "Metadata", "% of total"]], download_top
422
 
423
 
424
+ def get_top_n_from_duckdb(
425
+ con, group_col, top_n=10, time_filter=None, view="all_downloads"
426
+ ):
427
  """
428
  Query DuckDB directly to get top N entries with minimal data transfer
429
+
430
  Args:
431
  con: DuckDB connection object
432
  group_col: Column to group by
433
  top_n: Number of top entries
434
  time_filter: Optional tuple of (start_timestamp, end_timestamp)
435
+
436
  Returns:
437
  Pandas DataFrame with only the rows needed for top N
438
  """
 
447
  if group_col == "org_country_single":
448
  group_expr = """CASE
449
  WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
450
+ WHEN org_country_single IN ('International', 'Online', 'Online?') THEN 'International/Online'
451
  ELSE org_country_single
452
  END"""
453
  else:
 
561
  return con.execute(query).fetchdf()
562
  except Exception as e:
563
  print(f"Error querying DuckDB: {e}")
564
+ return pd.DataFrame()
565
+
566
+
567
def format_large_number(n):
    """Shorten a large number for display, e.g. 5,000,000 -> '5.0 million'.

    Args:
        n: A numeric value (int, float, or anything ``float()`` accepts).

    Returns:
        A string with one decimal place and a "k", " million" or " billion"
        suffix for values of at least 1,000; the truncated integer as a
        string for smaller values; "N/A" when ``n`` is missing (None, NaN),
        infinite, or not numeric.
    """
    import math  # local import keeps this helper self-contained

    # Missing cells (None / NaN / pd.NA) cannot be compared or cast to int,
    # so map them to a placeholder instead of raising.
    try:
        value = float(n)
    except (TypeError, ValueError):
        return "N/A"
    if not math.isfinite(value):
        return "N/A"

    # Ordered largest-first so the first matching threshold wins.
    units = (
        (1_000_000_000, " billion"),
        (1_000_000, " million"),
        (1_000, "k"),
    )
    for position, (threshold, suffix) in enumerate(units):
        if value < threshold:
            continue
        scaled = round(value / threshold, 1)
        # Rounding can push a value to 1000.0 of its unit (e.g. 999,999 ->
        # "1000.0k"); promote it to the next larger unit when one exists.
        if scaled >= 1000 and position > 0:
            threshold, suffix = units[position - 1]
            scaled = round(value / threshold, 1)
        return f"{scaled:.1f}{suffix}"

    # Below 1,000: show the integer part, as before.
    return str(int(value))