emsesc committed on
Commit
65d3762
·
1 Parent(s): fb3bf5f

change toggle, author fix, new live

Browse files
Files changed (2) hide show
  1. app.py +331 -170
  2. graphs/leaderboard.py +3 -3
app.py CHANGED
@@ -15,6 +15,22 @@ from dash_iconify import DashIconify
15
  app = Dash()
16
  server = app.server
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  def load_parquet_to_duckdb(con, parquet_url, view_name):
19
  """
20
  Loads a parquet file from a remote URL into DuckDB as a view.
@@ -38,6 +54,7 @@ def load_parquet_to_duckdb(con, parquet_url, view_name):
38
  end_dt = pd.to_datetime(time_range["max_time"].iloc[0])
39
  return start_dt, end_dt
40
 
 
41
  # DuckDB connection (global)
42
  con = duckdb.connect(database=":memory:", read_only=False)
43
 
@@ -53,11 +70,11 @@ try:
53
  # Load both parquet files as views
54
  start_dt, end_dt = load_parquet_to_duckdb(con, hf_parquet_url_1, "all_downloads")
55
  # Example: load a second parquet file as another view
56
- start_dt2, end_dt2 = load_parquet_to_duckdb(con, hf_parquet_url_2, "one_year_rolling")
57
-
58
- msg = (
59
- f"Successfully connected to datasets in {time.time() - overall_start_time:.2f}s."
60
  )
 
 
61
  print(msg)
62
  except Exception as e:
63
  err_msg = f"Failed to load dataset(s). Error: {e}"
@@ -68,18 +85,21 @@ except Exception as e:
68
  start_ts = int(start_dt.timestamp())
69
  end_ts = int(end_dt.timestamp())
70
 
 
71
  def ordinal(n):
72
  # Helper to get ordinal suffix for a day
73
  if 10 <= n % 100 <= 20:
74
- suffix = 'th'
75
  else:
76
- suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
77
  return f"{n}{suffix}"
78
 
 
79
  def format_date(dt):
80
  # Format date as "Oct 8th, 2025"
81
  return dt.strftime("%b") + f" {ordinal(dt.day)}, {dt.year}"
82
 
 
83
  marks = []
84
  # Add start label (e.g. "Jan 2020")
85
  marks.append({"value": start_ts, "label": start_dt.strftime("%b %Y")})
@@ -92,6 +112,7 @@ for yr in range(start_dt.year, end_dt.year + 1):
92
  # Add end label (e.g. "Dec 2024")
93
  marks.append({"value": end_ts, "label": end_dt.strftime("%b %Y")})
94
 
 
95
  def get_thumb_labels(values):
96
  # Returns formatted labels for both thumbs
97
  distance = abs(values[1] - values[0])
@@ -145,6 +166,7 @@ def get_thumb_labels(values):
145
  ),
146
  ]
147
 
 
148
  # Create a dmc RangeSlider for time range selection by year
149
  time_slider = dmc.RangeSlider(
150
  id="time-slider",
@@ -175,151 +197,187 @@ app.layout = dmc.MantineProvider(
175
  },
176
  children=[
177
  dcc.Store(id="selected-view", data="all_downloads"),
178
- dcc.Store(id="derived-author-toggle", data=True), # Store for toggle state
 
 
179
  html.Div(
180
  [
181
  # Header
182
  html.Div(
183
  [
 
184
  html.Div(
185
  [
186
- html.Div(
187
  [
188
- html.Div(
189
- children="Economies of Open Intelligence",
190
  style={
191
- "fontSize": 22,
192
- "fontWeight": "700",
193
- "lineHeight": "1.1",
 
 
 
 
 
194
  },
195
  ),
196
- html.Div(
197
- children="Tracing Power & Participation in the Model Ecosystem",
198
  style={
199
- "fontSize": 13,
200
- "marginTop": 6,
201
- "opacity": 0.9,
 
 
 
 
202
  },
203
  ),
204
  ],
205
  style={
206
- "display": "flex",
207
- "flexDirection": "column",
208
- "justifyContent": "center",
209
  },
210
  ),
211
- html.Div(
212
- [
213
- html.A(
214
- children=[
215
- html.Img(
216
- src="assets/images/dpi.svg",
217
- style={
218
- "height": "28px",
219
- "verticalAlign": "middle",
220
- "paddingRight": "8px",
221
- },
222
- ),
223
- "Data Provenance Initiative",
224
- ],
225
- href="https://www.dataprovenance.org/",
226
- target="_blank",
227
- className="no-bg-link header-link",
 
 
 
 
 
 
 
 
 
 
 
228
  style={
229
- "display": "inline-block",
230
- "padding": "6px 14px",
231
- "fontSize": 13,
232
- "color": "#FFFFFF", # white on dark header
233
- # background removed so CSS controls it
234
- "borderRadius": "18px",
235
- "fontWeight": "700",
236
- "textDecoration": "none",
237
- "marginRight": "12px",
238
  },
239
  ),
240
- html.A(
241
- children=[
242
- html.Img(
243
- src="assets/images/hf.svg",
244
- style={
245
- "height": "30px",
246
- "verticalAlign": "middle",
247
- },
248
- ),
249
- html.Span(
250
- "Hugging Face",
251
- className="hf-brand-text",
252
- ),
253
- ],
254
- href="https://huggingface.co/",
255
- target="_blank",
256
- className="no-bg-link header-link",
 
 
 
257
  style={
258
- "display": "inline-flex",
259
- "padding": "6px 14px",
260
- "alignItems": "center",
261
- "color": "#FFFFFF",
262
- "borderRadius": "18px",
263
- "textDecoration": "none",
264
- "marginRight": "12px",
265
  },
266
  ),
267
- html.A(
268
- children=[
269
- html.Span(
270
- "Read the paper",
271
- className="paper-text",
272
- ),
273
- ],
274
- href="https://www.google.com/",
275
- target="_blank",
276
- className="no-bg-link header-link paper-link",
277
- style={
278
- "display": "inline-flex",
279
- "alignItems": "center",
280
- "padding": "6px 12px", # decreased size
281
- "fontSize": 14, # smaller text
282
- "margin": "0 auto",
283
- "backgroundColor": "#AC482A",
284
- "color": "#FFFFFF",
285
- "borderRadius": "5px",
286
- "textDecoration": "none",
287
- "fontWeight": "700",
288
- },
289
  ),
290
  ],
291
- style={"display": "flex", "alignItems": "center"},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  ),
293
  ],
294
- style={
295
- "marginLeft": "50px",
296
- "marginRight": "50px",
297
- "display": "flex",
298
- "justifyContent": "space-between",
299
- "alignItems": "center",
300
- "padding": "18px 24px",
301
- "gap": "24px",
302
- },
303
  ),
304
  ],
305
  style={
306
- "backgroundColor": "#082030",
307
- "color": "white",
308
- "width": "100%",
 
 
 
309
  },
310
  ),
311
- # Intro / description below header (kept but styled to match layout)
312
- # Title
313
  html.Div(
314
- children="The Open Model Leaderboard",
 
 
 
 
 
 
 
 
 
 
 
315
  style={
316
- "fontSize": 40,
317
- "fontWeight": "700",
318
- "textAlign": "center",
319
- "marginTop": 20,
320
- "marginBottom": 20,
 
321
  },
322
  ),
 
323
  html.Div(
324
  children="This leaderboard assesses concentrations of power in the open model ecosystem across three hierarchies: countries, developers, and models. Explore how downloads are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face.",
325
  style={
@@ -337,32 +395,80 @@ app.layout = dmc.MantineProvider(
337
  html.Div(
338
  [
339
  html.Div(
340
- "Select Download View",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
341
  style={
342
  "fontWeight": "700",
343
  "marginBottom": 8,
344
  "fontSize": 14,
 
 
345
  },
346
  ),
347
- dmc.SegmentedControl(
348
- id="segmented",
349
- value="all-downloads",
350
- color="#AC482A",
351
- transitionDuration=200,
352
- data=[
353
- {
354
- "value": "all-downloads",
355
- "label": "All Downloads",
356
- },
357
- {
358
- "value": "filtered-downloads",
359
- "label": "Filtered Downloads",
360
- },
 
 
 
 
 
 
 
 
 
361
  ],
362
- mb=10,
363
  ),
364
  html.Div(
365
- "Choose whether to view all downloads or only those within one year of the model's creation date.",
366
  style={
367
  "fontSize": 13,
368
  "color": "#555",
@@ -373,22 +479,32 @@ app.layout = dmc.MantineProvider(
373
  html.Div(
374
  [
375
  html.Div(
376
- "Select Author Type",
377
  style={
378
  "fontWeight": "700",
379
  "marginBottom": 8,
380
  "fontSize": 14,
381
  },
382
  ),
383
- dmc.Switch(
384
- id="derived-author-switch", # <-- add id
 
385
  color="#AC482A",
386
- label="Derived Authors",
387
- checked=True,
 
 
 
 
 
 
 
 
 
388
  mb=10,
389
  ),
390
  html.Div(
391
- "Toggle between viewing downloads by original authors or derived authors (those who forked or adapted models).",
392
  style={
393
  "fontSize": 13,
394
  "color": "#555",
@@ -437,7 +553,10 @@ app.layout = dmc.MantineProvider(
437
  icon="mdi:lightbulb-on-outline",
438
  width=20,
439
  height=20,
440
- style={"marginRight": "8px", "color": "#082030"},
 
 
 
441
  ),
442
  html.Span("Tip"),
443
  ],
@@ -453,13 +572,39 @@ app.layout = dmc.MantineProvider(
453
  html.Div(
454
  [
455
  "Try switching between ",
456
- html.Span("All Downloads", style={"fontWeight": "600", "color": "#AC482A"}),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
  " and ",
458
- html.Span("Filtered Downloads", style={"fontWeight": "600", "color": "#AC482A"}),
459
- " to compare overall popularity versus early interest after model release. ",
460
- "You can also toggle ON ",
461
- html.Span("Derived Authors", style={"fontWeight": "600", "color": "#AC482A"}),
462
- " to see how derivative works contribute to developer influence.",
 
 
 
463
  ],
464
  style={
465
  "fontSize": 13,
@@ -524,7 +669,7 @@ app.layout = dmc.MantineProvider(
524
  },
525
  children=[
526
  html.Div(
527
- children="The model leaderboard assesses concentrations of power across three hierarchies: countries, developers, and models. Explore how downloads are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face.",
528
  style={
529
  "fontSize": 14,
530
  "marginTop": 18,
@@ -536,7 +681,7 @@ app.layout = dmc.MantineProvider(
536
  id="loading-countries",
537
  type="circle",
538
  color="#AC482A",
539
- children=html.Div(id="top_countries-table")
540
  ),
541
  html.Button(
542
  id="top_countries-toggle",
@@ -565,7 +710,7 @@ app.layout = dmc.MantineProvider(
565
  },
566
  children=[
567
  html.Div(
568
- children="The model leaderboard assesses concentrations of power across three hierarchies: countries, developers, and models. Explore how downloads are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face.",
569
  style={
570
  "fontSize": 14,
571
  "marginTop": 18,
@@ -577,7 +722,9 @@ app.layout = dmc.MantineProvider(
577
  id="loading-developers",
578
  type="circle",
579
  color="#AC482A",
580
- children=html.Div(id="top_developers-table")
 
 
581
  ),
582
  html.Button(
583
  id="top_developers-toggle",
@@ -606,7 +753,7 @@ app.layout = dmc.MantineProvider(
606
  },
607
  children=[
608
  html.Div(
609
- children="The model leaderboard assesses concentrations of power across three hierarchies: countries, developers, and models. Explore how downloads are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face.",
610
  style={
611
  "fontSize": 14,
612
  "marginTop": 18,
@@ -618,7 +765,7 @@ app.layout = dmc.MantineProvider(
618
  id="loading-models",
619
  type="circle",
620
  color="#AC482A",
621
- children=html.Div(id="top_models-table")
622
  ),
623
  html.Button(
624
  id="top_models-toggle",
@@ -646,14 +793,16 @@ app.layout = dmc.MantineProvider(
646
  "backgroundColor": "#ffffff",
647
  "minHeight": "100vh",
648
  },
649
- )
650
  ],
651
  )
652
 
653
 
654
  # Callbacks for interactivity
655
  # -- helper utilities to consolidate duplicated callback logic --
656
- def _get_filtered_top_n_from_duckdb(slider_value, group_col, top_n, view="all_downloads"):
 
 
657
  """
658
  Query DuckDB directly to get top N entries with metadata
659
  This minimizes data transfer by doing aggregation in DuckDB
@@ -810,10 +959,14 @@ def _leaderboard_callback_logic(
810
  top_n, new_label = 10, "▼ Show Top 50"
811
 
812
  # Get filtered and aggregated data directly from DuckDB
813
- df_filtered = _get_filtered_top_n_from_duckdb(slider_value, group_col, top_n, view=view)
 
 
814
 
815
  # Process the already-filtered data - pass derived_author_toggle
816
- df, download_df = get_top_n_leaderboard(df_filtered, group_col, top_n, derived_author_toggle=derived_author_toggle)
 
 
817
  return render_table_content(
818
  df, download_df, chip_color=chip_color, filename=filename
819
  ), new_label
@@ -822,13 +975,13 @@ def _leaderboard_callback_logic(
822
  # -- end helpers --
823
 
824
 
825
- # --- Callback to store derived author toggle state ---
826
  @app.callback(
827
- Output("derived-author-toggle", "data"),
828
- Input("derived-author-switch", "checked"),
829
  )
830
- def update_derived_author_toggle(checked):
831
- return checked
832
 
833
 
834
  # Callbacks for interactivity (modularized)
@@ -838,10 +991,12 @@ def update_derived_author_toggle(checked):
838
  Input("top_countries-toggle", "n_clicks"),
839
  Input("time-slider", "value"),
840
  Input("selected-view", "data"),
841
- Input("derived-author-toggle", "data"),
842
  State("top_countries-toggle", "children"),
843
  )
844
- def update_top_countries(n_clicks, slider_value, selected_view, derived_author_toggle, current_label):
 
 
845
  return _leaderboard_callback_logic(
846
  n_clicks,
847
  slider_value,
@@ -851,7 +1006,7 @@ def update_top_countries(n_clicks, slider_value, selected_view, derived_author_t
851
  default_label="▼ Show Top 50",
852
  chip_color="#F0F9FF",
853
  view=selected_view,
854
- derived_author_toggle=derived_author_toggle,
855
  )
856
 
857
 
@@ -861,12 +1016,14 @@ def update_top_countries(n_clicks, slider_value, selected_view, derived_author_t
861
  Input("top_developers-toggle", "n_clicks"),
862
  Input("time-slider", "value"),
863
  Input("selected-view", "data"),
864
- Input("derived-author-toggle", "data"),
865
  State("top_developers-toggle", "children"),
866
  )
867
- def update_top_developers(n_clicks, slider_value, selected_view, derived_author_toggle, current_label):
868
- # Use derived_author if toggle is True, else author
869
- group_col = "derived_author" if derived_author_toggle else "author"
 
 
870
  return _leaderboard_callback_logic(
871
  n_clicks,
872
  slider_value,
@@ -876,7 +1033,7 @@ def update_top_developers(n_clicks, slider_value, selected_view, derived_author_
876
  default_label="▼ Show Top 50",
877
  chip_color="#F0F9FF",
878
  view=selected_view,
879
- derived_author_toggle=derived_author_toggle,
880
  )
881
 
882
 
@@ -886,10 +1043,12 @@ def update_top_developers(n_clicks, slider_value, selected_view, derived_author_
886
  Input("top_models-toggle", "n_clicks"),
887
  Input("time-slider", "value"),
888
  Input("selected-view", "data"),
889
- Input("derived-author-toggle", "data"),
890
  State("top_models-toggle", "children"),
891
  )
892
- def update_top_models(n_clicks, slider_value, selected_view, derived_author_toggle, current_label):
 
 
893
  return _leaderboard_callback_logic(
894
  n_clicks,
895
  slider_value,
@@ -899,7 +1058,7 @@ def update_top_models(n_clicks, slider_value, selected_view, derived_author_togg
899
  default_label="▼ Show More",
900
  chip_color="#F0F9FF",
901
  view=selected_view,
902
- derived_author_toggle=derived_author_toggle,
903
  )
904
 
905
 
@@ -910,6 +1069,7 @@ def update_top_models(n_clicks, slider_value, selected_view, derived_author_togg
910
  def update_thumb_labels(values):
911
  return get_thumb_labels(values)
912
 
 
913
  # --- Add callback to update selected view based on segmented control ---
914
  @app.callback(
915
  Output("selected-view", "data"),
@@ -920,6 +1080,7 @@ def update_selected_view(seg_value):
920
  return "one_year_rolling"
921
  return "all_downloads"
922
 
 
923
  # Run the app
924
  if __name__ == "__main__":
925
  app.run(debug=True)
 
15
  app = Dash()
16
  server = app.server
17
 
18
+
# Query for most recent date in all_downloads
def get_last_updated(connection=None):
    """
    Return the most recent `time` value in the `all_downloads` view as a
    display string formatted with "%b %d, %Y" (e.g. "Oct 08, 2025").

    Args:
        connection: Optional connection object exposing
            `execute(sql).fetchdf()`. When omitted, the module-level `con`
            is used; it is looked up at call time because it is created
            further down in this module.

    Returns:
        str: The formatted date, or "N/A" when the query yields a null value
        or anything goes wrong (best-effort: a failure here must not stop
        the page header from rendering).
    """
    try:
        if connection is None:
            # Resolved inside the try so that a missing global connection
            # falls back to "N/A" like any other failure.
            connection = con
        result = connection.execute(
            "SELECT MAX(time) as max_time FROM all_downloads"
        ).fetchdf()
        max_time = result["max_time"].iloc[0]
        if pd.isnull(max_time):
            return "N/A"
        return pd.to_datetime(max_time).strftime("%b %d, %Y")
    except Exception as exc:
        # Keep the fallback, but report the failure instead of hiding it.
        print(f"Could not determine last-updated date: {exc}")
        return "N/A"
32
+
33
+
34
  def load_parquet_to_duckdb(con, parquet_url, view_name):
35
  """
36
  Loads a parquet file from a remote URL into DuckDB as a view.
 
54
  end_dt = pd.to_datetime(time_range["max_time"].iloc[0])
55
  return start_dt, end_dt
56
 
57
+
58
  # DuckDB connection (global)
59
  con = duckdb.connect(database=":memory:", read_only=False)
60
 
 
70
  # Load both parquet files as views
71
  start_dt, end_dt = load_parquet_to_duckdb(con, hf_parquet_url_1, "all_downloads")
72
  # Example: load a second parquet file as another view
73
+ start_dt2, end_dt2 = load_parquet_to_duckdb(
74
+ con, hf_parquet_url_2, "one_year_rolling"
 
 
75
  )
76
+
77
+ msg = f"Successfully connected to datasets in {time.time() - overall_start_time:.2f}s."
78
  print(msg)
79
  except Exception as e:
80
  err_msg = f"Failed to load dataset(s). Error: {e}"
 
85
  start_ts = int(start_dt.timestamp())
86
  end_ts = int(end_dt.timestamp())
87
 
88
+
def ordinal(n):
    """
    Return `n` followed by its English ordinal suffix, e.g. 1 -> "1st",
    12 -> "12th", 23 -> "23rd".
    """
    last_two_digits = n % 100
    if not 10 <= last_two_digits <= 20:
        # Outside the 10-20 range the suffix depends on the final digit only.
        ending = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
    else:
        # 10 through 20 (including 11, 12 and 13) always take "th".
        ending = "th"
    return f"{n}{ending}"
96
 
97
+
def format_date(dt):
    """
    Format a date as abbreviated month, ordinal day and year,
    e.g. "Oct 8th, 2025".
    """
    month_abbr = dt.strftime("%b")
    day_label = ordinal(dt.day)
    return f"{month_abbr} {day_label}, {dt.year}"
101
 
102
+
103
  marks = []
104
  # Add start label (e.g. "Jan 2020")
105
  marks.append({"value": start_ts, "label": start_dt.strftime("%b %Y")})
 
112
  # Add end label (e.g. "Dec 2024")
113
  marks.append({"value": end_ts, "label": end_dt.strftime("%b %Y")})
114
 
115
+
116
  def get_thumb_labels(values):
117
  # Returns formatted labels for both thumbs
118
  distance = abs(values[1] - values[0])
 
166
  ),
167
  ]
168
 
169
+
170
  # Create a dmc RangeSlider for time range selection by year
171
  time_slider = dmc.RangeSlider(
172
  id="time-slider",
 
197
  },
198
  children=[
199
  dcc.Store(id="selected-view", data="all_downloads"),
200
+ dcc.Store(
201
+ id="model-attribution-type", data="uploader"
202
+ ), # Store for attribution type
203
  html.Div(
204
  [
205
  # Header
206
  html.Div(
207
  [
208
+ # --- LIVE indicator and last-updated badge (shown in place of the former title/subtitle) ---
209
  html.Div(
210
  [
211
+ html.Span(
212
  [
213
+ html.Span(
 
214
  style={
215
+ "display": "inline-block",
216
+ "width": "14px",
217
+ "height": "14px",
218
+ "borderRadius": "50%",
219
+ "backgroundColor": "#AC482A",
220
+ "marginRight": "8px",
221
+ "verticalAlign": "middle",
222
+ "boxShadow": "0 0 6px #AC482A",
223
  },
224
  ),
225
+ html.Span(
226
+ "LIVE",
227
  style={
228
+ "backgroundColor": "transparent",
229
+ "color": "#fff",
230
+ "fontWeight": "700",
231
+ "fontSize": "18px",
232
+ "letterSpacing": "1px",
233
+ "verticalAlign": "middle",
234
+ "display": "inline-block",
235
  },
236
  ),
237
  ],
238
  style={
239
+ "display": "inline-flex",
240
+ "alignItems": "center",
241
+ "marginRight": "16px",
242
  },
243
  ),
244
+ html.Span(
245
+ f"Last updated: {get_last_updated()}",
246
+ style={
247
+ "backgroundColor": "#1B344D",
248
+ "color": "#FFFFFF",
249
+ "fontWeight": "500",
250
+ "fontSize": "15px",
251
+ "verticalAlign": "middle",
252
+ "display": "inline-block",
253
+ "borderRadius": "8px",
254
+ "padding": "4px 14px",
255
+ "boxShadow": "0 1px 4px rgba(8,32,48,0.10)",
256
+ "marginLeft": "0px",
257
+ },
258
+ ),
259
+ ],
260
+ style={
261
+ "display": "flex",
262
+ "alignItems": "center",
263
+ "gap": "12px",
264
+ },
265
+ ),
266
+ html.Div(
267
+ [
268
+ html.A(
269
+ children=[
270
+ html.Img(
271
+ src="assets/images/dpi.svg",
272
  style={
273
+ "height": "28px",
274
+ "verticalAlign": "middle",
275
+ "paddingRight": "8px",
 
 
 
 
 
 
276
  },
277
  ),
278
+ "Data Provenance Initiative",
279
+ ],
280
+ href="https://www.dataprovenance.org/",
281
+ target="_blank",
282
+ className="no-bg-link header-link",
283
+ style={
284
+ "display": "inline-block",
285
+ "padding": "6px 14px",
286
+ "fontSize": 13,
287
+ "color": "#FFFFFF", # white on dark header
288
+ "borderRadius": "18px",
289
+ "fontWeight": "700",
290
+ "textDecoration": "none",
291
+ "marginRight": "12px",
292
+ },
293
+ ),
294
+ html.A(
295
+ children=[
296
+ html.Img(
297
+ src="assets/images/hf.svg",
298
  style={
299
+ "height": "30px",
300
+ "verticalAlign": "middle",
 
 
 
 
 
301
  },
302
  ),
303
+ html.Span(
304
+ "Hugging Face",
305
+ className="hf-brand-text",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  ),
307
  ],
308
+ href="https://huggingface.co/",
309
+ target="_blank",
310
+ className="no-bg-link header-link",
311
+ style={
312
+ "display": "inline-flex",
313
+ "padding": "6px 14px",
314
+ "alignItems": "center",
315
+ "color": "#FFFFFF",
316
+ "borderRadius": "18px",
317
+ "textDecoration": "none",
318
+ "marginRight": "12px",
319
+ },
320
+ ),
321
+ html.A(
322
+ children=[
323
+ html.Span(
324
+ "Read the paper",
325
+ className="paper-text",
326
+ ),
327
+ ],
328
+ href="https://www.google.com/",
329
+ target="_blank",
330
+ className="no-bg-link header-link paper-link",
331
+ style={
332
+ "display": "inline-flex",
333
+ "alignItems": "center",
334
+ "padding": "6px 12px",
335
+ "fontSize": 14,
336
+ "margin": "0 auto",
337
+ "backgroundColor": "#AC482A",
338
+ "color": "#FFFFFF",
339
+ "borderRadius": "5px",
340
+ "textDecoration": "none",
341
+ "fontWeight": "700",
342
+ },
343
  ),
344
  ],
345
+ style={"display": "flex", "alignItems": "center"},
 
 
 
 
 
 
 
 
346
  ),
347
  ],
348
  style={
349
+ "display": "flex",
350
+ "justifyContent": "space-between",
351
+ "alignItems": "center",
352
+ "padding": "18px 24px",
353
+ "gap": "24px",
354
+ "backgroundColor": "#082030", # restored dark background
355
  },
356
  ),
357
+ # Title row (the LIVE indicator is rendered in the header above, not here)
 
358
  html.Div(
359
+ [
360
+ html.Span(
361
+ "The Open Model Leaderboard",
362
+ style={
363
+ "fontSize": 40,
364
+ "fontWeight": "700",
365
+ "textAlign": "center",
366
+ "marginTop": "20px",
367
+ "marginBottom": "20px",
368
+ },
369
+ ),
370
+ ],
371
  style={
372
+ "display": "flex",
373
+ "alignItems": "center",
374
+ "justifyContent": "center",
375
+ "gap": "12px",
376
+ "marginTop": "20px",
377
+ "marginBottom": "20px",
378
  },
379
  ),
380
+ # Intro / description below header (kept but styled to match layout)
381
  html.Div(
382
  children="This leaderboard assesses concentrations of power in the open model ecosystem across three hierarchies: countries, developers, and models. Explore how downloads are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face.",
383
  style={
 
395
  html.Div(
396
  [
397
  html.Div(
398
+ html.Span(
399
+ [
400
+ "Select Download View",
401
+ dmc.HoverCard(
402
+ width=260,
403
+ shadow="md",
404
+ position="top",
405
+ children=[
406
+ dmc.HoverCardTarget(
407
+ html.Span(
408
+ DashIconify(
409
+ icon="mdi:information-outline",
410
+ width=16,
411
+ height=16,
412
+ style={
413
+ "marginLeft": "6px",
414
+ "color": "#AC482A",
415
+ "verticalAlign": "middle",
416
+ },
417
+ ),
418
+ style={"cursor": "pointer"},
419
+ )
420
+ ),
421
+ dmc.HoverCardDropdown(
422
+ dmc.Text(
423
+ "We believe this filter isolates more authentic usage, mitigating the impact of automatic software downloads for older models.",
424
+ size="sm",
425
+ style={"maxWidth": "240px"},
426
+ )
427
+ ),
428
+ ],
429
+ ),
430
+ ],
431
+ style={
432
+ "display": "inline-flex",
433
+ "alignItems": "center",
434
+ },
435
+ ),
436
  style={
437
  "fontWeight": "700",
438
  "marginBottom": 8,
439
  "fontSize": 14,
440
+ "display": "flex",
441
+ "alignItems": "center",
442
  },
443
  ),
444
+ html.Div(
445
+ [
446
+ dmc.SegmentedControl(
447
+ id="segmented",
448
+ value="all-downloads",
449
+ color="#AC482A",
450
+ transitionDuration=200,
451
+ data=[
452
+ {
453
+ "value": "all-downloads",
454
+ "label": "All Downloads",
455
+ },
456
+ {
457
+ "value": "filtered-downloads",
458
+ "label": html.Span(
459
+ [
460
+ "Filtered Downloads",
461
+ ]
462
+ ),
463
+ },
464
+ ],
465
+ mb=10,
466
+ ),
467
  ],
468
+ style={"display": "flex", "alignItems": "center"},
469
  ),
470
  html.Div(
471
+ "Choose whether to count all downloads, or only downloads up to one year from model creation.",
472
  style={
473
  "fontSize": 13,
474
  "color": "#555",
 
479
  html.Div(
480
  [
481
  html.Div(
482
+ "Select Model Attribution",
483
  style={
484
  "fontWeight": "700",
485
  "marginBottom": 8,
486
  "fontSize": 14,
487
  },
488
  ),
489
+ dmc.SegmentedControl(
490
+ id="model-attribution-segmented",
491
+ value="uploader",
492
  color="#AC482A",
493
+ transitionDuration=200,
494
+ data=[
495
+ {
496
+ "value": "uploader",
497
+ "label": "Model Uploader",
498
+ },
499
+ {
500
+ "value": "original_creator",
501
+ "label": "Original Model Creator",
502
+ },
503
+ ],
504
  mb=10,
505
  ),
506
  html.Div(
507
+ "Toggle between having downloads attributed to the account that uploaded the model, or the account that uploaded the model that this was originally derived from.",
508
  style={
509
  "fontSize": 13,
510
  "color": "#555",
 
553
  icon="mdi:lightbulb-on-outline",
554
  width=20,
555
  height=20,
556
+ style={
557
+ "marginRight": "8px",
558
+ "color": "#082030",
559
+ },
560
  ),
561
  html.Span("Tip"),
562
  ],
 
572
  html.Div(
573
  [
574
  "Try switching between ",
575
+ html.Span(
576
+ "All Downloads",
577
+ style={
578
+ "fontWeight": "600",
579
+ "color": "#AC482A",
580
+ },
581
+ ),
582
+ " and ",
583
+ html.Span(
584
+ "Filtered Downloads",
585
+ style={
586
+ "fontWeight": "600",
587
+ "color": "#AC482A",
588
+ },
589
+ ),
590
+ " to compare net popularity (but many duplicate, unused downloads) versus more immediate interest as models are released. ",
591
+ "You can also toggle between ",
592
+ html.Span(
593
+ "Model Uploader",
594
+ style={
595
+ "fontWeight": "600",
596
+ "color": "#AC482A",
597
+ },
598
+ ),
599
  " and ",
600
+ html.Span(
601
+ "Original Model Creator",
602
+ style={
603
+ "fontWeight": "600",
604
+ "color": "#AC482A",
605
+ },
606
+ ),
607
+ " to see how attribution affects perceived popularity.",
608
  ],
609
  style={
610
  "fontSize": 13,
 
669
  },
670
  children=[
671
  html.Div(
672
+ children="The country leaderboard shows how downloads are distributed across different nations, highlighting which countries are leading in model usage and adoption.",
673
  style={
674
  "fontSize": 14,
675
  "marginTop": 18,
 
681
  id="loading-countries",
682
  type="circle",
683
  color="#AC482A",
684
+ children=html.Div(id="top_countries-table"),
685
  ),
686
  html.Button(
687
  id="top_countries-toggle",
 
710
  },
711
  children=[
712
  html.Div(
713
+ children="The developer leaderboard highlights the most influential model creators on Hugging Face, showcasing which developers have garnered the highest download counts for their models.",
714
  style={
715
  "fontSize": 14,
716
  "marginTop": 18,
 
722
  id="loading-developers",
723
  type="circle",
724
  color="#AC482A",
725
+ children=html.Div(
726
+ id="top_developers-table"
727
+ ),
728
  ),
729
  html.Button(
730
  id="top_developers-toggle",
 
753
  },
754
  children=[
755
  html.Div(
756
+ children="The model leaderboard showcases the most popular models on Hugging Face based on download counts, highlighting which models are driving adoption in the open model ecosystem.",
757
  style={
758
  "fontSize": 14,
759
  "marginTop": 18,
 
765
  id="loading-models",
766
  type="circle",
767
  color="#AC482A",
768
+ children=html.Div(id="top_models-table"),
769
  ),
770
  html.Button(
771
  id="top_models-toggle",
 
793
  "backgroundColor": "#ffffff",
794
  "minHeight": "100vh",
795
  },
796
+ ),
797
  ],
798
  )
799
 
800
 
801
  # Callbacks for interactivity
802
  # -- helper utilities to consolidate duplicated callback logic --
803
+ def _get_filtered_top_n_from_duckdb(
804
+ slider_value, group_col, top_n, view="all_downloads"
805
+ ):
806
  """
807
  Query DuckDB directly to get top N entries with metadata
808
  This minimizes data transfer by doing aggregation in DuckDB
 
959
  top_n, new_label = 10, "▼ Show Top 50"
960
 
961
  # Get filtered and aggregated data directly from DuckDB
962
+ df_filtered = _get_filtered_top_n_from_duckdb(
963
+ slider_value, group_col, top_n, view=view
964
+ )
965
 
966
  # Process the already-filtered data - pass derived_author_toggle
967
+ df, download_df = get_top_n_leaderboard(
968
+ df_filtered, group_col, top_n, derived_author_toggle=derived_author_toggle
969
+ )
970
  return render_table_content(
971
  df, download_df, chip_color=chip_color, filename=filename
972
  ), new_label
 
975
  # -- end helpers --
976
 
977
 
# --- Callback to store model attribution type ---
@app.callback(
    Output("model-attribution-type", "data"),
    Input("model-attribution-segmented", "value"),
)
def update_model_attribution_type(selected_value):
    """
    Copy the value of the "model-attribution-segmented" control into the
    "model-attribution-type" store, unchanged.
    """
    attribution_type = selected_value
    return attribution_type
985
 
986
 
987
  # Callbacks for interactivity (modularized)
 
991
  Input("top_countries-toggle", "n_clicks"),
992
  Input("time-slider", "value"),
993
  Input("selected-view", "data"),
994
+ Input("model-attribution-type", "data"),
995
  State("top_countries-toggle", "children"),
996
  )
997
+ def update_top_countries(
998
+ n_clicks, slider_value, selected_view, attribution_type, current_label
999
+ ):
1000
  return _leaderboard_callback_logic(
1001
  n_clicks,
1002
  slider_value,
 
1006
  default_label="▼ Show Top 50",
1007
  chip_color="#F0F9FF",
1008
  view=selected_view,
1009
+ derived_author_toggle=(attribution_type == "uploader"),
1010
  )
1011
 
1012
 
 
1016
  Input("top_developers-toggle", "n_clicks"),
1017
  Input("time-slider", "value"),
1018
  Input("selected-view", "data"),
1019
+ Input("model-attribution-type", "data"),
1020
  State("top_developers-toggle", "children"),
1021
  )
1022
+ def update_top_developers(
1023
+ n_clicks, slider_value, selected_view, attribution_type, current_label
1024
+ ):
1025
+ # Use derived_author if attribution_type == "uploader", else author
1026
+ group_col = "derived_author" if attribution_type == "uploader" else "author"
1027
  return _leaderboard_callback_logic(
1028
  n_clicks,
1029
  slider_value,
 
1033
  default_label="▼ Show Top 50",
1034
  chip_color="#F0F9FF",
1035
  view=selected_view,
1036
+ derived_author_toggle=(attribution_type == "uploader"),
1037
  )
1038
 
1039
 
 
1043
  Input("top_models-toggle", "n_clicks"),
1044
  Input("time-slider", "value"),
1045
  Input("selected-view", "data"),
1046
+ Input("model-attribution-type", "data"),
1047
  State("top_models-toggle", "children"),
1048
  )
1049
+ def update_top_models(
1050
+ n_clicks, slider_value, selected_view, attribution_type, current_label
1051
+ ):
1052
  return _leaderboard_callback_logic(
1053
  n_clicks,
1054
  slider_value,
 
1058
  default_label="▼ Show More",
1059
  chip_color="#F0F9FF",
1060
  view=selected_view,
1061
+ derived_author_toggle=(attribution_type == "uploader"),
1062
  )
1063
 
1064
 
 
1069
  def update_thumb_labels(values):
1070
  return get_thumb_labels(values)
1071
 
1072
+
1073
  # --- Add callback to update selected view based on segmented control ---
1074
  @app.callback(
1075
  Output("selected-view", "data"),
 
1080
  return "one_year_rolling"
1081
  return "all_downloads"
1082
 
1083
+
# Run the app
if __name__ == "__main__":
    import os

    # debug=True enables Dash's dev tools and interactive debugger, which
    # should not be reachable on a public deployment. Debug mode is therefore
    # opt-in: set DASH_DEBUG=1 (or "true"/"yes") for local development.
    debug_enabled = os.environ.get("DASH_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
    }
    app.run(debug=debug_enabled)
graphs/leaderboard.py CHANGED
@@ -286,13 +286,13 @@ def render_table_content(
286
  def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggle=True):
287
  """
288
  Get top N entries for a leaderboard
289
-
290
  Args:
291
  filtered_df: Pandas DataFrame (already filtered by time from DuckDB query)
292
  group_col: Column to group by
293
  top_n: Number of top entries to return
294
- derived_author_toggle: Whether to use derived_author or author column
295
-
296
  Returns:
297
  tuple: (display_df, download_df)
298
  """
 
286
  def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggle=True):
287
  """
288
  Get top N entries for a leaderboard
289
+
290
  Args:
291
  filtered_df: Pandas DataFrame (already filtered by time from DuckDB query)
292
  group_col: Column to group by
293
  top_n: Number of top entries to return
294
+ derived_author_toggle: If True, attribute to model uploader (derived_author); if False, attribute to original model creator (author)
295
+
296
  Returns:
297
  tuple: (display_df, download_df)
298
  """