emsesc committed on
Commit
ffc8ed8
·
1 Parent(s): 66b3482

reduce to leaderboard

Browse files
app.py CHANGED
@@ -1,10 +1,7 @@
1
  from dash import Dash, html, dcc, Input, Output, State
2
  import pandas as pd
3
  import dash_mantine_components as dmc
4
- from graphs.model_market_share import create_stacked_area_chart, create_world_map, create_range_slider
5
  from graphs.leaderboard import create_leaderboard, get_top_n_leaderboard, render_table, render_table_content
6
- from graphs.model_characteristics import create_concentration_chart, create_line_plot
7
- from graphs.tree import generate_model_treemap
8
 
9
  # Initialize the app
10
  app = Dash()
@@ -12,142 +9,6 @@ server = app.server
12
 
13
  # Load pre-processed data frames
14
  filtered_df = pd.read_pickle("data_frames/filtered_df.pkl")
15
- model_topk_df = pd.read_pickle("data_frames/model_topk_df.pkl")
16
- model_gini_df = pd.read_pickle("data_frames/model_gini_df.pkl")
17
- model_hhi_df = pd.read_pickle("data_frames/model_hhi_df.pkl")
18
- language_concentration_df = pd.read_pickle("data_frames/language_concentration_df.pkl")
19
- license_concentration_df = pd.read_pickle("data_frames/download_license_cumsum_df.pkl")
20
- download_method_cumsum_df = pd.read_pickle("data_frames/download_method_cumsum_df.pkl")
21
- download_arch_cumsum_df = pd.read_pickle("data_frames/download_arch_cumsum_df.pkl")
22
- nat_topk_df = pd.read_pickle("data_frames/nat_topk_df.pkl")
23
- country_concentration_df = pd.read_pickle("data_frames/country_concentration_df.pkl")
24
- author_concentration_df = pd.read_pickle("data_frames/author_concentration_df.pkl")
25
- model_concentration_df = pd.read_pickle("data_frames/model_concentration_df.pkl")
26
- derived_country_concentration_df = pd.read_pickle("data_frames/derived_country_concentration_df_rolling.pkl")
27
- nat_gini_df = pd.read_pickle("data_frames/nat_gini_df.pkl")
28
- nat_hhi_df = pd.read_pickle("data_frames/nat_hhi_df.pkl")
29
-
30
- # Configurations
31
- TEMP_MODEL_EVENTS = {
32
- # "Yolo World Mirror": "2024-03-01",
33
- "Llama 3": "2024-04-17",
34
- "Stable Cascade": "2024-02-02",
35
- "Stable Diffusion 3": "2024-05-30",
36
- # "embed/upscale": "2023-03-24",
37
- "DeepSeek-R1": "2025-01-20",
38
- "Gemma-3 12B QAT": "2025-04-15", # gemma-3-12b-it-qat-4bit
39
- # "Qwen": "2025-03-05",
40
- # "Flux RedFlux": "2025-04-12",
41
- # "DeepSeek-V3": "2025-03-24",
42
- # "bloom": "2022-05-19",
43
- "DALLE2-PyTorch": "2022-06-25",
44
- "Stable Diffusion": "2022-08-10",
45
- "CLIP ViT": "2021-01-05",
46
- "YOLOv8": "2023-04-26",
47
- "Sentence Transformer MiniLM v2": "2021-08-30",
48
- }
49
-
50
- PALETTE_0 = [
51
- "#335C67",
52
- "#FFF3B0",
53
- "#E09F3E",
54
- "#9E2A2B",
55
- "#540B0E"
56
- ]
57
-
58
- LANG_SEGMENT_ORDER = [
59
- 'Monolingual: EN', 'Monolingual: HR', 'Monolingual: M/LR',
60
- 'Multilingual: HR', 'Multilingual', 'Unknown',
61
- ]
62
-
63
- LICENSE_SEGMENT_ORDER = [
64
- "Open Use", "Open Use (Acceptable Use Policy)", "Open Use (Non-Commercial Only)", "Attribution",
65
- "Acceptable Use Policy", "Non-Commercial Only", "Undocumented", "Undocumented (Acceptable Use Policy)",
66
- ]
67
-
68
- METHOD_PLOT_CHOICES = {
69
- "cumulative": "none", # none, mean, sum
70
- "y_col": "percent", # percent count
71
- "y_log": False, # True, False
72
- "period": "W",
73
- }
74
-
75
- ARCHITECTURE_PLOT_CHOICES = {
76
- "cumulative": "none", # none, mean, sum
77
- "y_col": "percent", # percent count
78
- "y_log": False, # True, False
79
- "period": "W",
80
- }
81
-
82
- metric_order = [
83
- 'USA', 'China', 'Germany', 'France', 'International / Online',
84
- 'Asia', 'Middle East', 'Rest of Europe', 'South America', 'UK',
85
- 'Africa', 'Other', "User",
86
- ]
87
-
88
- palette = [
89
- "#3870f2",
90
- "#e74c3c", # Green (Top 10-100) # Red (Top 1%)
91
- "#f39c12", # Orange (Top 1-10%)
92
- "#3498db", # Blue (Top 100-1000)
93
- "#7C2A50",
94
- "#9467bd",
95
- "#8c564b",
96
- "#e377c2",
97
- "#7f7f7f",
98
- "#27ae60",
99
- "#5ce7f6",
100
- "#f0e442",
101
- "#c2cbcc", # Gray (Rest)
102
- "#56b4e9",
103
- ]
104
-
105
- # Model Market Share Tab
106
- country_market_share_area = create_stacked_area_chart(
107
- derived_country_concentration_df, nat_gini_df, nat_hhi_df, TEMP_MODEL_EVENTS, palette, metric_order
108
- )
109
-
110
- # Define metric order
111
- metric_order = [
112
- "Top 1",
113
- "Top 1 - 10",
114
- "Top 10 - 100",
115
- "Top 100 - 1000",
116
- "Top 1000 - 10000",
117
- "Rest",
118
- ]
119
-
120
- model_market_share_area = create_stacked_area_chart(
121
- model_topk_df, model_gini_df, model_hhi_df, TEMP_MODEL_EVENTS, PALETTE_0, metric_order
122
- )
123
-
124
- world_map = create_world_map(
125
- filtered_df
126
- )
127
-
128
- slider = create_range_slider(
129
- model_topk_df
130
- )
131
-
132
- time_slider = dmc.RangeSlider(
133
- id="time-slider",
134
- min=model_topk_df['time'].min().timestamp(),
135
- max=model_topk_df['time'].max().timestamp(),
136
- value=[
137
- model_topk_df['time'].min().timestamp(),
138
- model_topk_df['time'].max().timestamp()
139
- ],
140
- step=24 * 60 * 60,
141
- color="blue",
142
- size="md",
143
- radius="xl",
144
- marks=[
145
- {"value": model_topk_df['time'].min().timestamp(), "label": model_topk_df['time'].min().strftime("%b %Y")},
146
- {"value": model_topk_df['time'].max().timestamp(), "label": model_topk_df['time'].max().strftime("%b %Y")}
147
- ],
148
- style={"width": "70%", "margin": "0 auto"},
149
- labelAlwaysOn=False,
150
- )
151
 
152
  # Create a dcc slider for time range selection by year
153
  created_slider = dcc.Slider(
@@ -160,27 +21,6 @@ created_slider = dcc.Slider(
160
  updatemode='mouseup',
161
  )
162
 
163
- # Model Characteristics Tab
164
- language_concentration_area = create_concentration_chart(
165
- language_concentration_df, 'time', 'metric', 'value', LANG_SEGMENT_ORDER, PALETTE_0
166
- )
167
-
168
- license_concentration_area = create_concentration_chart(
169
- license_concentration_df, 'period', 'status', 'percent', LICENSE_SEGMENT_ORDER, PALETTE_0
170
- )
171
-
172
- download_method_cumsum_line = create_line_plot(
173
- download_method_cumsum_df, METHOD_PLOT_CHOICES, PALETTE_0
174
- )
175
-
176
- download_arch_cumsum_line = create_line_plot(
177
- download_arch_cumsum_df, ARCHITECTURE_PLOT_CHOICES, PALETTE_0
178
- )
179
-
180
- tree_map = generate_model_treemap(
181
- filtered_df
182
- )
183
-
184
  # App layout
185
  app.layout = dmc.MantineProvider(
186
  theme={"colorScheme": "light",
@@ -223,61 +63,49 @@ app.layout = dmc.MantineProvider(
223
  ],
224
  style={'textAlign': 'center'}
225
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  html.Div(
227
  [
228
- dcc.Tabs([
229
- dcc.Tab(label='Model Market Share', children=[
230
- html.Div([
231
- html.Div(children='Select time range to update all graphs below:', style={'fontSize': 16, 'marginBottom': 6, 'marginTop': 20}),
232
- time_slider,
233
- html.Div(
234
- id='output-container-range-slider',
235
- style={
236
- 'textAlign': 'center',
237
- 'fontSize': 20,
238
- 'marginBottom': 15,
239
- 'marginTop': 30,
240
- 'backgroundColor': 'white',
241
- 'borderRadius': '12px',
242
- 'boxShadow': '0 2px 12px rgba(0,0,0,0.10)',
243
- 'padding': '18px',
244
- 'display': 'inline-block',
245
- }
246
- ),
247
- ], style={'marginBottom': 12, 'justifyContent': 'center', 'textAlign': 'center'}),
248
- html.Div([
249
- # dcc.Graph(id='stacked-area-chart'),
250
- dcc.Graph(figure=country_market_share_area),
251
- ], style={'marginBottom': 12}),
252
- html.Div([
253
- html.Div(
254
- dcc.Graph(id='world-map-with-slider'),
255
- style={'display': 'flex', 'justifyContent': 'center', 'marginBottom': 0}
256
- ),
257
- created_slider,
258
- ], style={'marginBottom': 12})
259
- ]),
260
- dcc.Tab(label='Leaderboard', children=[
261
  create_leaderboard(
262
- filtered_df
263
  )
264
  ]),
265
- dcc.Tab(label='Model Tree Map', children=[
266
- dcc.Graph(figure=tree_map)
 
 
267
  ]),
268
- dcc.Tab(label='Model Characteristics',children=[
269
- html.Div([
270
- html.H3("Language Concentration", style={'textAlign': 'center', 'marginBottom': 10}),
271
- dcc.Graph(figure=language_concentration_area),
272
- html.H3("License Distribution", style={'textAlign': 'center', 'marginBottom': 10}),
273
- dcc.Graph(figure=license_concentration_area),
274
- html.H3("Method Trends", style={'textAlign': 'center', 'marginBottom': 10}),
275
- dcc.Graph(figure=download_method_cumsum_line),
276
- html.H3("Architecture Trends", style={'textAlign': 'center', 'marginBottom': 10}),
277
- dcc.Graph(figure=download_arch_cumsum_line),
278
- ], style={'marginBottom': 12}),
279
  ]),
280
- ])
281
  ],
282
  style={
283
  'backgroundColor': 'white',
@@ -296,69 +124,33 @@ app.layout = dmc.MantineProvider(
296
 
297
  # Model Market Share Tab
298
  # On slider change, update output text
299
- @app.callback(
300
- Output('output-container-range-slider', 'children'),
301
- [Input('time-slider', 'value')]
302
- )
303
- def update_output(value):
304
- if value and len(value) == 2:
305
- start_time = pd.to_datetime(value[0], unit='s').strftime("%b %d, %Y")
306
- end_time = pd.to_datetime(value[1], unit='s').strftime("%b %d, %Y")
307
- return f"Selected time range: {start_time} to {end_time}"
308
- return "Select a time range"
309
-
310
- # On slider change, update world map
311
- @app.callback(
312
- Output('world-map-with-slider', 'figure'),
313
- Input('created-slider', 'value')
314
- )
315
- def update_world_map(value):
316
- # Filter by created year
317
- if value is None:
318
- return world_map
319
-
320
- created_after = f"{int(value)}-01-01"
321
- updated_fig = create_world_map(
322
- filtered_df,
323
- created_after=created_after
324
- )
325
- return updated_fig
326
-
327
-
328
- # On slider change, update leaderboard
329
  # @app.callback(
330
- # Output('leaderboard', 'figure'),
331
- # [Input('time-slider', 'relayoutData')]
332
  # )
333
- # def update_leaderboard(relayout_data):
334
- # if relayout_data and 'xaxis.range[0]' in relayout_data and 'xaxis.range[1]' in relayout_data:
335
- # start_time = pd.to_datetime(relayout_data['xaxis.range[0]']).strftime('%Y-%m-%d')
336
- # end_time = pd.to_datetime(relayout_data['xaxis.range[1]']).strftime('%Y-%m-%d')
337
- # updated_fig = create_leaderboard(
338
- # country_concentration_df, author_concentration_df, model_concentration_df, start_time=start_time, end_time=end_time
339
- # )
340
- # updated_fig.update_layout(font_family="Inter")
341
- # return updated_fig
342
- # else:
343
- # return leaderboard
344
 
345
- # On slider change, update stacked area chart
346
  # @app.callback(
347
- # Output('stacked-area-chart', 'figure'),
348
- # Input('time-slider', 'value')
349
  # )
350
- # def update_stacked_area(value):
351
- # if value and len(value) == 2:
352
- # start_time = pd.to_datetime(value[0], unit='s').strftime('%Y-%m-%d')
353
- # end_time = pd.to_datetime(value[1], unit='s').strftime('%Y-%m-%d')
354
- # updated_fig = create_stacked_area_chart(
355
- # model_topk_df, model_gini_df, model_hhi_df,
356
- # TEMP_MODEL_EVENTS, PALETTE_0,
357
- # start_time=start_time, end_time=end_time
358
- # )
359
- # updated_fig.update_layout(font_family="Inter")
360
- # return updated_fig
361
- # return model_market_share_area
362
 
363
  @app.callback(
364
  Output("top_countries-table", "children"),
 
1
  from dash import Dash, html, dcc, Input, Output, State
2
  import pandas as pd
3
  import dash_mantine_components as dmc
 
4
  from graphs.leaderboard import create_leaderboard, get_top_n_leaderboard, render_table, render_table_content
 
 
5
 
6
  # Initialize the app
7
  app = Dash()
 
9
 
10
  # Load pre-processed data frames
11
  filtered_df = pd.read_pickle("data_frames/filtered_df.pkl")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  # Create a dcc slider for time range selection by year
14
  created_slider = dcc.Slider(
 
21
  updatemode='mouseup',
22
  )
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  # App layout
25
  app.layout = dmc.MantineProvider(
26
  theme={"colorScheme": "light",
 
63
  ],
64
  style={'textAlign': 'center'}
65
  ),
66
+ html.Div(
67
+ children=[
68
+ dmc.SegmentedControl(
69
+ id="segmented",
70
+ value="all-time",
71
+ data=[
72
+ {"value": "one-year", "label": "One Year Window"},
73
+ {"value": "all-time", "label": "All Time"},
74
+ ],
75
+ mb=10,
76
+ ),
77
+ html.Span(id="global-toggle-status", style={"marginLeft": "8px"}),
78
+ created_slider,
79
+ html.Div(children='This is an explanation of the filters on this block.', style={'fontSize': 12, 'marginTop': 20, 'marginLeft': 10, 'marginRight': 10}),
80
+ ],
81
+ style={
82
+ 'backgroundColor': 'white',
83
+ 'borderRadius': '18px',
84
+ 'boxShadow': '0 4px 24px rgba(0,0,0,0.10)',
85
+ 'padding': '32px',
86
+ 'margin': '32px auto',
87
+ 'maxWidth': '1000px',
88
+ }
89
+ ),
90
  html.Div(
91
  [
92
+ dcc.Tabs(children=[ # wrap Tabs here
93
+ dcc.Tab(label='Countries', children=[
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  create_leaderboard(
95
+ filtered_df, "countries"
96
  )
97
  ]),
98
+ dcc.Tab(label='Developers', children=[
99
+ create_leaderboard(
100
+ filtered_df, "developers"
101
+ )
102
  ]),
103
+ dcc.Tab(label='Models', children=[
104
+ create_leaderboard(
105
+ filtered_df, "models"
106
+ )
 
 
 
 
 
 
 
107
  ]),
108
+ ]),
109
  ],
110
  style={
111
  'backgroundColor': 'white',
 
124
 
125
  # Model Market Share Tab
126
  # On slider change, update output text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  # @app.callback(
128
+ # Output('output-container-range-slider', 'children'),
129
+ # [Input('time-slider', 'value')]
130
  # )
131
+ # def update_output(value):
132
+ # if value and len(value) == 2:
133
+ # start_time = pd.to_datetime(value[0], unit='s').strftime("%b %d, %Y")
134
+ # end_time = pd.to_datetime(value[1], unit='s').strftime("%b %d, %Y")
135
+ # return f"Selected time range: {start_time} to {end_time}"
136
+ # return "Select a time range"
 
 
 
 
 
137
 
138
+ # On slider change, update world map
139
  # @app.callback(
140
+ # Output('world-map-with-slider', 'figure'),
141
+ # Input('created-slider', 'value')
142
  # )
143
+ # def update_world_map(value):
144
+ # # Filter by created year
145
+ # if value is None:
146
+ # return world_map
147
+
148
+ # created_after = f"{int(value)}-01-01"
149
+ # updated_fig = create_world_map(
150
+ # filtered_df,
151
+ # created_after=created_after
152
+ # )
153
+ # return updated_fig
 
154
 
155
  @app.callback(
156
  Output("top_countries-table", "children"),
data_frames/agg_devsize_downloads.pkl DELETED
Binary file (24.1 kB)
 
data_frames/agg_natsize_downloads.pkl DELETED
Binary file (2.09 kB)
 
data_frames/derived_country_concentration_df_rolling.pkl DELETED
Binary file (83 kB)
 
data_frames/dev_gini_df.pkl DELETED
Binary file (5.59 kB)
 
data_frames/dev_hhi_df.pkl DELETED
Binary file (5.59 kB)
 
data_frames/dev_topk_df.pkl DELETED
Binary file (28.3 kB)
 
data_frames/download_arch_cumsum_df.pkl DELETED
Binary file (78.6 kB)
 
data_frames/download_license_cumsum_df.pkl DELETED
Binary file (64.5 kB)
 
data_frames/download_method_cumsum_df.pkl DELETED
Binary file (78.6 kB)
 
data_frames/download_openness_cumsum_df.pkl DELETED
Binary file (29.3 kB)
 
data_frames/language_concentration_df.pkl DELETED
Binary file (28.4 kB)
 
data_frames/model_gini_df.pkl DELETED
Binary file (5.59 kB)
 
data_frames/model_hhi_df.pkl DELETED
Binary file (5.59 kB)
 
data_frames/model_topk_df.pkl DELETED
Binary file (28.3 kB)
 
data_frames/nat_gini_df.pkl DELETED
Binary file (5.59 kB)
 
data_frames/nat_hhi_df.pkl DELETED
Binary file (5.59 kB)
 
data_frames/nat_topk_df.pkl DELETED
Binary file (28.3 kB)
 
graphs/__pycache__/model_characteristics.cpython-39.pyc DELETED
Binary file (2.6 kB)
 
graphs/__pycache__/model_market_share.cpython-39.pyc DELETED
Binary file (5.94 kB)
 
graphs/leaderboard.py CHANGED
@@ -1,5 +1,7 @@
1
  import pandas as pd
2
  from dash import html, dcc
 
 
3
  import base64
4
 
5
  button_style = {
@@ -120,12 +122,16 @@ def df_to_download_link(df, filename):
120
  b64 = base64.b64encode(csv_string.encode()).decode()
121
  return html.Div(
122
  html.A(
123
- "Download CSV",
124
- id=f"download-{filename}",
125
- download=f"{filename}.csv",
126
- href=f"data:text/csv;base64,{b64}",
127
- target="_blank",
128
- style=button_style
 
 
 
 
129
  ),
130
  style={"textAlign": "right"}
131
  )
@@ -223,7 +229,7 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
223
  download_top["Total Value"] = download_top["Total Value"].astype(int)
224
  download_top["% of total"] = download_top["% of total"].round(2)
225
 
226
- top["Name"].replace("User", "user", inplace=True)
227
 
228
  # All relevant metadata columns
229
  meta_cols = meta_cols_map.get(group_col, [])
@@ -301,14 +307,14 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
301
  return download_info
302
 
303
  # Apply metadata builder to top dataframe
304
- top["Metadata"] = top["Name"].map(build_metadata)
305
  download_info_list = [build_download_metadata(nm) for nm in download_top["Name"]]
306
  download_info_df = pd.DataFrame(download_info_list)
307
  download_top = pd.concat([download_top, download_info_df], axis=1)
308
 
309
  return top[["Name", "Metadata", "% of total"]], download_top
310
 
311
- def create_leaderboard(filtered_df, start_time=None, top_n=10):
312
  # Filter by time
313
  if start_time is not None:
314
  filtered_df = filtered_df[(filtered_df["created"] >= start_time) & (filtered_df["time"] >= start_time)]
@@ -326,11 +332,9 @@ def create_leaderboard(filtered_df, start_time=None, top_n=10):
326
  top_developers, download_top_developers = get_top_n_leaderboard(filtered_df, "author", top_n)
327
  top_models, download_top_models = get_top_n_leaderboard(filtered_df, "model", top_n)
328
 
329
- # Layout with 3 stacked tables
330
- layout = html.Div([
331
- render_table(top_countries, download_top_countries, "Top Countries", chip_color="#FCE8E6", bar_color="#FF6F61", filename="top_countries"),
332
- render_table(top_developers, download_top_developers, "Top Developers", chip_color="#E6F4EA", bar_color="#4CAF50", filename="top_developers"),
333
- render_table(top_models, download_top_models, "Top Models", chip_color="#E8F0FE", bar_color="#2196F3", filename="top_models"),
334
- ])
335
-
336
- return layout
 
1
  import pandas as pd
2
  from dash import html, dcc
3
+ from dash_iconify import DashIconify
4
+ import dash_mantine_components as dmc
5
  import base64
6
 
7
  button_style = {
 
122
  b64 = base64.b64encode(csv_string.encode()).decode()
123
  return html.Div(
124
  html.A(
125
+ children=dmc.ActionIcon(
126
+ DashIconify(icon="mdi:download", width=24),
127
+ size="lg"
128
+ ),
129
+ id=f"download-{filename}",
130
+ download=f"{filename}.csv",
131
+ href=f"data:text/csv;base64,{b64}",
132
+ target="_blank",
133
+ title="Download CSV",
134
+ style={"padding": "6px 12px", "display": "inline-flex", "alignItems": "center", "justifyContent": "center"}
135
  ),
136
  style={"textAlign": "right"}
137
  )
 
229
  download_top["Total Value"] = download_top["Total Value"].astype(int)
230
  download_top["% of total"] = download_top["% of total"].round(2)
231
 
232
+ top["Name"].replace("User", "user")
233
 
234
  # All relevant metadata columns
235
  meta_cols = meta_cols_map.get(group_col, [])
 
307
  return download_info
308
 
309
  # Apply metadata builder to top dataframe
310
+ top["Metadata"] = top["Name"].astype(object).apply(build_metadata)
311
  download_info_list = [build_download_metadata(nm) for nm in download_top["Name"]]
312
  download_info_df = pd.DataFrame(download_info_list)
313
  download_top = pd.concat([download_top, download_info_df], axis=1)
314
 
315
  return top[["Name", "Metadata", "% of total"]], download_top
316
 
317
+ def create_leaderboard(filtered_df, board_type, start_time=None, top_n=10):
318
  # Filter by time
319
  if start_time is not None:
320
  filtered_df = filtered_df[(filtered_df["created"] >= start_time) & (filtered_df["time"] >= start_time)]
 
332
  top_developers, download_top_developers = get_top_n_leaderboard(filtered_df, "author", top_n)
333
  top_models, download_top_models = get_top_n_leaderboard(filtered_df, "model", top_n)
334
 
335
+ if board_type == "countries":
336
+ return render_table(top_countries, download_top_countries, "Top Countries", chip_color="#FCE8E6", bar_color="#FF6F61", filename="top_countries")
337
+ elif board_type == "developers":
338
+ return render_table(top_developers, download_top_developers, "Top Developers", chip_color="#E6F4EA", bar_color="#4CAF50", filename="top_developers")
339
+ else:
340
+ return render_table(top_models, download_top_models, "Top Models", chip_color="#E8F0FE", bar_color="#2196F3", filename="top_models")
 
 
graphs/model_characteristics.py DELETED
@@ -1,145 +0,0 @@
1
- import plotly.graph_objects as go
2
- import plotly.express as px
3
-
4
- def create_concentration_chart(
5
- df,
6
- period_col,
7
- metric_col,
8
- value_col,
9
- order,
10
- palette
11
- ):
12
- fig = go.Figure()
13
-
14
- # Create stacked area traces
15
- for i, metric in enumerate(order):
16
- metric_data = df[df[metric_col] == metric]
17
-
18
- # Sort by time and get values
19
- metric_data = metric_data.sort_values(period_col)
20
- x_vals = metric_data[period_col]
21
- y_vals = metric_data[value_col]
22
-
23
- # Add area trace
24
- fig.add_trace(
25
- go.Scatter(
26
- x=x_vals,
27
- y=y_vals,
28
- name=metric,
29
- mode='lines',
30
- line=dict(width=0),
31
- fill='tonexty' if i > 0 else 'tozeroy',
32
- fillcolor=palette[i % len(palette)],
33
- stackgroup='one',
34
- hovertemplate='<b>%{fullData.name}</b><br>' +
35
- 'Time: %{x}<br>' +
36
- 'Value: %{y}<extra></extra>'
37
- )
38
- )
39
-
40
- fig.update_layout(
41
- autosize=True,
42
- font_size=14,
43
- showlegend=True,
44
- margin=dict(l=60, r=60, t=40, b=80), # Increased bottom margin
45
- plot_bgcolor="white",
46
- hovermode="x unified",
47
- legend=dict(
48
- orientation="h", # Horizontal legend
49
- yanchor="top", # Anchor the top of the legend box
50
- y=-0.25, # Place it below the plot
51
- xanchor="center",
52
- x=0.5
53
- )
54
- )
55
-
56
- fig.update_xaxes(
57
- title_text="",
58
- showgrid=True,
59
- gridcolor='lightgray',
60
- gridwidth=1
61
- )
62
-
63
- fig.update_yaxes(
64
- title_text="",
65
- showgrid=True,
66
- gridcolor='lightgray',
67
- gridwidth=1
68
- )
69
-
70
- return fig
71
-
72
- def create_line_plot(
73
- df,
74
- plot_choices,
75
- color_palette=None
76
- ):
77
- fig = go.Figure()
78
-
79
- groups = df['status'].unique()
80
-
81
- if color_palette is None:
82
- color_palette = px.colors.qualitative.Set1
83
-
84
- for i, group in enumerate(groups):
85
- group_data = df[df['status'] == group]
86
- group_data = group_data.sort_values('period')
87
-
88
- x_vals = group_data['period']
89
- y_vals = group_data[plot_choices["y_col"]]
90
-
91
- if plot_choices.get("y_format") == "percent":
92
- y_vals = y_vals * 100
93
-
94
- fig.add_trace(
95
- go.Scatter(
96
- x=x_vals,
97
- y=y_vals,
98
- name=group,
99
- mode='lines',
100
- line=dict(
101
- color=color_palette[i % len(color_palette)],
102
- width=3
103
- ),
104
- opacity=0.85,
105
- hovertemplate='<b>%{fullData.name}</b><br>' +
106
- 'Period: %{x}<br>' +
107
- 'Value: %{y:.2f}%<extra></extra>' if plot_choices.get("y_format") == "percent"
108
- else '<b>%{fullData.name}</b><br>Period: %{x}<br>Value: %{y}<extra></extra>'
109
- )
110
- )
111
-
112
- fig.update_layout(
113
- autosize=True,
114
- font_size=14,
115
- showlegend=True,
116
- margin=dict(l=60, r=60, t=40, b=80), # Increased bottom margin
117
- plot_bgcolor="white",
118
- hovermode="x unified",
119
- legend=dict(
120
- orientation="h", # Horizontal legend
121
- yanchor="top", # Anchor the top of the legend box
122
- y=-0.25, # Place it below the plot
123
- xanchor="center",
124
- x=0.5
125
- )
126
- )
127
-
128
- fig.update_xaxes(
129
- title_text="Period",
130
- showgrid=False,
131
- zeroline=False
132
- )
133
-
134
- y_title = plot_choices["y_col"]
135
- if plot_choices.get("y_format") == "percent":
136
- y_title += " (%)"
137
-
138
- fig.update_yaxes(
139
- title_text=y_title,
140
- showgrid=False,
141
- zeroline=False,
142
- type='log' if plot_choices.get("y_log") else 'linear'
143
- )
144
-
145
- return fig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
graphs/model_market_share.py DELETED
@@ -1,347 +0,0 @@
1
- import numpy as np
2
- import pandas as pd
3
- import plotly.graph_objects as go
4
- from plotly.subplots import make_subplots
5
-
6
- def create_stacked_area_chart(
7
- topk_df, gini_df, hhi_df, events, palette, metric_order, start_time=None, end_time=None
8
- ):
9
-
10
- # Create subplot with secondary y-axis
11
- fig = make_subplots(specs=[[{"secondary_y": True}]])
12
-
13
- # Create stacked area traces
14
- for i, metric in enumerate(metric_order):
15
- metric_data = topk_df[topk_df["metric"] == metric]
16
-
17
- # Sort by time and get values
18
- metric_data = metric_data.sort_values("time")
19
- if start_time:
20
- metric_data = metric_data[metric_data["time"] >= start_time]
21
- if end_time:
22
- metric_data = metric_data[metric_data["time"] <= end_time]
23
-
24
- x_vals = metric_data["time"]
25
- y_vals = metric_data["value"]
26
-
27
- # Add area trace
28
- fig.add_trace(
29
- go.Scatter(
30
- x=x_vals,
31
- y=y_vals,
32
- name=metric,
33
- mode="lines",
34
- line=dict(width=0, color=palette[i % len(palette)]),
35
- fill="tonexty" if i > 0 else "tozeroy",
36
- fillcolor=palette[i % len(palette)],
37
- stackgroup="one",
38
- hovertemplate="<b>%{fullData.name}</b><br>"
39
- + "Time: %{x}<br>"
40
- + "Value: %{y}<extra></extra>",
41
- ),
42
- secondary_y=False,
43
- )
44
-
45
- # Add overlay lines
46
- # Gini Coefficient
47
- # gini_data = gini_df.sort_values("time")
48
- # if start_time:
49
- # gini_data = gini_data[gini_data["time"] >= start_time]
50
- # if end_time:
51
- # gini_data = gini_data[gini_data["time"] <= end_time]
52
- # fig.add_trace(
53
- # go.Scatter(
54
- # x=gini_data["time"],
55
- # y=gini_data["value"],
56
- # name="Gini Coefficient",
57
- # mode="lines",
58
- # line=dict(color="#6b46c1", width=3),
59
- # yaxis="y2",
60
- # hovertemplate="<b>Gini Coefficient</b><br>"
61
- # + "Time: %{x}<br>"
62
- # + "Value: %{y:.3f}<extra></extra>",
63
- # ),
64
- # secondary_y=True,
65
- # )
66
-
67
- # # HHI (×10)
68
- # hhi_data = hhi_df.sort_values("time")
69
- # if start_time:
70
- # hhi_data = hhi_data[hhi_data["time"] >= start_time]
71
- # if end_time:
72
- # hhi_data = hhi_data[hhi_data["time"] <= end_time]
73
- # fig.add_trace(
74
- # go.Scatter(
75
- # x=hhi_data["time"],
76
- # y=hhi_data["value"] * 10,
77
- # name="HHI (×10)",
78
- # mode="lines",
79
- # line=dict(color="#ec4899", width=3),
80
- # yaxis="y2",
81
- # hovertemplate="<b>HHI (×10)</b><br>"
82
- # + "Time: %{x}<br>"
83
- # + "Value: %{y:.3f}<extra></extra>",
84
- # ),
85
- # secondary_y=True,
86
- # )
87
-
88
- # Add vertical lines for events
89
- for event_name, event_date in events.items():
90
- fig.add_shape(
91
- type="line",
92
- x0=event_date,
93
- x1=event_date,
94
- y0=0,
95
- y1=1,
96
- yref="paper",
97
- line=dict(color="#333333", width=2, dash="dash"),
98
- )
99
-
100
- fig.add_annotation(
101
- x=event_date,
102
- y=0.5,
103
- yref="paper",
104
- text=event_name,
105
- showarrow=False,
106
- xshift=-10,
107
- yshift=10,
108
- font=dict(size=12, color="black"),
109
- textangle=270,
110
- align="left",
111
- )
112
-
113
- fig.update_layout(
114
- autosize=True,
115
- font_size=14,
116
- showlegend=True,
117
- margin=dict(l=60, r=60, t=40, b=80), # Increased bottom margin
118
- plot_bgcolor="white",
119
- hovermode="x unified",
120
- legend=dict(
121
- orientation="h", # Horizontal legend
122
- yanchor="top", # Anchor the top of the legend box
123
- y=-0.25, # Place it below the plot
124
- xanchor="center",
125
- x=0.5
126
- )
127
- )
128
-
129
-
130
- # Update x-axis to be governed by start_time/end_time
131
- xaxis_range = None
132
- if start_time is not None and end_time is not None:
133
- xaxis_range = [start_time, end_time]
134
- elif start_time is not None:
135
- xaxis_range = [start_time, None]
136
- elif end_time is not None:
137
- xaxis_range = [None, end_time]
138
-
139
- fig.update_xaxes(
140
- title_text="",
141
- showgrid=True,
142
- gridcolor="lightgray",
143
- gridwidth=1,
144
- range=xaxis_range,
145
- )
146
-
147
- # Update primary y-axis (left)
148
- fig.update_yaxes(
149
- title_text="National Concentration (%)",
150
- showgrid=True,
151
- gridcolor="lightgray",
152
- gridwidth=1,
153
- secondary_y=False,
154
- )
155
-
156
- # Update secondary y-axis (right)
157
- fig.update_yaxes(
158
- title_text="Concentration Indices", showgrid=False, secondary_y=True
159
- )
160
-
161
- return fig
162
-
163
-
164
- def create_world_map(
165
- df, top_n_labels=20, created_after=None
166
- ):
167
- # Create a filtered_df with only countries
168
- df = df[df['org_country_single'] != 'HF']
169
- df = df[df['org_country_single'] != 'Online']
170
- df = df[df['org_country_single'] != 'International']
171
- df = df[df['org_country_single'] != 'user']
172
-
173
- # Filter out models created after 2024-01-01 and downloads after 2024-01-01
174
- if created_after:
175
- df = df[df['created'] > created_after]
176
- df = df[df['time'] > created_after]
177
-
178
- # Country code mapping
179
- country_code_map = {
180
- "Germany": "DEU",
181
- "United States of America": "USA",
182
- "China": "CHN",
183
- "France": "FRA",
184
- "India": "IND",
185
- "Israel": "ISR",
186
- "South Korea": "KOR",
187
- "United Kingdom": "GBR",
188
- "Switzerland": "CHE",
189
- "United Arab Emirates": "ARE",
190
- "Vietnam": "VNM",
191
- "Singapore": "SGP",
192
- "Chile": "CHL",
193
- "Hong Kong": "HKG",
194
- "Japan": "JPN",
195
- "Canada": "CAN",
196
- "Spain": "ESP",
197
- "Finland": "FIN",
198
- "Indonesia": "IDN",
199
- "Russia": "RUS",
200
- "Iran": "IRN",
201
- "Belarus": "BLR",
202
- "Thailand": "THA",
203
- "UAE": "ARE",
204
- "Argentina": "ARG",
205
- "Iceland": "ISL",
206
- "Poland": "POL",
207
- "Sweden": "SWE",
208
- "Taiwan": "TWN",
209
- "Lebanon": "LBN",
210
- "Algeria": "DZA",
211
- "Bulgaria": "BGR",
212
- "Norway": "NOR",
213
- "Netherlands": "NLD",
214
- "Hungary": "HUN",
215
- "Estonia": "EST",
216
- "Qatar": "QAT",
217
- "Brazil": "BRA",
218
- "Morocco": "MAR",
219
- "Slovenia": "SVN",
220
- "Ghana": "GHA",
221
- "Uganda": "UGA",
222
- "Turkey": "TUR",
223
- }
224
-
225
- df["country_code"] = df["org_country_single"].map(country_code_map)
226
- df = df.dropna(subset=["country_code"])
227
-
228
- # Fix country plot
229
-
230
- fig = make_subplots(
231
- rows=1,
232
- cols=1,
233
- specs=[[{"type": "geo"}]],
234
- )
235
-
236
- downloads_by_country = (
237
- df.groupby(['org_country_single', 'country_code'])['downloads']
238
- .sum()
239
- .reset_index()
240
- )
241
-
242
- # Prepare top countries for annotation
243
- total_downloads = float(downloads_by_country['downloads'].sum())
244
- downloads_by_country['pct'] = (downloads_by_country['downloads'] / total_downloads * 100.0)
245
-
246
- # Create hover text
247
- hover_text = []
248
- for _, row in downloads_by_country.iterrows():
249
- hover_text.append(
250
- f"<b>{row['org_country_single']}</b><br>"
251
- f"Avg Downloads: {row['pct']:.1f}% of total<br>"
252
- )
253
-
254
- linear_ticks = [0.01, 0.1, 10, 50, 100] # percent values
255
- log_ticks = np.log10(linear_ticks) # what you're actually plotting
256
-
257
- # Add choropleth to plot
258
- fig.add_trace(
259
- go.Choropleth(
260
- locations=downloads_by_country["country_code"],
261
- z=np.log10(downloads_by_country["pct"]),
262
- text=hover_text,
263
- hovertemplate="%{text}<extra></extra>",
264
- colorscale=[
265
- "#001219",
266
- "#0a9396",
267
- "#94d2bd",
268
- "#e9d8a6",
269
- "#ee9b00",
270
- "#ca6702",
271
- "#bb3e03",
272
- "#9b2226",
273
- ],
274
- colorbar=dict(
275
- title="Avg % of Total Downloads",
276
- tickvals=log_ticks, # positions in log space
277
- ticktext=[f"{t}%" for t in linear_ticks], # labels shown
278
- tickfont=dict(size=12),
279
- len=0.6,
280
- x=1.02,
281
- y=0.7,
282
- ),
283
- marker_line_color="#ffffff",
284
- marker_line_width=1.5,
285
- geo="geo",
286
- ),
287
- row=1,
288
- col=1,
289
- )
290
-
291
- # Update layout
292
- fig.update_layout(
293
- title=dict(
294
- text="Model Downloads by Country",
295
- x=0.5,
296
- font=dict(size=20),
297
- pad=dict(t=10),
298
- ),
299
- width=1200,
300
- height=700, # Increased height for a larger map
301
- plot_bgcolor="#ffffff",
302
- paper_bgcolor="#ffffff",
303
- )
304
-
305
- # Update geo layout
306
- fig.update_geos(
307
- showframe=False,
308
- showland=True,
309
- landcolor="#d0cfcf",
310
- coastlinecolor="#b8b8b8",
311
- projection_type="natural earth",
312
- bgcolor="#ffffff",
313
- )
314
-
315
- return fig
316
-
317
- def create_range_slider(df):
318
- if df.empty or "time" not in df.columns:
319
- return go.Figure()
320
-
321
- times = sorted(df["time"].unique())
322
- fig = go.Figure()
323
-
324
- # Invisible trace just to attach slider to the x-axis
325
- fig.add_trace(
326
- go.Scatter(
327
- x=times,
328
- y=[0] * len(times),
329
- mode="lines",
330
- line=dict(color="rgba(0,0,0,0)"), # Invisible line
331
- hoverinfo="skip",
332
- showlegend=False
333
- )
334
- )
335
-
336
- # Enable range slider
337
- fig.update_layout(
338
- xaxis=dict(
339
- rangeslider=dict(visible=False),
340
- type="date"
341
- ),
342
- yaxis=dict(visible=False),
343
- margin=dict(t=20, b=20, l=20, r=20),
344
- height=100
345
- )
346
-
347
- return fig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
graphs/tree.py DELETED
@@ -1,142 +0,0 @@
1
- import plotly.express as px
2
- import pandas as pd
3
-
4
- PALETTE_0 = [
5
- "#335C67",
6
- "#FFF3B0",
7
- "#E09F3E",
8
- "#9E2A2B",
9
- "#540B0E"
10
- ]
11
-
12
- def generate_model_treemap(df, parent_col='merged_derived_from', child_col='model', value_col='downloads'):
13
- # filtered_df[parent_col] = filtered_df[parent_col].apply(lambda x: str(x[0]) if isinstance(x, list) and x else None)
14
-
15
- df = pd.read_pickle('data_frames/filtered_tree_df.pkl')
16
- # Filter out nan, No parent, and Unsure
17
- df = df[~df[parent_col].isin([None, "['Unsure']", 'nan'])]
18
-
19
- # Find all models that act as a parent
20
- parent_models = set(df[parent_col].dropna())
21
-
22
- # Assign empty parent only if row has no parent and is not itself a parent
23
- df[parent_col] = df[parent_col].where(
24
- df[parent_col].notna() | df[child_col].isin(parent_models),
25
- other=""
26
- )
27
-
28
- fig = px.treemap(
29
- df,
30
- path=[parent_col, child_col],
31
- values=value_col,
32
- hover_data=['author', 'estimated_parameters', 'created'],
33
- color=value_col,
34
- color_continuous_scale='Viridis'
35
- )
36
-
37
- fig.update_layout(
38
- height=1200, # make the plot tall
39
- margin=dict(t=50, l=25, r=25, b=25) # add some breathing room
40
- )
41
-
42
- return fig
43
-
44
- # def generate_model_treemap(df, parent_col='merged_derived_from', child_col='model', value_col='downloads'):
45
- # # iterate over the rows and stringify the lists in 'merged_derived_from'
46
-
47
- # df.to_pickle('filtered_tree_df.pkl')
48
-
49
- # fig = px.icicle(
50
- # df,
51
- # path=[parent_col, child_col],
52
- # values=value_col,
53
- # hover_data=['author', 'estimated_parameters', 'created'],
54
- # color=value_col,
55
- # color_continuous_scale='Viridis'
56
- # )
57
-
58
- # fig.update_layout(
59
- # height=1400,
60
- # margin=dict(t=50, l=25, r=25, b=25)
61
- # )
62
- # return fig
63
-
64
-
65
- # import plotly.graph_objects as go
66
- # import networkx as nx
67
- # import pandas as pd
68
-
69
- # def generate_model_treemap(df, parent_col='merged_derived_from', child_col='model',
70
- # value_col='downloads', top_n=1000):
71
-
72
- # # Fill missing parents
73
- # df[parent_col] = str(df[parent_col][0])
74
-
75
- # # Keep only top_n by downloads
76
- # df = df.sort_values(value_col, ascending=False).head(top_n)
77
-
78
- # # Build directed graph
79
- # G = nx.DiGraph()
80
- # for _, row in df.iterrows():
81
- # parent = row[parent_col]
82
- # child = row[child_col]
83
- # G.add_edge(parent, child, weight=row.get(value_col, 1))
84
-
85
- # # Layout positions (smaller k → tighter graph)
86
- # pos = nx.spring_layout(G, k=0.3, seed=42)
87
-
88
- # # Edges
89
- # edge_x, edge_y = [], []
90
- # for parent, child in G.edges():
91
- # x0, y0 = pos[parent]
92
- # x1, y1 = pos[child]
93
- # edge_x += [x0, x1, None]
94
- # edge_y += [y0, y1, None]
95
-
96
- # edge_trace = go.Scatter(
97
- # x=edge_x, y=edge_y,
98
- # line=dict(width=0.8, color="#888"),
99
- # hoverinfo="none",
100
- # mode="lines"
101
- # )
102
-
103
- # # Nodes
104
- # node_x, node_y, sizes, texts = [], [], [], []
105
- # for node in G.nodes():
106
- # x, y = pos[node]
107
- # node_x.append(x)
108
- # node_y.append(y)
109
- # downloads = df.loc[df[child_col] == node, value_col].sum()
110
- # sizes.append(max(10, downloads**0.3))
111
- # texts.append(f"{node}<br>Downloads: {downloads}")
112
-
113
- # node_trace = go.Scatter(
114
- # x=node_x, y=node_y,
115
- # mode="markers+text",
116
- # text=[n for n in G.nodes()],
117
- # textposition="top center",
118
- # hovertext=texts,
119
- # hoverinfo="text",
120
- # marker=dict(
121
- # showscale=True,
122
- # colorscale="Viridis",
123
- # color=sizes,
124
- # size=sizes,
125
- # colorbar=dict(
126
- # thickness=15,
127
- # title=f"{value_col} (scaled)",
128
- # xanchor="left",
129
- # ),
130
- # line_width=2
131
- # )
132
- # )
133
-
134
- # return go.Figure(data=[edge_trace, node_trace],
135
- # layout=go.Layout(
136
- # title=f"Model Tree (Top {top_n} by {value_col})",
137
- # showlegend=False,
138
- # hovermode="closest",
139
- # margin=dict(b=20, l=5, r=5, t=40),
140
- # xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
141
- # yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
142
- # ))