File size: 9,904 Bytes
7e57a09
fd38574
6054b77
f136ea6
927a4de
 
 
f9010df
927a4de
 
 
 
 
 
 
7e57a09
f9010df
fd38574
 
1b5583f
de24f31
fd38574
927a4de
 
35ffa10
 
 
da7a067
 
 
1b5583f
da7a067
 
 
 
65d3762
 
f136ea6
 
35ffa10
f136ea6
 
66b3482
4ad4863
 
 
927a4de
 
 
 
 
 
 
 
6054b77
 
927a4de
 
 
 
 
 
1b5583f
 
927a4de
 
 
6054b77
4ad4863
 
 
 
 
 
7e57a09
 
35ffa10
fe024d4
1b5583f
 
927a4de
fd38574
4ad4863
fd38574
927a4de
 
 
35ffa10
927a4de
3cceb68
65d3762
 
3cceb68
65d3762
 
3cceb68
927a4de
1b5583f
 
 
 
 
 
 
 
 
 
927a4de
 
 
 
 
 
 
1b5583f
 
 
 
 
927a4de
 
 
 
 
 
1b5583f
 
 
927a4de
1b5583f
 
 
 
 
 
 
927a4de
1b5583f
 
 
 
 
 
 
927a4de
4ad4863
 
 
 
1b5583f
 
 
35ffa10
65d3762
4ad4863
 
65d3762
1b5583f
65d3762
1b5583f
 
 
927a4de
4ad4863
1b5583f
4ad4863
 
 
 
 
35ffa10
da7a067
1b5583f
4ad4863
b233a23
927a4de
b233a23
 
 
 
1b5583f
 
 
35ffa10
65d3762
b233a23
 
65d3762
1b5583f
65d3762
da7a067
 
1b5583f
 
 
927a4de
4ad4863
1b5583f
4ad4863
3cceb68
4ad4863
35ffa10
927a4de
35ffa10
da7a067
1b5583f
4ad4863
b233a23
927a4de
b233a23
 
 
 
1b5583f
 
 
35ffa10
65d3762
b233a23
 
65d3762
1b5583f
65d3762
1b5583f
 
 
927a4de
4ad4863
1b5583f
4ad4863
 
 
 
 
35ffa10
da7a067
1b5583f
4ad4863
b233a23
927a4de
4ad4863
35ffa10
 
4ad4863
35ffa10
 
b233a23
927a4de
1b5583f
 
 
 
 
 
 
927a4de
35ffa10
 
 
 
 
 
 
 
b233a23
7e57a09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd38574
4ad4863
35ffa10
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
from dash import Dash, html, dcc, Input, Output, State, no_update, ctx
import pandas as pd
import dash_mantine_components as dmc
import time

from config import DATASET_ID
from data_utils import create_fresh_duckdb_with_views, get_last_updated
from graphs.leaderboard import (
    leaderboard_callback_logic,
)
from helpers import build_slider_marks, get_thumb_label_single, get_thumb_labels
from layout_components import (
    build_main_layout,
    build_range_slider,
    build_single_slider,
    DATA_URL,
)

# Initialize the Dash app.
# Callback-ID validation is suppressed because callbacks in this file target
# component ids (e.g. "time-slider-alltime") whose components are not passed
# to the initial layout from this module; that slider is presumably only
# inserted later by toggle_time_slider.
# NOTE(review): the previous comment attributed this to a "multi-page layout",
# but no page routing is visible in this file — confirm.
app = Dash(suppress_callback_exceptions=True)
# Module-level handle to the underlying server object
# (presumably for an external WSGI runner — confirm against the deployment).
server = app.server

# Load dataset and determine time range.
# Produces the module-level `start_dt` / `end_dt` used to bound the time
# sliders. Any failure is reported on stdout and re-raised, so the app does
# not start without a valid time range.
print(f"Attempting to connect to dataset from Hugging Face Hub: {DATASET_ID}")
try:
    overall_start_time = time.time()

    # Create fresh connection, views, and read start/end time.
    # The connection is only needed for this one query, so it is closed in
    # `finally` whether or not the query succeeds.
    conn = create_fresh_duckdb_with_views()
    try:
        time_range = conn.execute("SELECT MIN(time) AS min_time, MAX(time) AS max_time FROM all_downloads;").fetchdf()
        start_dt = pd.to_datetime(time_range["min_time"].iloc[0])
        end_dt = pd.to_datetime(time_range["max_time"].iloc[0])
    finally:
        conn.close()

    # MIN/MAX over an empty (or all-NULL) column come back as NULL, which
    # pandas turns into NaT. Fail here with a clear message instead of letting
    # the later `.timestamp()` conversion blow up outside this guarded block.
    if pd.isna(start_dt) or pd.isna(end_dt):
        raise ValueError(
            "all_downloads contains no usable 'time' values; cannot determine the time range."
        )

    msg = f"Successfully connected to datasets in {time.time() - overall_start_time:.2f}s."
    print(msg)
except Exception as e:
    # Top-level boundary: report the failure, then re-raise unchanged.
    err_msg = f"Failed to load dataset(s). Error: {e}"
    print(err_msg)
    raise

# Build the initial time sliders.
# The slider bounds are the dataset's first/last timestamps converted to
# integer epoch seconds; `marks` comes from build_slider_marks over the same
# range and is reused by every slider built in this module.
start_ts = int(start_dt.timestamp())
end_ts = int(end_dt.timestamp())
marks = build_slider_marks(start_dt, end_dt)

# Range slider, initially spanning the full [start_ts, end_ts] window.
# This is the slider handed to build_main_layout below.
time_slider = build_range_slider(
    start_ts,
    end_ts,
    [start_ts, end_ts],
    marks,
    thumb_children=get_thumb_labels([start_ts, end_ts]),
)

# Single-value ("all-time") slider, initially positioned at end_ts.
# NOTE(review): not referenced again in the visible part of this file —
# toggle_time_slider builds its own single slider; confirm whether this
# instance is still needed.
time_slider_alltime = build_single_slider(
    start_ts,
    end_ts,
    end_ts,
    marks,
    thumb_children=get_thumb_label_single(end_ts),
)

# Label passed to build_main_layout (value comes from get_last_updated()).
last_updated_label = get_last_updated()

# Define the app layout: a Mantine provider wrapping hidden helper
# components, the client-side stores, and the main page layout.
app.layout = dmc.MantineProvider(
    theme={
        "colorScheme": "light",
        "primaryColor": "blue",
        "fontFamily": "Inter, sans-serif",
    },
    children=[
        # Hidden anchor whose href is set by handle_paper_disclaimer and
        # watched by the clientside callback that opens the data link.
        html.A(id="paper-redirect", style={"display": "none"}),
        # Hidden div used only as the Output of that clientside callback.
        html.Div(id="data-link-open-noop", style={"display": "none"}),
        # Stores holding the current view, attribution type, and the last
        # value of each slider; the leaderboard callbacks read these.
        dcc.Store(id="selected-view", data="all_downloads"),
        dcc.Store(id="model-attribution-type", data="uploader"),
        dcc.Store(id="time-slider-value", data=[start_ts, end_ts]),
        dcc.Store(id="time-slider-alltime-value", data=end_ts),
        # Main page content; receives the range slider as its initial slider.
        build_main_layout(last_updated_label, time_slider),
    ],
)

# ----------
# Callbacks
# ----------

# Update model attribution type based on user selection
@app.callback(
    Output("model-attribution-type", "data"),
    Input("model-attribution-segmented", "value"),
)
def update_model_attribution_type(selected_value):
    """Copy the "model-attribution-segmented" value into the "model-attribution-type" store.

    The value is returned unchanged; the leaderboard callbacks compare the
    stored value against "original_creator".
    """
    return selected_value

# Swap the slider shown in "slider-container" when the time-range toggle flips
@app.callback(
    Output("slider-container", "children"),
    Output("slider-description", "children"),
    Input("time-range-toggle", "checked"),
    State("time-slider-value", "data"),
    State("time-slider-alltime-value", "data"),
)
def toggle_time_slider(is_alltime, range_value, alltime_value):
    """Build the slider and description text that match the toggle state.

    Args:
        is_alltime: "checked" state of the time-range toggle; truthy selects
            the single-value slider, falsy the range slider.
        range_value: last stored range-slider value, or None.
        alltime_value: last stored single-slider value, or None.

    Returns:
        A ``(children, description)`` pair: a one-element list holding the
        rebuilt slider, and the explanatory text for "slider-description".
    """
    if not is_alltime:
        # Restore the remembered window, or fall back to the full range.
        window = [start_ts, end_ts] if range_value is None else range_value
        range_slider = build_range_slider(
            start_ts,
            end_ts,
            window,
            marks,
            thumb_children=get_thumb_labels(window),
        )
        return (
            [range_slider],
            "Adjust the slider to filter leaderboard results by the difference in downloads within the time range.",
        )

    # Restore the remembered cut-off date, or fall back to the latest date.
    cutoff = end_ts if alltime_value is None else alltime_value
    single_slider = build_single_slider(
        start_ts,
        end_ts,
        cutoff,
        marks,
        thumb_children=get_thumb_label_single(cutoff),
    )
    return (
        [single_slider],
        "Select a specific date to view all-time cumulative downloads up to that point.",
    )

# Sync range slider value to its dcc.Store component
@app.callback(
    Output("time-slider-value", "data"),
    Input("time-slider", "value"),
)
def sync_time_slider_value(value):
    """Copy the "time-slider" value into the "time-slider-value" store unchanged.

    toggle_time_slider reads this store (as State) to restore the range
    slider, and the leaderboard callbacks take it as an Input.
    """
    return value

# Sync all-time slider value to dcc.Store component
@app.callback(
    Output("time-slider-alltime-value", "data"),
    Input("time-slider-alltime", "value"),
)
def sync_time_slider_alltime_value(value):
    """Copy the "time-slider-alltime" value into the "time-slider-alltime-value" store unchanged.

    toggle_time_slider reads this store (as State) to restore the single
    slider, and the leaderboard callbacks take it as an Input.
    """
    return value

# Refresh the Top Countries leaderboard and its toggle label
@app.callback(
    Output("top_countries-table", "children"),
    Output("top_countries-toggle", "children"),
    Input("top_countries-toggle", "n_clicks"),
    Input("time-slider-value", "data"),
    Input("time-slider-alltime-value", "data"),
    Input("time-range-toggle", "checked"),
    Input("selected-view", "data"),
    Input("model-attribution-type", "data"),
    State("top_countries-toggle", "children"),
)
def update_top_countries(
    n_clicks, slider_value, slider_alltime_value, is_alltime, selected_view, attribution_type, current_label
):
    """Delegate the Top Countries leaderboard update to leaderboard_callback_logic.

    The slider value forwarded is the all-time one when ``is_alltime`` is
    truthy, otherwise the range one. Rows are grouped by
    "org_country_single". The helper's result is returned as-is for the
    table children and toggle label outputs.
    """
    if is_alltime:
        chosen_slider_value = slider_alltime_value
    else:
        chosen_slider_value = slider_value

    by_original_creator = attribution_type == "original_creator"

    return leaderboard_callback_logic(
        n_clicks,
        chosen_slider_value,
        current_label,
        view=selected_view,
        is_alltime=is_alltime,
        derived_author_toggle=by_original_creator,
        group_col="org_country_single",
        filename="top_countries",
        default_label="▼ Show Top 50",
        chip_color="#F0F9FF",
    )

# Refresh the Top Developers leaderboard and its toggle label
@app.callback(
    Output("top_developers-table", "children"),
    Output("top_developers-toggle", "children"),
    Input("top_developers-toggle", "n_clicks"),
    Input("time-slider-value", "data"),
    Input("time-slider-alltime-value", "data"),
    Input("time-range-toggle", "checked"),
    Input("selected-view", "data"),
    Input("model-attribution-type", "data"),
    State("top_developers-toggle", "children"),
)
def update_top_developers(
    n_clicks, slider_value, slider_alltime_value, is_alltime, selected_view, attribution_type, current_label
):
    """Delegate the Top Developers leaderboard update to leaderboard_callback_logic.

    Rows are grouped by "derived_author" when the attribution type is
    "original_creator" and by "author" otherwise. The slider value forwarded
    is the all-time one when ``is_alltime`` is truthy, otherwise the range
    one. The helper's result is returned as-is for the table children and
    toggle label outputs.
    """
    by_original_creator = attribution_type == "original_creator"

    if by_original_creator:
        grouping_column = "derived_author"
    else:
        grouping_column = "author"

    if is_alltime:
        chosen_slider_value = slider_alltime_value
    else:
        chosen_slider_value = slider_value

    return leaderboard_callback_logic(
        n_clicks,
        chosen_slider_value,
        current_label,
        view=selected_view,
        is_alltime=is_alltime,
        derived_author_toggle=by_original_creator,
        group_col=grouping_column,
        filename="top_developers",
        default_label="▼ Show Top 50",
        chip_color="#F0F9FF",
    )

# Refresh the Top Models leaderboard and its toggle label
@app.callback(
    Output("top_models-table", "children"),
    Output("top_models-toggle", "children"),
    Input("top_models-toggle", "n_clicks"),
    Input("time-slider-value", "data"),
    Input("time-slider-alltime-value", "data"),
    Input("time-range-toggle", "checked"),
    Input("selected-view", "data"),
    Input("model-attribution-type", "data"),
    State("top_models-toggle", "children"),
)
def update_top_models(
    n_clicks, slider_value, slider_alltime_value, is_alltime, selected_view, attribution_type, current_label
):
    """Delegate the Top Models leaderboard update to leaderboard_callback_logic.

    The slider value forwarded is the all-time one when ``is_alltime`` is
    truthy, otherwise the range one. Rows are grouped by "model". The
    helper's result is returned as-is for the table children and toggle
    label outputs.
    """
    if is_alltime:
        chosen_slider_value = slider_alltime_value
    else:
        chosen_slider_value = slider_value

    by_original_creator = attribution_type == "original_creator"

    return leaderboard_callback_logic(
        n_clicks,
        chosen_slider_value,
        current_label,
        view=selected_view,
        is_alltime=is_alltime,
        derived_author_toggle=by_original_creator,
        group_col="model",
        filename="top_models",
        default_label="▼ Show More",
        chip_color="#F0F9FF",
    )

# Update thumb labels for range slider
@app.callback(
    Output("time-slider", "thumbChildren"),
    Input("time-slider", "value"),
)
def update_thumb_labels(values):
    """Recompute the range slider's thumbChildren from its current value.

    ``values`` is the "time-slider" value, passed straight to
    get_thumb_labels; that helper's result is returned as-is.
    """
    return get_thumb_labels(values)

# Update thumb label for all-time slider
@app.callback(
    Output("time-slider-alltime", "thumbChildren"),
    Input("time-slider-alltime", "value"),
)
def update_thumb_label_alltime(value):
    """Recompute the all-time slider's thumbChildren from its current value.

    ``value`` is the "time-slider-alltime" value, passed straight to
    get_thumb_label_single; that helper's result is returned as-is.
    """
    return get_thumb_label_single(value)

# Translate the segmented control's value into the stored view name
@app.callback(
    Output("selected-view", "data"),
    Input("segmented", "value"),
)
def update_selected_view(seg_value):
    """Return the view name to store in "selected-view".

    "filtered-downloads" selects "one_year_rolling"; every other value
    (including None) selects "all_downloads".
    """
    return "one_year_rolling" if seg_value == "filtered-downloads" else "all_downloads"


@app.callback(
    Output("paper-disclaimer-modal", "opened"),
    Output("paper-redirect", "href"),
    Input("data-link-trigger-header", "n_clicks"),
    Input("paper-disclaimer-close", "n_clicks"),
    Input("paper-disclaimer-confirm", "n_clicks"),
    State("paper-disclaimer-modal", "opened"),
    prevent_initial_call=True,
)
def handle_paper_disclaimer(
    data_clicks,
    close_clicks,
    confirm_clicks,
    modal_opened,
):
    """Open or close the disclaimer modal and, on confirm, publish the data URL.

    Only the id of the triggering component matters; the click counts
    themselves are not inspected.

    Returns:
        ``(opened, href)`` for the modal and the hidden "paper-redirect" link:
        the header trigger opens the modal, close dismisses it, and confirm
        dismisses it and sets the href to DATA_URL. Any other trigger leaves
        the modal state as it was. The href is ``no_update`` except on confirm.
    """
    # One entry per button that can fire this callback.
    outcome_by_trigger = {
        "data-link-trigger-header": (True, no_update),
        "paper-disclaimer-close": (False, no_update),
        "paper-disclaimer-confirm": (False, DATA_URL),
    }
    unchanged = (modal_opened, no_update)
    return outcome_by_trigger.get(ctx.triggered_id, unchanged)


# Browser-side callback: whenever the hidden "paper-redirect" link gets a new
# href (set by handle_paper_disclaimer on confirm), open that URL in a new tab
# with "noopener,noreferrer". The returned empty string is written to the
# hidden "data-link-open-noop" div, which serves only as the required Output.
app.clientside_callback(
    """
    function(url) {
        if (url) {
            window.open(url, "_blank", "noopener,noreferrer");
        }
        return "";
    }
    """,
    Output("data-link-open-noop", "children"),
    Input("paper-redirect", "href"),
    prevent_initial_call=True,
)

# Run the app when this file is executed directly; importing the module
# (e.g. to use `server`) does not start it.
# NOTE(review): debug=True is a development setting — confirm this entry
# point is not the one used in production.
if __name__ == "__main__":
    app.run(debug=True)