"""Dash dashboard showing leaderboards for the open model ecosystem.

The module loads a parquet dataset from the Hugging Face Hub, builds a page
with a download-window selector, a time-range slider and three leaderboard
tabs (countries, developers, models), and registers the callbacks that keep
the leaderboard tables in sync with the slider and the "show more" toggles.
"""
import math
import time

import dash_mantine_components as dmc
import pandas as pd
from dash import Dash, Input, Output, State, callback_context, dcc, html, no_update
from datasets import load_dataset

from graphs.leaderboard import (
    create_leaderboard,
    get_top_n_leaderboard,
    render_table_content,
)

# Initialize the app
app = Dash()
server = app.server

# Load parquet file from Hugging Face
HF_DATASET_ID = "emsesc/open_model_evolution_data"
hf_parquet_url = f"https://huggingface.co/datasets/{HF_DATASET_ID}/resolve/main/"
data_files = {
    "filtered_df": hf_parquet_url + "filtered_df.parquet",
}

filtered_df = pd.DataFrame()
print(f"Attempting to load dataset from Hugging Face Hub: {HF_DATASET_ID}")
try:
    overall_start_time = time.time()
    dataset = load_dataset("parquet", data_files=data_files)
    filtered_df = dataset["filtered_df"].to_pandas()
    msg = f"Successfully loaded dataset in {time.time() - overall_start_time:.2f}s."
    print(msg)
except Exception as e:  # top-level boundary: report, then fail clearly below
    err_msg = f"Failed to load dataset. Error: {e}"
    print(err_msg)

# List columns for reference
print(filtered_df.columns.tolist())

# Everything below needs at least one row with a "time" value. Without this
# guard a failed download surfaces as an unrelated KeyError on "time".
if filtered_df.empty or "time" not in filtered_df.columns:
    raise RuntimeError(
        f"Dataset {HF_DATASET_ID} could not be loaded or has no 'time' data; "
        "the dashboard cannot be built."
    )

# Slider bounds, expressed as whole epoch seconds.
start_dt = filtered_df["time"].min()
end_dt = filtered_df["time"].max()
start_ts = int(start_dt.timestamp())
# Round the upper bound up: truncating would place it just before a newest
# record that carries sub-second precision, so the default full-range
# selection would silently drop that record.
end_ts = math.ceil(end_dt.timestamp())

# Readable marks: the start label (e.g. "Jan 2020"), one mark per following
# 1 January (e.g. "2021", "2022"), then the end label (e.g. "Dec 2024").
marks = [{"value": start_ts, "label": start_dt.strftime("%b %Y")}]
for yr in range(start_dt.year + 1, end_dt.year + 1):
    yr_ts = int(pd.Timestamp(year=yr, month=1, day=1).timestamp())
    if yr_ts != end_ts:  # the end mark is appended separately below
        marks.append({"value": yr_ts, "label": str(yr)})
marks.append({"value": end_ts, "label": end_dt.strftime("%b %Y")})

# Range slider for time selection; it moves in one-day steps.
time_slider = dmc.RangeSlider(
    id="time-slider",
    min=start_ts,
    max=end_ts,
    value=[
        start_ts,
        end_ts,
    ],
    step=24 * 60 * 60,
    color="#AC482A",
    size="md",
    radius="xl",
    marks=marks,
    style={"width": "70%", "margin": "0 auto"},
    labelAlwaysOn=False,
)

# Styles shared by every leaderboard tab.
TAB_STYLE = {
    "backgroundColor": "transparent",
    "border": "none",
    "padding": "10px 18px",
    "color": "#6B7280",
    "fontWeight": "500",
}
TAB_SELECTED_STYLE = {
    "backgroundColor": "transparent",
    "border": "none",
    "padding": "10px 18px",
    "fontWeight": "700",
    "borderBottom": "3px solid #082030",  # underline only
}


def _leaderboard_tab(label, leaderboard_kind):
    """Build one leaderboard tab whose value equals its visible label."""
    return dcc.Tab(
        label=label,
        value=label,
        style=dict(TAB_STYLE),
        selected_style=dict(TAB_SELECTED_STYLE),
        children=[create_leaderboard(filtered_df, leaderboard_kind)],
    )


# Header: dashboard title on the left, partner logos/links on the right.
header = html.Div(
    [
        html.Div(
            [
                html.Div(
                    [
                        html.Div(
                            children="Visualizing the Open Model Ecosystem",
                            style={
                                "fontSize": 22,
                                "fontWeight": "700",
                                "lineHeight": "1.1",
                            },
                        ),
                        html.Div(
                            children="An interactive dashboard to explore trends in open models on Hugging Face",
                            style={
                                "fontSize": 13,
                                "marginTop": 6,
                                "opacity": 0.9,
                            },
                        ),
                    ],
                    style={
                        "display": "flex",
                        "flexDirection": "column",
                        "justifyContent": "center",
                    },
                ),
                html.Div(
                    [
                        html.A(
                            children=[
                                html.Img(
                                    src="assets/images/dpi-logo.svg",
                                    style={
                                        "height": "28px",
                                        "verticalAlign": "middle",
                                        "paddingRight": "8px",
                                    },
                                ),
                                "Data Provenance Initiative",
                            ],
                            href="https://www.dataprovenance.org/",
                            target="_blank",
                            style={
                                "display": "inline-block",
                                "padding": "6px 14px",
                                "fontSize": 13,
                                "color": "#082030",
                                "backgroundColor": "#ffffff",
                                "borderRadius": "18px",
                                "fontWeight": "700",
                                "textDecoration": "none",
                                "marginRight": "12px",
                            },
                        ),
                        html.A(
                            children=[
                                html.Img(
                                    src="assets/images/Hf-logo-with-title.svg",
                                    style={
                                        "height": "30px",
                                        "verticalAlign": "middle",
                                    },
                                )
                            ],
                            href="https://huggingface.co/",
                            target="_blank",
                            style={
                                "display": "inline-flex",
                                "padding": "6px 14px",
                                "alignItems": "center",
                                "backgroundColor": "#ffffff",
                                "borderRadius": "18px",
                                "textDecoration": "none",
                            },
                        ),
                    ],
                    style={"display": "flex", "alignItems": "center"},
                ),
            ],
            style={
                "marginLeft": "50px",
                "marginRight": "50px",
                "display": "flex",
                "justifyContent": "space-between",
                "alignItems": "center",
                "padding": "18px 24px",
                "gap": "24px",
            },
        ),
    ],
    style={
        "backgroundColor": "#082030",
        "color": "white",
        "width": "100%",
    },
)

# Filters: download-window selector next to the time-range slider.
filter_panel = html.Div(
    children=[
        html.Div(
            [
                html.Div(
                    "Select Window",
                    style={
                        "fontWeight": "700",
                        "marginBottom": 8,
                        "fontSize": 14,
                    },
                ),
                dmc.SegmentedControl(
                    id="segmented",
                    value="all-downloads",
                    color="#AC482A",
                    transitionDuration=200,
                    data=[
                        {
                            "value": "all-downloads",
                            "label": "All Downloads",
                        },
                        {
                            "value": "filtered-downloads",
                            "label": "Filtered Downloads",
                        },
                    ],
                    mb=10,
                ),
                html.Span(
                    id="global-toggle-status",
                    style={
                        "marginLeft": "8px",
                        "display": "inline-block",
                        "marginTop": 6,
                    },
                ),
            ],
            style={"flex": 1, "minWidth": "220px"},
        ),
        html.Div(
            [
                html.Div(
                    "Select Time Range",
                    style={
                        "fontWeight": "700",
                        "marginBottom": 8,
                        "fontSize": 14,
                    },
                ),
                time_slider,
            ],
            style={"flex": 2, "minWidth": "320px"},
        ),
    ],
    style={
        "display": "flex",
        "gap": "24px",
        "padding": "32px",
        "alignItems": "flex-start",
        "marginLeft": "100px",
        "marginRight": "100px",
        "backgroundColor": "#FFFBF9",
        "borderRadius": "18px",
    },
)

# Leaderboard tabs; the tables are built once here and refreshed by callbacks.
leaderboard_tabs = html.Div(
    [
        dcc.Tabs(
            id="leaderboard-tabs",
            value="Countries",
            children=[
                _leaderboard_tab("Countries", "countries"),
                _leaderboard_tab("Developers", "developers"),
                _leaderboard_tab("Models", "models"),
            ],
        ),
    ],
    style={
        "borderRadius": "18px",
        "padding": "32px",
        "marginTop": "12px",
        "marginBottom": "64px",
        "marginLeft": "50px",
        "marginRight": "50px",
    },
)

# App layout
app.layout = dmc.MantineProvider(
    theme={
        "colorScheme": "light",
        "primaryColor": "blue",
        "fontFamily": "Inter, sans-serif",
    },
    children=[
        html.Div(
            [
                header,
                # Title
                html.Div(
                    children="Model Leaderboard",  # Change this to your desired title
                    style={
                        "fontSize": 40,
                        "fontWeight": "700",
                        "textAlign": "center",
                        "marginTop": 20,
                        "marginBottom": 20,
                    },
                ),
                # Button
                html.Div(
                    children=[
                        html.Button(
                            "Read the paper",  # Change this to your desired button text
                            id="my-button",
                            style={
                                "padding": "10px 20px",
                                "fontSize": 16,
                                "margin": "0 auto",
                                "display": "block",
                                "backgroundColor": "#AC482A",
                                "color": "white",
                                "border": "none",
                                "borderRadius": "5px",
                                "cursor": "pointer",
                            },
                        ),
                    ],
                    style={"textAlign": "center", "marginBottom": 20},
                ),
                # Intro / description (placeholder text)
                html.Div(
                    children="Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s...",
                    style={
                        "fontSize": 14,
                        "marginTop": 18,
                        "marginBottom": 12,
                        "marginLeft": 100,
                        "marginRight": 100,
                        "textAlign": "center",
                    },
                ),
                # Main content (filters + tabs)
                filter_panel,
                leaderboard_tabs,
            ],
            style={
                "fontFamily": "Inter",
                "backgroundColor": "#ffffff",
                "minHeight": "100vh",
            },
        )
    ],
)


# Callbacks for interactivity
# -- helper utilities to consolidate duplicated callback logic --
def _apply_time_slider(slider_value):
    """Return the rows of ``filtered_df`` inside the selected time window.

    Args:
        slider_value: ``[start, end]`` in epoch seconds, as emitted by the
            time slider. Anything that is not a two-element sequence
            disables the filter.

    Returns:
        The rows whose ``time`` lies in the inclusive window, or the whole
        ``filtered_df`` when no usable window is given.
    """
    if slider_value and len(slider_value) == 2:
        start = pd.to_datetime(slider_value[0], unit="s")
        end = pd.to_datetime(slider_value[1], unit="s")
        in_window = (filtered_df["time"] >= start) & (filtered_df["time"] <= end)
        return filtered_df[in_window]
    return filtered_df


def _toggle_was_clicked():
    """Return True when a toggle button click triggered the running callback.

    Must be called while a Dash callback is executing, because it reads
    ``callback_context``.
    """
    return any(
        trigger["prop_id"].endswith("-toggle.n_clicks")
        for trigger in callback_context.triggered
    )


def _next_toggle_state(label):
    """Return ``(top_n, next_label)`` for a click on a toggle showing ``label``."""
    # "Show More" is accepted as the first expand step so that a button
    # rendered with that label expands on its first click instead of only
    # changing its text.
    if "Show Top 50" in label or "Show More" in label:
        return 50, "▼ Show Top 100"
    if "Show Top 100" in label:
        return 100, "▲ Show Less"
    return 10, "▼ Show Top 50"


def _displayed_top_n(label):
    """Return how many rows are on screen while the toggle shows ``label``.

    The label always advertises the *next* step, so the current size is the
    step before it.
    """
    if "Show Top 100" in label:
        return 50
    if "Show Less" in label:
        return 100
    return 10


def _leaderboard_callback_logic(
    n_clicks,
    slider_value,
    current_label,
    group_col,
    filename,
    default_label="▼ Show Top 50",
    chip_color="#F0F9FF",
):
    """Shared body of the three leaderboard callbacks.

    Args:
        n_clicks: Click count of the table's toggle button (``None`` or 0
            before the first click).
        slider_value: ``[start, end]`` epoch seconds from the time slider.
        current_label: Text currently shown on the toggle button.
        group_col: Column passed to ``get_top_n_leaderboard`` for grouping.
        filename: Passed through to ``render_table_content``.
        default_label: Label assumed when the button has no text yet.
        chip_color: Passed through to ``render_table_content``.

    Returns:
        ``(table_children, toggle_label)`` for the callback's two outputs.
    """
    # Normalize label on first load
    if current_label is None:
        current_label = default_label

    if not n_clicks:
        # Never clicked (0 or None): collapsed table, label untouched.
        top_n, new_label = 10, current_label
    elif _toggle_was_clicked():
        top_n, new_label = _next_toggle_state(current_label)
    else:
        # Triggered by the time slider: keep the current table size rather
        # than advancing the show-more cycle.
        top_n, new_label = _displayed_top_n(current_label), current_label

    # Apply time filter and build table
    df_time = _apply_time_slider(slider_value)
    top_df, download_df = get_top_n_leaderboard(df_time, group_col, top_n)
    table = render_table_content(
        top_df, download_df, chip_color=chip_color, filename=filename
    )
    return table, new_label
# -- end helpers --


# Callbacks for interactivity (modularized)
@app.callback(
    Output("top_countries-table", "children"),
    Output("top_countries-toggle", "children"),
    Input("top_countries-toggle", "n_clicks"),
    Input("time-slider", "value"),
    State("top_countries-toggle", "children"),
)
def update_top_countries(n_clicks, slider_value, current_label):
    """Refresh the countries leaderboard table and its toggle label."""
    return _leaderboard_callback_logic(
        n_clicks,
        slider_value,
        current_label,
        group_col="org_country_single",
        filename="top_countries",
        default_label="▼ Show Top 50",
        chip_color="#F0F9FF",
    )


@app.callback(
    Output("top_developers-table", "children"),
    Output("top_developers-toggle", "children"),
    Input("top_developers-toggle", "n_clicks"),
    Input("time-slider", "value"),
    State("top_developers-toggle", "children"),
)
def update_top_developers(n_clicks, slider_value, current_label):
    """Refresh the developers leaderboard table and its toggle label."""
    return _leaderboard_callback_logic(
        n_clicks,
        slider_value,
        current_label,
        group_col="author",
        filename="top_developers",
        default_label="▼ Show More",
        chip_color="#F0F9FF",
    )


@app.callback(
    Output("top_models-table", "children"),
    Output("top_models-toggle", "children"),
    Input("top_models-toggle", "n_clicks"),
    Input("time-slider", "value"),
    State("top_models-toggle", "children"),
)
def update_top_models(n_clicks, slider_value, current_label):
    """Refresh the models leaderboard table and its toggle label."""
    return _leaderboard_callback_logic(
        n_clicks,
        slider_value,
        current_label,
        group_col="model",
        filename="top_models",
        default_label="▼ Show More",
        chip_color="#F0F9FF",
    )


@app.callback(
    Output("time-slider", "label"),
    Input("time-slider", "value"),
)
def update_range_labels(values):
    """Format the selected slider bounds as ``"Mon YYYY"`` strings.

    Returns ``no_update`` when the slider has no usable two-element value,
    instead of raising inside the callback.
    """
    if not values or len(values) != 2:
        return no_update
    start_label = pd.to_datetime(values[0], unit="s").strftime("%b %Y")
    end_label = pd.to_datetime(values[1], unit="s").strftime("%b %Y")
    return [start_label, end_label]


# Run the app
if __name__ == "__main__":
    app.run(debug=True)