Spaces:

economies-open-ai
/

open-model-evolution

Running

App Files Community

emsesc committed on Aug 29

Commit

b466419

1 Parent(s): 77e9502

refactor code

Browse files

Files changed (5) hide show

app.py +85 -57
graphs/__pycache__/model_characteristics.cpython-39.pyc +0 -0
graphs/__pycache__/model_market_share.cpython-39.pyc +0 -0
graphs/model_characteristics.py +23 -39
graphs/model_market_share.py +99 -132

app.py CHANGED Viewed

@@ -1,9 +1,7 @@
-# Import packages
 from dash import Dash, html, dcc, Input, Output
 import pandas as pd
-import plotly.express as px
-from graphs.model_market_share import create_plotly_stacked_area_chart, create_plotly_world_map, create_plotly_range_slider, create_leaderboard
-from graphs.model_characteristics import create_plotly_language_concentration_chart, create_plotly_publication_curves_with_legend
 # Initialize the app
 app = Dash()
@@ -22,7 +20,7 @@ country_concentration_df = pd.read_pickle("data_frames/country_concentration_df.
 author_concentration_df = pd.read_pickle("data_frames/author_concentration_df.pkl")
 model_concentration_df = pd.read_pickle("data_frames/model_concentration_df.pkl")
 TEMP_MODEL_EVENTS = {
     # "Yolo World Mirror": "2024-03-01",
     "Llama 3": "2024-04-17",
@@ -50,25 +48,15 @@ PALETTE_0 = [
     "#540B0E"
 ]
-fig = create_plotly_stacked_area_chart(
-    model_topk_df, model_gini_df, model_hhi_df, TEMP_MODEL_EVENTS, PALETTE_0
-)
 LANG_SEGMENT_ORDER = [
     'Monolingual: EN', 'Monolingual: HR', 'Monolingual: M/LR',
     'Multilingual: HR', 'Multilingual', 'Unknown',
 ]
-fig2 = create_plotly_language_concentration_chart(
-    language_concentration_df, 'time', 'metric', 'value', LANG_SEGMENT_ORDER, PALETTE_0
-)
 LICENSE_SEGMENT_ORDER = [
     "Open Use", "Open Use (Acceptable Use Policy)", "Open Use (Non-Commercial Only)", "Attribution",
     "Acceptable Use Policy", "Non-Commercial Only", "Undocumented", "Undocumented (Acceptable Use Policy)",
 ]
-fig3 = create_plotly_language_concentration_chart(
-    license_concentration_df, 'period', 'status', 'percent', LICENSE_SEGMENT_ORDER, PALETTE_0
-)
 METHOD_PLOT_CHOICES = {
     "cumulative": "none", # none, mean, sum
@@ -76,9 +64,6 @@ METHOD_PLOT_CHOICES = {
     "y_log": False, # True, False
     "period": "W",
 }
-fig4 = create_plotly_publication_curves_with_legend(
-    download_method_cumsum_df, METHOD_PLOT_CHOICES, PALETTE_0
-)
 ARCHITECTURE_PLOT_CHOICES = {
     "cumulative": "none", # none, mean, sum
@@ -86,35 +71,41 @@ ARCHITECTURE_PLOT_CHOICES = {
     "y_log": False, # True, False
     "period": "W",
 }
-fig5 = create_plotly_publication_curves_with_legend(
-    download_arch_cumsum_df, ARCHITECTURE_PLOT_CHOICES, PALETTE_0
 )
-fig6 = create_plotly_world_map(
     country_concentration_df, "time", "metric", "value"
 )
-fig7 = create_leaderboard(
     country_concentration_df, author_concentration_df, model_concentration_df
 )
-slider = create_plotly_range_slider(
     model_topk_df
 )
-slider2 = create_plotly_range_slider(
-    country_concentration_df
 )
-# Make global font family
-fig.update_layout(font_family="Inter")
-fig2.update_layout(font_family="Inter")
-fig3.update_layout(font_family="Inter")
-fig4.update_layout(font_family="Inter")
-fig5.update_layout(font_family="Inter")
-fig6.update_layout(font_family="Inter")
-slider.update_layout(font_family="Inter")
-slider2.update_layout(font_family="Inter")
 # App layout
 app.layout = html.Div(
@@ -123,7 +114,34 @@ app.layout = html.Div(
             [
                 html.Div(children='Visualizing the Open Model Ecosystem', style={'fontSize': 28, 'fontWeight': 'bold', 'marginBottom': 6}),
                 html.Div(children='An interactive dashboard to explore trends in open models on Hugging Face', style={'fontSize': 16, 'marginBottom': 12}),
                 html.Hr(style={'marginTop': 8, 'marginBottom': 8}),
             ],
             style={'textAlign': 'center'}
         ),
@@ -133,7 +151,7 @@ app.layout = html.Div(
                     dcc.Tab(label='Model Market Share', children=[
                         html.Div([
                             html.Div(children='Select time range to update all graphs below:', style={'fontSize': 16, 'marginBottom': 6, 'marginTop': 10}),
-                            dcc.Graph(figure=slider2, id='time-slider', style={'height': '100px'}),
                             html.Div(
                                 id='output-container-range-slider',
                                 style={
@@ -166,6 +184,8 @@ app.layout = html.Div(
                             dcc.Dropdown(['Language Concentration', 'Architecture', 'License', 'Method'], 'Language Concentration', id='dropdown'),
                         ], style={'marginTop': 6}),
                     ]),
                 ])
             ],
             style={
@@ -181,6 +201,10 @@ app.layout = html.Div(
     style={'fontFamily': 'Inter', 'backgroundColor': '#f7f7fa', 'minHeight': '100vh'}
 )
 @app.callback(
     Output('output-container-range-slider', 'children'),
     [Input('time-slider', 'relayoutData')]
@@ -192,22 +216,8 @@ def update_output(relayout_data):
         return f'Selected time range: {start_time} to {end_time}'
     else:
         return 'Selected time range: All data'
-# On dropdown change, update graph
-@app.callback(
-    Output('language-concentration-chart', 'figure'),
-    [Input('dropdown', 'value')]
-)
-def update_graph(selected_metric):
-    if selected_metric == 'Language Concentration':
-        return fig2
-    elif selected_metric == 'License':
-        return fig3
-    elif selected_metric == 'Method':
-        return fig4
-    elif selected_metric == 'Architecture':
-        return fig5
 @app.callback(
     Output('world-map-with-slider', 'figure'),
     [Input('time-slider', 'relayoutData')]
@@ -216,14 +226,15 @@ def update_map(relayout_data):
     if relayout_data and 'xaxis.range[0]' in relayout_data and 'xaxis.range[1]' in relayout_data:
         start_time = pd.to_datetime(relayout_data['xaxis.range[0]']).strftime('%Y-%m-%d')
         end_time = pd.to_datetime(relayout_data['xaxis.range[1]']).strftime('%Y-%m-%d')
-        updated_fig = create_plotly_world_map(
             country_concentration_df, "time", "metric", "value", start_time=start_time, end_time=end_time
         )
         updated_fig.update_layout(font_family="Inter")
         return updated_fig
     else:
-        return fig6
 @app.callback(
     Output('leaderboard', 'figure'),
     [Input('time-slider', 'relayoutData')]
@@ -238,8 +249,9 @@ def update_leaderboard(relayout_data):
         updated_fig.update_layout(font_family="Inter")
         return updated_fig
     else:
-        return fig7
 @app.callback(
     Output('stacked-area-chart', 'figure'),
     [Input('time-slider', 'relayoutData')]
@@ -248,14 +260,30 @@ def update_stacked_area(relayout_data):
     if relayout_data and 'xaxis.range[0]' in relayout_data and 'xaxis.range[1]' in relayout_data:
         start_time = pd.to_datetime(relayout_data['xaxis.range[0]']).strftime('%Y-%m-%d')
         end_time = pd.to_datetime(relayout_data['xaxis.range[1]']).strftime('%Y-%m-%d')
-        updated_fig = create_plotly_stacked_area_chart(
             model_topk_df, model_gini_df, model_hhi_df, TEMP_MODEL_EVENTS, PALETTE_0,
             start_time=start_time, end_time=end_time
         )
         updated_fig.update_layout(font_family="Inter")
         return updated_fig
     else:
-        return fig
 # Run the app
 if __name__ == '__main__':

 from dash import Dash, html, dcc, Input, Output
 import pandas as pd
+from graphs.model_market_share import create_stacked_area_chart, create_world_map, create_range_slider, create_leaderboard
+from graphs.model_characteristics import create_concentration_chart, create_line_plot
 # Initialize the app
 app = Dash()
 author_concentration_df = pd.read_pickle("data_frames/author_concentration_df.pkl")
 model_concentration_df = pd.read_pickle("data_frames/model_concentration_df.pkl")
+# Configurations
 TEMP_MODEL_EVENTS = {
     # "Yolo World Mirror": "2024-03-01",
     "Llama 3": "2024-04-17",
     "#540B0E"
 ]
 LANG_SEGMENT_ORDER = [
     'Monolingual: EN', 'Monolingual: HR', 'Monolingual: M/LR',
     'Multilingual: HR', 'Multilingual', 'Unknown',
 ]
 LICENSE_SEGMENT_ORDER = [
     "Open Use", "Open Use (Acceptable Use Policy)", "Open Use (Non-Commercial Only)", "Attribution",
     "Acceptable Use Policy", "Non-Commercial Only", "Undocumented", "Undocumented (Acceptable Use Policy)",
 ]
 METHOD_PLOT_CHOICES = {
     "cumulative": "none", # none, mean, sum
     "y_log": False, # True, False
     "period": "W",
 }
 ARCHITECTURE_PLOT_CHOICES = {
     "cumulative": "none", # none, mean, sum
     "y_log": False, # True, False
     "period": "W",
 }
+# Create initial figures
+# Model Market Share Tab
+model_market_share_area = create_stacked_area_chart(
+    model_topk_df, model_gini_df, model_hhi_df, TEMP_MODEL_EVENTS, PALETTE_0
 )
+world_map = create_world_map(
     country_concentration_df, "time", "metric", "value"
 )
+leaderboard = create_leaderboard(
     country_concentration_df, author_concentration_df, model_concentration_df
 )
+slider = create_range_slider(
     model_topk_df
 )
+# Model Characteristics Tab
+language_concentration_area = create_concentration_chart(
+    language_concentration_df, 'time', 'metric', 'value', LANG_SEGMENT_ORDER, PALETTE_0
+)
+license_concentration_area = create_concentration_chart(
+    license_concentration_df, 'period', 'status', 'percent', LICENSE_SEGMENT_ORDER, PALETTE_0
+)
+download_method_cumsum_line = create_line_plot(
+    download_method_cumsum_df, METHOD_PLOT_CHOICES, PALETTE_0
 )
+download_arch_cumsum_line = create_line_plot(
+    download_arch_cumsum_df, ARCHITECTURE_PLOT_CHOICES, PALETTE_0
+)
 # App layout
 app.layout = html.Div(
             [
                 html.Div(children='Visualizing the Open Model Ecosystem', style={'fontSize': 28, 'fontWeight': 'bold', 'marginBottom': 6}),
                 html.Div(children='An interactive dashboard to explore trends in open models on Hugging Face', style={'fontSize': 16, 'marginBottom': 12}),
+                html.Div(
+                    children=[
+                        html.A(
+                            "Data Provenance Initiative",
+                            href="https://www.dataprovenance.org/",
+                            target="_blank",
+                            style={
+                                'display': 'inline-block',
+                                'padding': '4px 14px',
+                                'fontSize': 13,
+                                'color': 'white',
+                                'backgroundColor': '#2563eb',
+                                'border': 'none',
+                                'borderRadius': '18px',
+                                'textDecoration': 'none',
+                                'fontWeight': 'bold',
+                                'boxShadow': '0 2px 8px rgba(37,99,235,0.08)',
+                                'marginLeft': '6px',
+                                'marginBottom': '4px',
+                                'transition': 'background 0.2s',
+                                'cursor': 'pointer'
+                            }
+                        )
+                    ],
+                    style={'fontSize': 14, 'marginBottom': 12}
+                ),
                 html.Hr(style={'marginTop': 8, 'marginBottom': 8}),
+                html.Div(children='Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry\'s standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.', style={'fontSize': 14, 'marginBottom': 12, 'marginLeft': 100, 'marginRight': 100}),
             ],
             style={'textAlign': 'center'}
         ),
                     dcc.Tab(label='Model Market Share', children=[
                         html.Div([
                             html.Div(children='Select time range to update all graphs below:', style={'fontSize': 16, 'marginBottom': 6, 'marginTop': 10}),
+                            dcc.Graph(figure=slider, id='time-slider', style={'height': '100px'}),
                             html.Div(
                                 id='output-container-range-slider',
                                 style={
                             dcc.Dropdown(['Language Concentration', 'Architecture', 'License', 'Method'], 'Language Concentration', id='dropdown'),
                         ], style={'marginTop': 6}),
                     ]),
+                    dcc.Tab(label='Model Relationships', children=[
+                    ]),
                 ])
             ],
             style={
     style={'fontFamily': 'Inter', 'backgroundColor': '#f7f7fa', 'minHeight': '100vh'}
 )
+# Callbacks for interactivity
+# Model Market Share Tab
+# On slider change, update output text
 @app.callback(
     Output('output-container-range-slider', 'children'),
     [Input('time-slider', 'relayoutData')]
         return f'Selected time range: {start_time} to {end_time}'
     else:
         return 'Selected time range: All data'
+# On slider change, update world map
 @app.callback(
     Output('world-map-with-slider', 'figure'),
     [Input('time-slider', 'relayoutData')]
     if relayout_data and 'xaxis.range[0]' in relayout_data and 'xaxis.range[1]' in relayout_data:
         start_time = pd.to_datetime(relayout_data['xaxis.range[0]']).strftime('%Y-%m-%d')
         end_time = pd.to_datetime(relayout_data['xaxis.range[1]']).strftime('%Y-%m-%d')
+        updated_fig = create_world_map(
             country_concentration_df, "time", "metric", "value", start_time=start_time, end_time=end_time
         )
         updated_fig.update_layout(font_family="Inter")
         return updated_fig
     else:
+        return world_map
+# On slider change, update leaderboard
 @app.callback(
     Output('leaderboard', 'figure'),
     [Input('time-slider', 'relayoutData')]
         updated_fig.update_layout(font_family="Inter")
         return updated_fig
     else:
+        return leaderboard
+# On slider change, update stacked area chart
 @app.callback(
     Output('stacked-area-chart', 'figure'),
     [Input('time-slider', 'relayoutData')]
     if relayout_data and 'xaxis.range[0]' in relayout_data and 'xaxis.range[1]' in relayout_data:
         start_time = pd.to_datetime(relayout_data['xaxis.range[0]']).strftime('%Y-%m-%d')
         end_time = pd.to_datetime(relayout_data['xaxis.range[1]']).strftime('%Y-%m-%d')
+        updated_fig = create_stacked_area_chart(
             model_topk_df, model_gini_df, model_hhi_df, TEMP_MODEL_EVENTS, PALETTE_0,
             start_time=start_time, end_time=end_time
         )
         updated_fig.update_layout(font_family="Inter")
         return updated_fig
     else:
+        return model_market_share_area
+# Model Characteristics Tab
+# On dropdown change, update graph
+@app.callback(
+    Output('language-concentration-chart', 'figure'),
+    [Input('dropdown', 'value')]
+)
+def update_graph(selected_metric):
+    if selected_metric == 'Language Concentration':
+        return language_concentration_area
+    elif selected_metric == 'License':
+        return license_concentration_area
+    elif selected_metric == 'Method':
+        return download_method_cumsum_line
+    elif selected_metric == 'Architecture':
+        return download_arch_cumsum_line
 # Run the app
 if __name__ == '__main__':

graphs/__pycache__/model_characteristics.cpython-39.pyc CHANGED Viewed

Binary files a/graphs/__pycache__/model_characteristics.cpython-39.pyc and b/graphs/__pycache__/model_characteristics.cpython-39.pyc differ

graphs/__pycache__/model_market_share.cpython-39.pyc CHANGED Viewed

Binary files a/graphs/__pycache__/model_market_share.cpython-39.pyc and b/graphs/__pycache__/model_market_share.cpython-39.pyc differ

graphs/model_characteristics.py CHANGED Viewed

@@ -1,27 +1,19 @@
 import plotly.graph_objects as go
 import plotly.express as px
-def create_plotly_language_concentration_chart(
-    language_concentration_df,
     period_col,
     metric_col,
     value_col,
-    LANG_SEGMENT_ORDER,
-    PALETTE_0
-):
-    """
-    Convert the language concentration visualization to Plotly
-    """
-    # Create figure
     fig = go.Figure()
-    # Get unique time periods
-    time_periods = sorted(language_concentration_df[period_col].unique())
     # Create stacked area traces
-    for i, metric in enumerate(LANG_SEGMENT_ORDER):
-        metric_data = language_concentration_df[language_concentration_df[metric_col] == metric]
         # Sort by time and get values
         metric_data = metric_data.sort_values(period_col)
@@ -37,7 +29,7 @@ def create_plotly_language_concentration_chart(
                 mode='lines',
                 line=dict(width=0),
                 fill='tonexty' if i > 0 else 'tozeroy',
-                fillcolor=PALETTE_0[i % len(PALETTE_0)],
                 stackgroup='one',
                 hovertemplate='<b>%{fullData.name}</b><br>' +
                              'Time: %{x}<br>' +
@@ -45,12 +37,10 @@ def create_plotly_language_concentration_chart(
             )
         )
-    # Update layout
     fig.update_layout(
         autosize=True,
-        font_family="Times New Roman",
         font_size=14,
-        showlegend=True,  # Show legend for language concentration
         legend=dict(
             title="Language Concentration",
             orientation="v",
@@ -64,7 +54,6 @@ def create_plotly_language_concentration_chart(
         hovermode='x unified'
     )
-    # Update x-axis
     fig.update_xaxes(
         title_text="",
         showgrid=True,
@@ -72,7 +61,6 @@ def create_plotly_language_concentration_chart(
         gridwidth=1
     )
-    # Update y-axis
     fig.update_yaxes(
         title_text="",
         showgrid=True,
@@ -82,30 +70,26 @@ def create_plotly_language_concentration_chart(
     return fig
-def create_plotly_publication_curves_with_legend(
-    download_method_cumsum_df,
-    METHOD_PLOT_CHOICES,
     color_palette=None
 ):
-    """
-    Version with traditional legend instead of inline labels
-    """
     fig = go.Figure()
-    groups = download_method_cumsum_df['status'].unique()
     if color_palette is None:
         color_palette = px.colors.qualitative.Set1
     for i, group in enumerate(groups):
-        group_data = download_method_cumsum_df[download_method_cumsum_df['status'] == group]
         group_data = group_data.sort_values('period')
         x_vals = group_data['period']
-        y_vals = group_data[METHOD_PLOT_CHOICES["y_col"]]
-        if METHOD_PLOT_CHOICES.get("y_format") == "percent":
             y_vals = y_vals * 100
         fig.add_trace(
@@ -121,7 +105,7 @@ def create_plotly_publication_curves_with_legend(
                 opacity=0.85,
                 hovertemplate='<b>%{fullData.name}</b><br>' +
                              'Period: %{x}<br>' +
-                             'Value: %{y:.2f}%<extra></extra>' if METHOD_PLOT_CHOICES.get("y_format") == "percent"
                              else '<b>%{fullData.name}</b><br>Period: %{x}<br>Value: %{y}<extra></extra>'
             )
         )
@@ -147,16 +131,16 @@ def create_plotly_publication_curves_with_legend(
         showgrid=False,
         zeroline=False
     )
-    y_title = METHOD_PLOT_CHOICES["y_col"]
-    if METHOD_PLOT_CHOICES.get("y_format") == "percent":
         y_title += " (%)"
     fig.update_yaxes(
         title_text=y_title,
         showgrid=False,
         zeroline=False,
-        type='log' if METHOD_PLOT_CHOICES.get("y_log") else 'linear'
     )
     return fig

 import plotly.graph_objects as go
 import plotly.express as px
+def create_concentration_chart(
+    df,
     period_col,
     metric_col,
     value_col,
+    order,
+    palette
+):
     fig = go.Figure()
     # Create stacked area traces
+    for i, metric in enumerate(order):
+        metric_data = df[df[metric_col] == metric]
         # Sort by time and get values
         metric_data = metric_data.sort_values(period_col)
                 mode='lines',
                 line=dict(width=0),
                 fill='tonexty' if i > 0 else 'tozeroy',
+                fillcolor=palette[i % len(palette)],
                 stackgroup='one',
                 hovertemplate='<b>%{fullData.name}</b><br>' +
                              'Time: %{x}<br>' +
             )
         )
     fig.update_layout(
         autosize=True,
         font_size=14,
+        showlegend=True,
         legend=dict(
             title="Language Concentration",
             orientation="v",
         hovermode='x unified'
     )
     fig.update_xaxes(
         title_text="",
         showgrid=True,
         gridwidth=1
     )
     fig.update_yaxes(
         title_text="",
         showgrid=True,
     return fig
+def create_line_plot(
+    df,
+    plot_choices,
     color_palette=None
 ):
     fig = go.Figure()
+    groups = df['status'].unique()
     if color_palette is None:
         color_palette = px.colors.qualitative.Set1
     for i, group in enumerate(groups):
+        group_data = df[df['status'] == group]
         group_data = group_data.sort_values('period')
         x_vals = group_data['period']
+        y_vals = group_data[plot_choices["y_col"]]
+        if plot_choices.get("y_format") == "percent":
             y_vals = y_vals * 100
         fig.add_trace(
                 opacity=0.85,
                 hovertemplate='<b>%{fullData.name}</b><br>' +
                              'Period: %{x}<br>' +
+                             'Value: %{y:.2f}%<extra></extra>' if plot_choices.get("y_format") == "percent"
                              else '<b>%{fullData.name}</b><br>Period: %{x}<br>Value: %{y}<extra></extra>'
             )
         )
         showgrid=False,
         zeroline=False
     )
+    y_title = plot_choices["y_col"]
+    if plot_choices.get("y_format") == "percent":
         y_title += " (%)"
     fig.update_yaxes(
         title_text=y_title,
         showgrid=False,
         zeroline=False,
+        type='log' if plot_choices.get("y_log") else 'linear'
     )
     return fig

graphs/model_market_share.py CHANGED Viewed

@@ -4,12 +4,9 @@ import pandas as pd
 filtered_df = pd.read_pickle("data_frames/filtered_df.pkl")
-def create_plotly_stacked_area_chart(
-    model_topk_df, model_gini_df, model_hhi_df, TEMP_MODEL_EVENTS, PALETTE_0, start_time=None, end_time=None
 ):
-    """
-    Convert the visualization_util stacked area chart to Plotly
-    """
     # Create subplot with secondary y-axis
     fig = make_subplots(specs=[[{"secondary_y": True}]])
@@ -26,7 +23,7 @@ def create_plotly_stacked_area_chart(
     # Create stacked area traces
     for i, metric in enumerate(metric_order):
-        metric_data = model_topk_df[model_topk_df["metric"] == metric]
         # Sort by time and get values
         metric_data = metric_data.sort_values("time")
@@ -45,9 +42,9 @@ def create_plotly_stacked_area_chart(
                 y=y_vals,
                 name=metric,
                 mode="lines",
-                line=dict(width=0, color=PALETTE_0[i % len(PALETTE_0)]),
                 fill="tonexty" if i > 0 else "tozeroy",
-                fillcolor=PALETTE_0[i % len(PALETTE_0)],  # Add opacity
                 stackgroup="one",
                 hovertemplate="<b>%{fullData.name}</b><br>"
                 + "Time: %{x}<br>"
@@ -58,7 +55,7 @@ def create_plotly_stacked_area_chart(
     # Add overlay lines
     # Gini Coefficient
-    gini_data = model_gini_df.sort_values("time")
     if start_time:
         gini_data = gini_data[gini_data["time"] >= start_time]
     if end_time:
@@ -79,7 +76,7 @@ def create_plotly_stacked_area_chart(
     )
     # HHI (×10)
-    hhi_data = model_hhi_df.sort_values("time")
     if start_time:
         hhi_data = hhi_data[hhi_data["time"] >= start_time]
     if end_time:
@@ -87,7 +84,7 @@ def create_plotly_stacked_area_chart(
     fig.add_trace(
         go.Scatter(
             x=hhi_data["time"],
-            y=hhi_data["value"] * 10,  # Multiply by 10 as indicated
             name="HHI (×10)",
             mode="lines",
             line=dict(color="#ec4899", width=3),
@@ -100,7 +97,7 @@ def create_plotly_stacked_area_chart(
     )
     # Add vertical lines for events
-    for event_name, event_date in TEMP_MODEL_EVENTS.items():
         fig.add_shape(
             type="line",
             x0=event_date,
@@ -122,12 +119,10 @@ def create_plotly_stacked_area_chart(
             font=dict(size=12),
         )
-    # Update layout
     fig.update_layout(
         autosize=True,
-        font_family="Inter",
         font_size=14,
-        showlegend=False,  # Set to True if you want to show legend
         margin=dict(l=60, r=60, t=40, b=60),
         plot_bgcolor="white",
         hovermode="x unified",
@@ -167,30 +162,12 @@ def create_plotly_stacked_area_chart(
     return fig
-def create_plotly_world_map(
     df, time_col="time", metric_col="metric", value_col="value", top_n_labels=10, start_time=None, end_time=None
 ):
     # Get all unique times and sort them
     times = sorted(df[time_col].unique())
-    # Create aggregated data across the full time range initially
-    regions_to_exclude = [
-        "Asia",
-        "Europe",
-        "North America",
-        "South America",
-        "Africa",
-        "Oceania",
-        "Middle East",
-        "Unknown",
-        "Online",
-        "International",
-        "HF",
-    ]
-    # Filter out regions
-    country_data = df[~df[metric_col].isin(regions_to_exclude)].copy()
     # Country code mapping
     country_code_map = {
         "Germany": "DEU",
@@ -238,16 +215,13 @@ def create_plotly_world_map(
         "Turkey": "TUR",
     }
-    country_data["country_code"] = country_data[metric_col].map(country_code_map)
-    mapped_data = country_data.dropna(subset=["country_code"])
-    # Create subplot with secondary plot for range slider
     fig = make_subplots(
-        rows=2,
         cols=1,
-        row_heights=[0.85, 0.15],
-        vertical_spacing=0.02,
-        specs=[[{"type": "geo"}], [{"type": "scatter"}]],
     )
     # Function to aggregate data for time range
@@ -264,13 +238,13 @@ def create_plotly_world_map(
         agg_data["percentage"] = agg_data[value_col] * 100
         return agg_data.sort_values("percentage", ascending=False)
-    # Initial data (full range)
     if start_time is None:
         start_time = times[0]
     if end_time is None:
         end_time = times[-1]
     initial_data = aggregate_time_range(start_time, end_time)
-    top_countries = initial_data.head(top_n_labels)
     # Create hover text
     hover_text = []
@@ -281,7 +255,7 @@ def create_plotly_world_map(
             f"Avg Value: {row[value_col]:.6f}"
         )
-    # Add choropleth to first subplot
     fig.add_trace(
         go.Choropleth(
             locations=initial_data["country_code"],
@@ -300,13 +274,13 @@ def create_plotly_world_map(
             ],
             colorbar=dict(
                 title="Avg % of Total Downloads",
-                tickfont=dict(size=12, family="Inter, system-ui, sans-serif"),
                 len=0.6,
                 x=1.02,
                 y=0.7,
             ),
-            marker_line_color="#219ebc",
-            marker_line_width=0.4,
             geo="geo",
         ),
         row=1,
@@ -314,63 +288,62 @@ def create_plotly_world_map(
     )
     # Country center coordinates for labels
-    country_centers = {
-        "USA": {"lat": 39.8, "lon": -98.5},
-        "CHN": {"lat": 35.8, "lon": 104.2},
-        "DEU": {"lat": 51.2, "lon": 10.4},
-        "GBR": {"lat": 55.4, "lon": -3.4},
-        "FRA": {"lat": 46.6, "lon": 2.2},
-        "JPN": {"lat": 36.2, "lon": 138.3},
-        "IND": {"lat": 20.6, "lon": 78.9},
-        "CAN": {"lat": 56.1, "lon": -106.3},
-        "RUS": {"lat": 61.5, "lon": 105.3},
-        "BRA": {"lat": -14.2, "lon": -51.9},
-        "AUS": {"lat": -25.3, "lon": 133.8},
-        "KOR": {"lat": 35.9, "lon": 127.8},
-    }
-    # Add initial labels using scattergeo instead of annotations
-    label_lons = []
-    label_lats = []
-    label_texts = []
-    for _, country in top_countries.iterrows():
-        country_code = country["country_code"]
-        if country_code in country_centers:
-            center = country_centers[country_code]
-            label_lons.append(center["lon"])
-            label_lats.append(center["lat"])
-            label_texts.append(f"{country['percentage']:.1f}%")
-    # Add text labels as a scattergeo trace
-    fig.add_trace(
-        go.Scattergeo(
-            lon=label_lons,
-            lat=label_lats,
-            text=label_texts,
-            mode="text",
-            textfont=dict(
-                color="#ffffff", size=13, family="Inter, system-ui, sans-serif"
-            ),
-            textposition="middle center",
-            showlegend=False,
-            hoverinfo="skip",
-            geo="geo",
-        ),
-        row=1,
-        col=1,
-    )
     # Update layout
     fig.update_layout(
         title=dict(
             text="Model Downloads by Country",
             x=0.5,
-            font=dict(size=20, family="Inter, system-ui, sans-serif", color="#212529"),
         ),
         width=1200,
         height=800,
-        font=dict(family="Inter, system-ui, sans-serif"),
         plot_bgcolor="#ffffff",
         paper_bgcolor="#ffffff",
         margin=dict(l=0, r=120, t=100, b=60),
@@ -379,35 +352,27 @@ def create_plotly_world_map(
     # Update geo layout
     fig.update_geos(
         showframe=False,
-        showcoastlines=True,
         showland=True,
-        landcolor="#f8f9fa",
-        coastlinecolor="#023047",
-        oceancolor="#8ecae6",
-        projection_type="equirectangular",
         bgcolor="#ffffff",
     )
-    # Remove excessive whitespace below the map by adjusting subplot row heights and margins
-    fig.update_layout(
-        margin=dict(l=0, r=120, t=100, b=20),  # Reduce bottom margin
-        height=600,  # Reduce overall figure height
-    )
     return fig
-def create_plotly_range_slider(df):
     if df.empty or "time" not in df.columns:
         return go.Figure()
     times = sorted(df["time"].unique())
     fig = go.Figure()
     # Invisible trace just to attach slider to the x-axis
     fig.add_trace(
         go.Scatter(
             x=times,
-            y=[0] * len(times),  # Dummy y-values
             mode="lines",
             line=dict(color="rgba(0,0,0,0)"),  # Invisible line
             hoverinfo="skip",
@@ -421,14 +386,33 @@ def create_plotly_range_slider(df):
             rangeslider=dict(visible=False),
             type="date"
         ),
-        yaxis=dict(visible=False),  # Hide y-axis since it's dummy
         margin=dict(t=20, b=20, l=20, r=20),
-        height=100  # Compact slider-only view
     )
     return fig
 def create_leaderboard(country_df, developer_df, model_df, start_time=None, end_time=None, top_n=10):
     # Ensure datetime
     country_df["time"] = pd.to_datetime(country_df["time"])
     developer_df["time"] = pd.to_datetime(developer_df["time"])
@@ -449,25 +433,6 @@ def create_leaderboard(country_df, developer_df, model_df, start_time=None, end_
         how="left"
     ).rename(columns={"country": "country_metric"}).drop(columns=["model"])
-    # Country -> Emoji mapping
-    country_emoji_map = {
-        "United States of America": "🇺🇸",
-        "China": "🇨🇳",
-        "Germany": "🇩🇪",
-        "France": "🇫🇷",
-        "India": "🇮🇳",
-        "Italy": "🇮🇹",
-        "Japan": "🇯🇵",
-        "South Korea": "🇰🇷",
-        "United Kingdom": "🇬🇧",
-        "Canada": "🇨🇦",
-        "Brazil": "🇧🇷",
-        "Australia": "🇦🇺",
-        "Unknown": "❓",
-        "Finland": "🇫🇮",
-        "Lebanon": "🇱🇧 ",
-    }
     if start_time is None:
         start_time = country_df["time"].min()
     if end_time is None:
@@ -487,6 +452,7 @@ def create_leaderboard(country_df, developer_df, model_df, start_time=None, end_
     if country_df_filtered.empty and developer_df_filtered.empty and model_df_filtered.empty:
         return go.Figure()
     def get_top_n_leaderboard(df, group_col, label, top_n=10):
         top = (
             df.groupby(group_col)["value"]
@@ -501,9 +467,10 @@ def create_leaderboard(country_df, developer_df, model_df, start_time=None, end_
             top["% of total"] = top["Total Value"] / total_value * 100
         else:
             top["% of total"] = 0
         # Add a flag-emoji column: the country's own flag for countries, or the flag of the associated country for developers/models
         if label == "Country":
-            top["Metadata"] = top[label].map(country_emoji_map).fillna("")
         else:
             # Get the country_metric for each developer/model with the already merged info
             top = top.merge(
@@ -512,8 +479,8 @@ def create_leaderboard(country_df, developer_df, model_df, start_time=None, end_
                 right_on=group_col,
                 how="left"
             ).drop(columns=[group_col])
-            top["Metadata"] = top["country_metric"].map(country_emoji_map).fillna("")
-        return top[[label, "Metadata", "% of total"]]
     top_countries = get_top_n_leaderboard(country_df_filtered, "metric", "Country", top_n=top_n)
     top_developers = get_top_n_leaderboard(developer_df_filtered, "metric", "Developer", top_n=top_n)

 filtered_df = pd.read_pickle("data_frames/filtered_df.pkl")
+def create_stacked_area_chart(
+    topk_df, gini_df, hhi_df, events, palette, start_time=None, end_time=None
 ):
     # Create subplot with secondary y-axis
     fig = make_subplots(specs=[[{"secondary_y": True}]])
     # Create stacked area traces
     for i, metric in enumerate(metric_order):
+        metric_data = topk_df[topk_df["metric"] == metric]
         # Sort by time and get values
         metric_data = metric_data.sort_values("time")
                 y=y_vals,
                 name=metric,
                 mode="lines",
+                line=dict(width=0, color=palette[i % len(palette)]),
                 fill="tonexty" if i > 0 else "tozeroy",
+                fillcolor=palette[i % len(palette)],
                 stackgroup="one",
                 hovertemplate="<b>%{fullData.name}</b><br>"
                 + "Time: %{x}<br>"
     # Add overlay lines
     # Gini Coefficient
+    gini_data = gini_df.sort_values("time")
     if start_time:
         gini_data = gini_data[gini_data["time"] >= start_time]
     if end_time:
     )
     # HHI (×10)
+    hhi_data = hhi_df.sort_values("time")
     if start_time:
         hhi_data = hhi_data[hhi_data["time"] >= start_time]
     if end_time:
     fig.add_trace(
         go.Scatter(
             x=hhi_data["time"],
+            y=hhi_data["value"] * 10,
             name="HHI (×10)",
             mode="lines",
             line=dict(color="#ec4899", width=3),
     )
     # Add vertical lines for events
+    for event_name, event_date in events.items():
         fig.add_shape(
             type="line",
             x0=event_date,
             font=dict(size=12),
         )
     fig.update_layout(
         autosize=True,
         font_size=14,
+        showlegend=True,
         margin=dict(l=60, r=60, t=40, b=60),
         plot_bgcolor="white",
         hovermode="x unified",
     return fig
+def create_world_map(
     df, time_col="time", metric_col="metric", value_col="value", top_n_labels=10, start_time=None, end_time=None
 ):
     # Get all unique times and sort them
     times = sorted(df[time_col].unique())
     # Country code mapping
     country_code_map = {
         "Germany": "DEU",
         "Turkey": "TUR",
     }
+    df["country_code"] = df[metric_col].map(country_code_map)
+    mapped_data = df.dropna(subset=["country_code"])
     fig = make_subplots(
+        rows=1,
         cols=1,
+        specs=[[{"type": "geo"}]],
     )
     # Function to aggregate data for time range
         agg_data["percentage"] = agg_data[value_col] * 100
         return agg_data.sort_values("percentage", ascending=False)
+    # Default to the full time range when start_time or end_time is not given
     if start_time is None:
         start_time = times[0]
     if end_time is None:
         end_time = times[-1]
     initial_data = aggregate_time_range(start_time, end_time)
+    # top_countries = initial_data.head(top_n_labels)
     # Create hover text
     hover_text = []
             f"Avg Value: {row[value_col]:.6f}"
         )
+    # Add choropleth to plot
     fig.add_trace(
         go.Choropleth(
             locations=initial_data["country_code"],
             ],
             colorbar=dict(
                 title="Avg % of Total Downloads",
+                tickfont=dict(size=12),
                 len=0.6,
                 x=1.02,
                 y=0.7,
             ),
+            marker_line_color="#ffffff",
+            marker_line_width=1.5,
             geo="geo",
         ),
         row=1,
     )
     # Country center coordinates for on-map labels (label rendering is currently disabled; the code below is commented out)
+    # country_centers = {
+    #     "USA": {"lat": 39.8, "lon": -98.5},
+    #     "CHN": {"lat": 35.8, "lon": 104.2},
+    #     "DEU": {"lat": 51.2, "lon": 10.4},
+    #     "GBR": {"lat": 55.4, "lon": -3.4},
+    #     "FRA": {"lat": 46.6, "lon": 2.2},
+    #     "JPN": {"lat": 36.2, "lon": 138.3},
+    #     "IND": {"lat": 20.6, "lon": 78.9},
+    #     "CAN": {"lat": 56.1, "lon": -106.3},
+    #     "RUS": {"lat": 61.5, "lon": 105.3},
+    #     "BRA": {"lat": -14.2, "lon": -51.9},
+    #     "AUS": {"lat": -25.3, "lon": 133.8},
+    #     "KOR": {"lat": 35.9, "lon": 127.8},
+    # }
+    # # Add initial labels using scattergeo instead of annotations
+    # label_lons = []
+    # label_lats = []
+    # label_texts = []
+    # for _, country in top_countries.iterrows():
+    #     country_code = country["country_code"]
+    #     if country_code in country_centers:
+    #         center = country_centers[country_code]
+    #         label_lons.append(center["lon"])
+    #         label_lats.append(center["lat"])
+    #         label_texts.append(f"{country['percentage']:.1f}%")
+    # # Add text labels as a scattergeo trace
+    # fig.add_trace(
+    #     go.Scattergeo(
+    #         lon=label_lons,
+    #         lat=label_lats,
+    #         text=label_texts,
+    #         mode="text",
+    #         textfont=dict(
+    #             color="#ffffff", size=13, family="Inter, system-ui, sans-serif"
+    #         ),
+    #         textposition="middle center",
+    #         showlegend=False,
+    #         hoverinfo="skip",
+    #         geo="geo",
+    #     ),
+    #     row=1,
+    #     col=1,
+    # )
     # Update layout
     fig.update_layout(
         title=dict(
             text="Model Downloads by Country",
             x=0.5,
+            font=dict(size=20),
         ),
         width=1200,
         height=800,
         plot_bgcolor="#ffffff",
         paper_bgcolor="#ffffff",
         margin=dict(l=0, r=120, t=100, b=60),
     # Update geo layout
     fig.update_geos(
         showframe=False,
         showland=True,
+        landcolor="#d0cfcf",
+        coastlinecolor="#b8b8b8",
+        projection_type="natural earth",
         bgcolor="#ffffff",
     )
     return fig
+def create_range_slider(df):
     if df.empty or "time" not in df.columns:
         return go.Figure()
     times = sorted(df["time"].unique())
     fig = go.Figure()
     # Invisible trace just to attach slider to the x-axis
     fig.add_trace(
         go.Scatter(
             x=times,
+            y=[0] * len(times),
             mode="lines",
             line=dict(color="rgba(0,0,0,0)"),  # Invisible line
             hoverinfo="skip",
             rangeslider=dict(visible=False),
             type="date"
         ),
+        yaxis=dict(visible=False),
         margin=dict(t=20, b=20, l=20, r=20),
+        height=100
     )
     return fig
 def create_leaderboard(country_df, developer_df, model_df, start_time=None, end_time=None, top_n=10):
+    # Country -> Emoji mapping
+    country_emoji_map = {
+        "United States of America": "🇺🇸",
+        "China": "🇨🇳",
+        "Germany": "🇩🇪",
+        "France": "🇫🇷",
+        "India": "🇮🇳",
+        "Italy": "🇮🇹",
+        "Japan": "🇯🇵",
+        "South Korea": "🇰🇷",
+        "United Kingdom": "🇬🇧",
+        "Canada": "🇨🇦",
+        "Brazil": "🇧🇷",
+        "Australia": "🇦🇺",
+        "Unknown": "❓",
+        "Finland": "🇫🇮",
+        "Lebanon": "🇱🇧 ",
+    }
     # Ensure datetime
     country_df["time"] = pd.to_datetime(country_df["time"])
     developer_df["time"] = pd.to_datetime(developer_df["time"])
         how="left"
     ).rename(columns={"country": "country_metric"}).drop(columns=["model"])
     if start_time is None:
         start_time = country_df["time"].min()
     if end_time is None:
     if country_df_filtered.empty and developer_df_filtered.empty and model_df_filtered.empty:
         return go.Figure()
+    # Helper: return the top-N entries of a grouping, each with its percentage share of the total value
     def get_top_n_leaderboard(df, group_col, label, top_n=10):
         top = (
             df.groupby(group_col)["value"]
             top["% of total"] = top["Total Value"] / total_value * 100
         else:
             top["% of total"] = 0
         # Add a flag-emoji column: the country's own flag for countries, or the flag of the associated country for developers/models
         if label == "Country":
+            top["Attributes"] = top[label].map(country_emoji_map).fillna("")
         else:
             # Get the country_metric for each developer/model with the already merged info
             top = top.merge(
                 right_on=group_col,
                 how="left"
             ).drop(columns=[group_col])
+            top["Attributes"] = top["country_metric"].map(country_emoji_map).fillna("")
+        return top[[label, "Attributes", "% of total"]]
     top_countries = get_top_n_leaderboard(country_df_filtered, "metric", "Country", top_n=top_n)
     top_developers = get_top_n_leaderboard(developer_df_filtered, "metric", "Developer", top_n=top_n)