Spaces:

economies-open-ai
/

open-model-evolution

Running

App Files Community

emsesc committed on Oct 18

Commit

ffc8ed8

1 Parent(s): 66b3482

reduce to leaderboard

Browse files

Files changed (24) hide show

app.py +58 -266
data_frames/agg_devsize_downloads.pkl +0 -0
data_frames/agg_natsize_downloads.pkl +0 -0
data_frames/derived_country_concentration_df_rolling.pkl +0 -0
data_frames/dev_gini_df.pkl +0 -0
data_frames/dev_hhi_df.pkl +0 -0
data_frames/dev_topk_df.pkl +0 -0
data_frames/download_arch_cumsum_df.pkl +0 -0
data_frames/download_license_cumsum_df.pkl +0 -0
data_frames/download_method_cumsum_df.pkl +0 -0
data_frames/download_openness_cumsum_df.pkl +0 -0
data_frames/language_concentration_df.pkl +0 -0
data_frames/model_gini_df.pkl +0 -0
data_frames/model_hhi_df.pkl +0 -0
data_frames/model_topk_df.pkl +0 -0
data_frames/nat_gini_df.pkl +0 -0
data_frames/nat_hhi_df.pkl +0 -0
data_frames/nat_topk_df.pkl +0 -0
graphs/__pycache__/model_characteristics.cpython-39.pyc +0 -0
graphs/__pycache__/model_market_share.cpython-39.pyc +0 -0
graphs/leaderboard.py +21 -17
graphs/model_characteristics.py +0 -145
graphs/model_market_share.py +0 -347
graphs/tree.py +0 -142

app.py CHANGED Viewed

@@ -1,10 +1,7 @@
 from dash import Dash, html, dcc, Input, Output, State
 import pandas as pd
 import dash_mantine_components as dmc
-from graphs.model_market_share import create_stacked_area_chart, create_world_map, create_range_slider
 from graphs.leaderboard import create_leaderboard, get_top_n_leaderboard, render_table, render_table_content
-from graphs.model_characteristics import create_concentration_chart, create_line_plot
-from graphs.tree import generate_model_treemap
 # Initialize the app
 app = Dash()
@@ -12,142 +9,6 @@ server = app.server
 # Load pre-processed data frames
 filtered_df = pd.read_pickle("data_frames/filtered_df.pkl")
-model_topk_df = pd.read_pickle("data_frames/model_topk_df.pkl")
-model_gini_df = pd.read_pickle("data_frames/model_gini_df.pkl")
-model_hhi_df = pd.read_pickle("data_frames/model_hhi_df.pkl")
-language_concentration_df = pd.read_pickle("data_frames/language_concentration_df.pkl")
-license_concentration_df = pd.read_pickle("data_frames/download_license_cumsum_df.pkl")
-download_method_cumsum_df = pd.read_pickle("data_frames/download_method_cumsum_df.pkl")
-download_arch_cumsum_df = pd.read_pickle("data_frames/download_arch_cumsum_df.pkl")
-nat_topk_df = pd.read_pickle("data_frames/nat_topk_df.pkl")
-country_concentration_df = pd.read_pickle("data_frames/country_concentration_df.pkl")
-author_concentration_df = pd.read_pickle("data_frames/author_concentration_df.pkl")
-model_concentration_df = pd.read_pickle("data_frames/model_concentration_df.pkl")
-derived_country_concentration_df = pd.read_pickle("data_frames/derived_country_concentration_df_rolling.pkl")
-nat_gini_df = pd.read_pickle("data_frames/nat_gini_df.pkl")
-nat_hhi_df = pd.read_pickle("data_frames/nat_hhi_df.pkl")
-# Configurations
-TEMP_MODEL_EVENTS = {
-    # "Yolo World Mirror": "2024-03-01",
-    "Llama 3": "2024-04-17",
-    "Stable Cascade": "2024-02-02",
-    "Stable Diffusion 3": "2024-05-30",
-    # "embed/upscale": "2023-03-24",
-    "DeepSeek-R1": "2025-01-20",
-    "Gemma-3 12B QAT": "2025-04-15", # gemma-3-12b-it-qat-4bit
-    # "Qwen": "2025-03-05",
-    # "Flux RedFlux": "2025-04-12",
-    # "DeepSeek-V3": "2025-03-24",
-    # "bloom": "2022-05-19",
-    "DALLE2-PyTorch": "2022-06-25",
-    "Stable Diffusion": "2022-08-10",
-    "CLIP ViT": "2021-01-05",
-    "YOLOv8": "2023-04-26",
-    "Sentence Transformer MiniLM v2": "2021-08-30",
-}
-PALETTE_0 = [
-    "#335C67",
-    "#FFF3B0",
-    "#E09F3E",
-    "#9E2A2B",
-    "#540B0E"
-]
-LANG_SEGMENT_ORDER = [
-    'Monolingual: EN', 'Monolingual: HR', 'Monolingual: M/LR',
-    'Multilingual: HR', 'Multilingual', 'Unknown',
-]
-LICENSE_SEGMENT_ORDER = [
-    "Open Use", "Open Use (Acceptable Use Policy)", "Open Use (Non-Commercial Only)", "Attribution",
-    "Acceptable Use Policy", "Non-Commercial Only", "Undocumented", "Undocumented (Acceptable Use Policy)",
-]
-METHOD_PLOT_CHOICES = {
-    "cumulative": "none", # none, mean, sum
-    "y_col": "percent", # percent count
-    "y_log": False, # True, False
-    "period": "W",
-}
-ARCHITECTURE_PLOT_CHOICES = {
-    "cumulative": "none", # none, mean, sum
-    "y_col": "percent", # percent count
-    "y_log": False, # True, False
-    "period": "W",
-}
-metric_order = [
-    'USA', 'China', 'Germany', 'France', 'International / Online',
-    'Asia', 'Middle East', 'Rest of Europe', 'South America', 'UK',
-    'Africa', 'Other', "User",
-]
-palette = [
-            "#3870f2",
-            "#e74c3c",  # Green (Top 10-100)  # Red (Top 1%)
-            "#f39c12",  # Orange (Top 1-10%)
-            "#3498db",  # Blue (Top 100-1000)
-            "#7C2A50",
-            "#9467bd",
-            "#8c564b",
-            "#e377c2",
-            "#7f7f7f",
-            "#27ae60",
-            "#5ce7f6",
-            "#f0e442",
-            "#c2cbcc",  # Gray (Rest)
-            "#56b4e9",
-        ]
-# Model Market Share Tab
-country_market_share_area = create_stacked_area_chart(
-    derived_country_concentration_df, nat_gini_df, nat_hhi_df, TEMP_MODEL_EVENTS, palette, metric_order
-)
-# Define metric order
-metric_order = [
-    "Top 1",
-    "Top 1 - 10",
-    "Top 10 - 100",
-    "Top 100 - 1000",
-    "Top 1000 - 10000",
-    "Rest",
-]
-model_market_share_area = create_stacked_area_chart(
-    model_topk_df, model_gini_df, model_hhi_df, TEMP_MODEL_EVENTS, PALETTE_0, metric_order
-)
-world_map = create_world_map(
-    filtered_df
-)
-slider = create_range_slider(
-    model_topk_df
-)
-time_slider = dmc.RangeSlider(
-    id="time-slider",
-    min=model_topk_df['time'].min().timestamp(),
-    max=model_topk_df['time'].max().timestamp(),
-    value=[
-        model_topk_df['time'].min().timestamp(),
-        model_topk_df['time'].max().timestamp()
-    ],
-    step=24 * 60 * 60,
-    color="blue",
-    size="md",
-    radius="xl",
-    marks=[
-        {"value": model_topk_df['time'].min().timestamp(), "label": model_topk_df['time'].min().strftime("%b %Y")},
-        {"value": model_topk_df['time'].max().timestamp(), "label": model_topk_df['time'].max().strftime("%b %Y")}
-    ],
-    style={"width": "70%", "margin": "0 auto"},
-    labelAlwaysOn=False,
-)
 # Create a dcc slider for time range selection by year
 created_slider = dcc.Slider(
@@ -160,27 +21,6 @@ created_slider = dcc.Slider(
     updatemode='mouseup',
 )
-# Model Characteristics Tab
-language_concentration_area = create_concentration_chart(
-    language_concentration_df, 'time', 'metric', 'value', LANG_SEGMENT_ORDER, PALETTE_0
-)
-license_concentration_area = create_concentration_chart(
-    license_concentration_df, 'period', 'status', 'percent', LICENSE_SEGMENT_ORDER, PALETTE_0
-)
-download_method_cumsum_line = create_line_plot(
-    download_method_cumsum_df, METHOD_PLOT_CHOICES, PALETTE_0
-)
-download_arch_cumsum_line = create_line_plot(
-    download_arch_cumsum_df, ARCHITECTURE_PLOT_CHOICES, PALETTE_0
-)
-tree_map = generate_model_treemap(
-    filtered_df
-)
 # App layout
 app.layout = dmc.MantineProvider(
     theme={"colorScheme": "light",
@@ -223,61 +63,49 @@ app.layout = dmc.MantineProvider(
             ],
             style={'textAlign': 'center'}
         ),
         html.Div(
             [
-                dcc.Tabs([
-                    dcc.Tab(label='Model Market Share', children=[
-                        html.Div([
-                            html.Div(children='Select time range to update all graphs below:', style={'fontSize': 16, 'marginBottom': 6, 'marginTop': 20}),
-                            time_slider,
-                            html.Div(
-                                id='output-container-range-slider',
-                                style={
-                                    'textAlign': 'center',
-                                    'fontSize': 20,
-                                    'marginBottom': 15,
-                                    'marginTop': 30,
-                                    'backgroundColor': 'white',
-                                    'borderRadius': '12px',
-                                    'boxShadow': '0 2px 12px rgba(0,0,0,0.10)',
-                                    'padding': '18px',
-                                    'display': 'inline-block',
-                                }
-                            ),
-                        ], style={'marginBottom': 12, 'justifyContent': 'center', 'textAlign': 'center'}),
-                        html.Div([
-                            # dcc.Graph(id='stacked-area-chart'),
-                            dcc.Graph(figure=country_market_share_area),
-                        ], style={'marginBottom': 12}),
-                        html.Div([
-                            html.Div(
-                                dcc.Graph(id='world-map-with-slider'),
-                                style={'display': 'flex', 'justifyContent': 'center', 'marginBottom': 0}
-                            ),
-                            created_slider,
-                        ], style={'marginBottom': 12})
-                    ]),
-                    dcc.Tab(label='Leaderboard', children=[
                         create_leaderboard(
-                            filtered_df
                         )
                     ]),
-                    dcc.Tab(label='Model Tree Map', children=[
-                        dcc.Graph(figure=tree_map)
                     ]),
-                    dcc.Tab(label='Model Characteristics',children=[
-                        html.Div([
-                            html.H3("Language Concentration", style={'textAlign': 'center', 'marginBottom': 10}),
-                            dcc.Graph(figure=language_concentration_area),
-                            html.H3("License Distribution", style={'textAlign': 'center', 'marginBottom': 10}),
-                            dcc.Graph(figure=license_concentration_area),
-                            html.H3("Method Trends", style={'textAlign': 'center', 'marginBottom': 10}),
-                            dcc.Graph(figure=download_method_cumsum_line),
-                            html.H3("Architecture Trends", style={'textAlign': 'center', 'marginBottom': 10}),
-                            dcc.Graph(figure=download_arch_cumsum_line),
-                        ], style={'marginBottom': 12}),
                     ]),
-                ])
             ],
             style={
                 'backgroundColor': 'white',
@@ -296,69 +124,33 @@ app.layout = dmc.MantineProvider(
 # Model Market Share Tab
 # On slider change, update output text
-@app.callback(
-    Output('output-container-range-slider', 'children'),
-    [Input('time-slider', 'value')]
-)
-def update_output(value):
-    if value and len(value) == 2:
-        start_time = pd.to_datetime(value[0], unit='s').strftime("%b %d, %Y")
-        end_time = pd.to_datetime(value[1], unit='s').strftime("%b %d, %Y")
-        return f"Selected time range: {start_time} to {end_time}"
-    return "Select a time range"
-# On slider change, update world map
-@app.callback(
-    Output('world-map-with-slider', 'figure'),
-    Input('created-slider', 'value')
-)
-def update_world_map(value):
-    # Filter by created year
-    if value is None:
-        return world_map
-    created_after = f"{int(value)}-01-01"
-    updated_fig = create_world_map(
-        filtered_df,
-        created_after=created_after
-    )
-    return updated_fig
-# On slider change, update leaderboard
 # @app.callback(
-#     Output('leaderboard', 'figure'),
-#     [Input('time-slider', 'relayoutData')]
 # )
-# def update_leaderboard(relayout_data):
-#     if relayout_data and 'xaxis.range[0]' in relayout_data and 'xaxis.range[1]' in relayout_data:
-#         start_time = pd.to_datetime(relayout_data['xaxis.range[0]']).strftime('%Y-%m-%d')
-#         end_time = pd.to_datetime(relayout_data['xaxis.range[1]']).strftime('%Y-%m-%d')
-#         updated_fig = create_leaderboard(
-#             country_concentration_df, author_concentration_df, model_concentration_df, start_time=start_time, end_time=end_time
-#         )
-#         updated_fig.update_layout(font_family="Inter")
-#         return updated_fig
-#     else:
-#         return leaderboard
-# On slider change, update stacked area chart
 # @app.callback(
-#     Output('stacked-area-chart', 'figure'),
-#     Input('time-slider', 'value')
 # )
-# def update_stacked_area(value):
-#     if value and len(value) == 2:
-#         start_time = pd.to_datetime(value[0], unit='s').strftime('%Y-%m-%d')
-#         end_time = pd.to_datetime(value[1], unit='s').strftime('%Y-%m-%d')
-#         updated_fig = create_stacked_area_chart(
-#             model_topk_df, model_gini_df, model_hhi_df,
-#             TEMP_MODEL_EVENTS, PALETTE_0,
-#             start_time=start_time, end_time=end_time
-#         )
-#         updated_fig.update_layout(font_family="Inter")
-#         return updated_fig
-#     return model_market_share_area
 @app.callback(
     Output("top_countries-table", "children"),

 from dash import Dash, html, dcc, Input, Output, State
 import pandas as pd
 import dash_mantine_components as dmc
 from graphs.leaderboard import create_leaderboard, get_top_n_leaderboard, render_table, render_table_content
 # Initialize the app
 app = Dash()
 # Load pre-processed data frames
 filtered_df = pd.read_pickle("data_frames/filtered_df.pkl")
 # Create a dcc slider for time range selection by year
 created_slider = dcc.Slider(
     updatemode='mouseup',
 )
 # App layout
 app.layout = dmc.MantineProvider(
     theme={"colorScheme": "light",
             ],
             style={'textAlign': 'center'}
         ),
+        html.Div(
+            children=[
+            dmc.SegmentedControl(
+                        id="segmented",
+                        value="all-time",
+                        data=[
+                            {"value": "one-year", "label": "One Year Window"},
+                            {"value": "all-time", "label": "All Time"},
+                        ],
+                        mb=10,
+                    ),
+            html.Span(id="global-toggle-status", style={"marginLeft": "8px"}),
+            created_slider,
+            html.Div(children='This is an explanation of the filters on this block.', style={'fontSize': 12, 'marginTop': 20, 'marginLeft': 10, 'marginRight': 10}),
+            ],
+            style={
+                'backgroundColor': 'white',
+                'borderRadius': '18px',
+                'boxShadow': '0 4px 24px rgba(0,0,0,0.10)',
+                'padding': '32px',
+                'margin': '32px auto',
+                'maxWidth': '1000px',
+            }
+        ),
         html.Div(
             [
+                dcc.Tabs(children=[  # wrap Tabs here
+                    dcc.Tab(label='Countries', children=[
                         create_leaderboard(
+                            filtered_df, "countries"
                         )
                     ]),
+                    dcc.Tab(label='Developers', children=[
+                        create_leaderboard(
+                            filtered_df, "developers"
+                        )
                     ]),
+                    dcc.Tab(label='Models', children=[
+                        create_leaderboard(
+                            filtered_df, "models"
+                        )
                     ]),
+                ]),
             ],
             style={
                 'backgroundColor': 'white',
 # Model Market Share Tab
 # On slider change, update output text
 # @app.callback(
+#     Output('output-container-range-slider', 'children'),
+#     [Input('time-slider', 'value')]
 # )
+# def update_output(value):
+#     if value and len(value) == 2:
+#         start_time = pd.to_datetime(value[0], unit='s').strftime("%b %d, %Y")
+#         end_time = pd.to_datetime(value[1], unit='s').strftime("%b %d, %Y")
+#         return f"Selected time range: {start_time} to {end_time}"
+#     return "Select a time range"
+# On slider change, update world map
 # @app.callback(
+#     Output('world-map-with-slider', 'figure'),
+#     Input('created-slider', 'value')
 # )
+# def update_world_map(value):
+#     # Filter by created year
+#     if value is None:
+#         return world_map
+#     created_after = f"{int(value)}-01-01"
+#     updated_fig = create_world_map(
+#         filtered_df,
+#         created_after=created_after
+#     )
+#     return updated_fig
 @app.callback(
     Output("top_countries-table", "children"),

data_frames/agg_devsize_downloads.pkl DELETED Viewed

Binary file (24.1 kB)

data_frames/agg_natsize_downloads.pkl DELETED Viewed

Binary file (2.09 kB)

data_frames/derived_country_concentration_df_rolling.pkl DELETED Viewed

Binary file (83 kB)

data_frames/dev_gini_df.pkl DELETED Viewed

Binary file (5.59 kB)

data_frames/dev_hhi_df.pkl DELETED Viewed

Binary file (5.59 kB)

data_frames/dev_topk_df.pkl DELETED Viewed

Binary file (28.3 kB)

data_frames/download_arch_cumsum_df.pkl DELETED Viewed

Binary file (78.6 kB)

data_frames/download_license_cumsum_df.pkl DELETED Viewed

Binary file (64.5 kB)

data_frames/download_method_cumsum_df.pkl DELETED Viewed

Binary file (78.6 kB)

data_frames/download_openness_cumsum_df.pkl DELETED Viewed

Binary file (29.3 kB)

data_frames/language_concentration_df.pkl DELETED Viewed

Binary file (28.4 kB)

data_frames/model_gini_df.pkl DELETED Viewed

Binary file (5.59 kB)

data_frames/model_hhi_df.pkl DELETED Viewed

Binary file (5.59 kB)

data_frames/model_topk_df.pkl DELETED Viewed

Binary file (28.3 kB)

data_frames/nat_gini_df.pkl DELETED Viewed

Binary file (5.59 kB)

data_frames/nat_hhi_df.pkl DELETED Viewed

Binary file (5.59 kB)

data_frames/nat_topk_df.pkl DELETED Viewed

Binary file (28.3 kB)

graphs/__pycache__/model_characteristics.cpython-39.pyc DELETED Viewed

Binary file (2.6 kB)

graphs/__pycache__/model_market_share.cpython-39.pyc DELETED Viewed

Binary file (5.94 kB)

graphs/leaderboard.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import pandas as pd
 from dash import html, dcc
 import base64
 button_style = {
@@ -120,12 +122,16 @@ def df_to_download_link(df, filename):
     b64 = base64.b64encode(csv_string.encode()).decode()
     return html.Div(
         html.A(
-        "Download CSV",
-        id=f"download-{filename}",
-        download=f"{filename}.csv",
-        href=f"data:text/csv;base64,{b64}",
-        target="_blank",
-        style=button_style
         ),
         style={"textAlign": "right"}
     )
@@ -223,7 +229,7 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
     download_top["Total Value"] = download_top["Total Value"].astype(int)
     download_top["% of total"] = download_top["% of total"].round(2)
-    top["Name"].replace("User", "user", inplace=True)
     # All relevant metadata columns
     meta_cols = meta_cols_map.get(group_col, [])
@@ -301,14 +307,14 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
         return download_info
     # Apply metadata builder to top dataframe
-    top["Metadata"] = top["Name"].map(build_metadata)
     download_info_list = [build_download_metadata(nm) for nm in download_top["Name"]]
     download_info_df = pd.DataFrame(download_info_list)
     download_top = pd.concat([download_top, download_info_df], axis=1)
     return top[["Name", "Metadata", "% of total"]], download_top
-def create_leaderboard(filtered_df, start_time=None, top_n=10):
     # Filter by time
     if start_time is not None:
         filtered_df = filtered_df[(filtered_df["created"] >= start_time) & (filtered_df["time"] >= start_time)]
@@ -326,11 +332,9 @@ def create_leaderboard(filtered_df, start_time=None, top_n=10):
     top_developers, download_top_developers = get_top_n_leaderboard(filtered_df, "author", top_n)
     top_models, download_top_models = get_top_n_leaderboard(filtered_df, "model", top_n)
-    # Layout with 3 stacked tables
-    layout = html.Div([
-        render_table(top_countries, download_top_countries, "Top Countries", chip_color="#FCE8E6", bar_color="#FF6F61", filename="top_countries"),
-        render_table(top_developers, download_top_developers, "Top Developers", chip_color="#E6F4EA", bar_color="#4CAF50", filename="top_developers"),
-        render_table(top_models, download_top_models, "Top Models", chip_color="#E8F0FE", bar_color="#2196F3", filename="top_models"),
-    ])
-    return layout

 import pandas as pd
 from dash import html, dcc
+from dash_iconify import DashIconify
+import dash_mantine_components as dmc
 import base64
 button_style = {
     b64 = base64.b64encode(csv_string.encode()).decode()
     return html.Div(
         html.A(
+            children=dmc.ActionIcon(
+                DashIconify(icon="mdi:download", width=24),
+                size="lg"
+            ),
+            id=f"download-{filename}",
+            download=f"{filename}.csv",
+            href=f"data:text/csv;base64,{b64}",
+            target="_blank",
+            title="Download CSV",
+            style={"padding": "6px 12px", "display": "inline-flex", "alignItems": "center", "justifyContent": "center"}
         ),
         style={"textAlign": "right"}
     )
     download_top["Total Value"] = download_top["Total Value"].astype(int)
     download_top["% of total"] = download_top["% of total"].round(2)
+    top["Name"].replace("User", "user")
     # All relevant metadata columns
     meta_cols = meta_cols_map.get(group_col, [])
         return download_info
     # Apply metadata builder to top dataframe
+    top["Metadata"] = top["Name"].astype(object).apply(build_metadata)
     download_info_list = [build_download_metadata(nm) for nm in download_top["Name"]]
     download_info_df = pd.DataFrame(download_info_list)
     download_top = pd.concat([download_top, download_info_df], axis=1)
     return top[["Name", "Metadata", "% of total"]], download_top
+def create_leaderboard(filtered_df, board_type, start_time=None, top_n=10):
     # Filter by time
     if start_time is not None:
         filtered_df = filtered_df[(filtered_df["created"] >= start_time) & (filtered_df["time"] >= start_time)]
     top_developers, download_top_developers = get_top_n_leaderboard(filtered_df, "author", top_n)
     top_models, download_top_models = get_top_n_leaderboard(filtered_df, "model", top_n)
+    if board_type == "countries":
+        return render_table(top_countries, download_top_countries, "Top Countries", chip_color="#FCE8E6", bar_color="#FF6F61", filename="top_countries")
+    elif board_type == "developers":
+        return render_table(top_developers, download_top_developers, "Top Developers", chip_color="#E6F4EA", bar_color="#4CAF50", filename="top_developers")
+    else:
+        return render_table(top_models, download_top_models, "Top Models", chip_color="#E8F0FE", bar_color="#2196F3", filename="top_models")

graphs/model_characteristics.py DELETED Viewed

@@ -1,145 +0,0 @@
-import plotly.graph_objects as go
-import plotly.express as px
-def create_concentration_chart(
-    df,
-    period_col,
-    metric_col,
-    value_col,
-    order,
-    palette
-):
-    fig = go.Figure()
-    # Create stacked area traces
-    for i, metric in enumerate(order):
-        metric_data = df[df[metric_col] == metric]
-        # Sort by time and get values
-        metric_data = metric_data.sort_values(period_col)
-        x_vals = metric_data[period_col]
-        y_vals = metric_data[value_col]
-        # Add area trace
-        fig.add_trace(
-            go.Scatter(
-                x=x_vals,
-                y=y_vals,
-                name=metric,
-                mode='lines',
-                line=dict(width=0),
-                fill='tonexty' if i > 0 else 'tozeroy',
-                fillcolor=palette[i % len(palette)],
-                stackgroup='one',
-                hovertemplate='<b>%{fullData.name}</b><br>' +
-                             'Time: %{x}<br>' +
-                             'Value: %{y}<extra></extra>'
-            )
-        )
-    fig.update_layout(
-        autosize=True,
-        font_size=14,
-        showlegend=True,
-        margin=dict(l=60, r=60, t=40, b=80),  # Increased bottom margin
-        plot_bgcolor="white",
-        hovermode="x unified",
-        legend=dict(
-            orientation="h",     # Horizontal legend
-            yanchor="top",       # Anchor the top of the legend box
-            y=-0.25,             # Place it below the plot
-            xanchor="center",
-            x=0.5
-        )
-    )
-    fig.update_xaxes(
-        title_text="",
-        showgrid=True,
-        gridcolor='lightgray',
-        gridwidth=1
-    )
-    fig.update_yaxes(
-        title_text="",
-        showgrid=True,
-        gridcolor='lightgray',
-        gridwidth=1
-    )
-    return fig
-def create_line_plot(
-    df,
-    plot_choices,
-    color_palette=None
-):
-    fig = go.Figure()
-    groups = df['status'].unique()
-    if color_palette is None:
-        color_palette = px.colors.qualitative.Set1
-    for i, group in enumerate(groups):
-        group_data = df[df['status'] == group]
-        group_data = group_data.sort_values('period')
-        x_vals = group_data['period']
-        y_vals = group_data[plot_choices["y_col"]]
-        if plot_choices.get("y_format") == "percent":
-            y_vals = y_vals * 100
-        fig.add_trace(
-            go.Scatter(
-                x=x_vals,
-                y=y_vals,
-                name=group,
-                mode='lines',
-                line=dict(
-                    color=color_palette[i % len(color_palette)],
-                    width=3
-                ),
-                opacity=0.85,
-                hovertemplate='<b>%{fullData.name}</b><br>' +
-                             'Period: %{x}<br>' +
-                             'Value: %{y:.2f}%<extra></extra>' if plot_choices.get("y_format") == "percent"
-                             else '<b>%{fullData.name}</b><br>Period: %{x}<br>Value: %{y}<extra></extra>'
-            )
-        )
-    fig.update_layout(
-        autosize=True,
-        font_size=14,
-        showlegend=True,
-        margin=dict(l=60, r=60, t=40, b=80),  # Increased bottom margin
-        plot_bgcolor="white",
-        hovermode="x unified",
-        legend=dict(
-            orientation="h",     # Horizontal legend
-            yanchor="top",       # Anchor the top of the legend box
-            y=-0.25,             # Place it below the plot
-            xanchor="center",
-            x=0.5
-        )
-    )
-    fig.update_xaxes(
-        title_text="Period",
-        showgrid=False,
-        zeroline=False
-    )
-    y_title = plot_choices["y_col"]
-    if plot_choices.get("y_format") == "percent":
-        y_title += " (%)"
-    fig.update_yaxes(
-        title_text=y_title,
-        showgrid=False,
-        zeroline=False,
-        type='log' if plot_choices.get("y_log") else 'linear'
-    )
-    return fig

graphs/model_market_share.py DELETED Viewed

@@ -1,347 +0,0 @@
-import numpy as np
-import pandas as pd
-import plotly.graph_objects as go
-from plotly.subplots import make_subplots
-def create_stacked_area_chart(
-    topk_df, gini_df, hhi_df, events, palette, metric_order, start_time=None, end_time=None
-):
-    """Build a stacked area chart of the ``topk_df`` metrics with event markers.
-
-    Args:
-        topk_df: Frame with "metric", "time" and "value" columns; one stacked
-            area is drawn per entry of ``metric_order``.
-        gini_df: Unused; kept so existing callers do not break (no Gini
-            overlay is drawn).
-        hhi_df: Unused; kept so existing callers do not break (no HHI
-            overlay is drawn).
-        events: Mapping of event label to its x position. ``None`` or an
-            empty mapping draws no markers.
-        palette: Non-empty sequence of colors, cycled when there are more
-            metrics than colors.
-        metric_order: Metric names in stacking order (the first one is
-            filled down to zero).
-        start_time: Inclusive lower bound on "time"; ``None`` means unbounded.
-        end_time: Inclusive upper bound on "time"; ``None`` means unbounded.
-
-    Returns:
-        The figure produced by ``make_subplots``.
-    """
-    # A secondary y-axis is declared and titled below, but no trace is
-    # currently attached to it.
-    fig = make_subplots(specs=[[{"secondary_y": True}]])
-    for i, metric in enumerate(metric_order):
-        metric_data = topk_df[topk_df["metric"] == metric].sort_values("time")
-        # Test against None (not truthiness) so the row filters agree with the
-        # x-axis range logic further down.
-        if start_time is not None:
-            metric_data = metric_data[metric_data["time"] >= start_time]
-        if end_time is not None:
-            metric_data = metric_data[metric_data["time"] <= end_time]
-        color = palette[i % len(palette)]
-        fig.add_trace(
-            go.Scatter(
-                x=metric_data["time"],
-                y=metric_data["value"],
-                name=metric,
-                mode="lines",
-                line=dict(width=0, color=color),
-                fill="tonexty" if i > 0 else "tozeroy",
-                fillcolor=color,
-                stackgroup="one",
-                hovertemplate="<b>%{fullData.name}</b><br>"
-                "Time: %{x}<br>"
-                "Value: %{y}<extra></extra>",
-            ),
-            secondary_y=False,
-        )
-    # One dashed full-height line plus a rotated label per event.
-    for event_name, event_date in (events or {}).items():
-        fig.add_shape(
-            type="line",
-            x0=event_date,
-            x1=event_date,
-            y0=0,
-            y1=1,
-            yref="paper",  # y0/y1 are fractions of the plot height
-            line=dict(color="#333333", width=2, dash="dash"),
-        )
-        fig.add_annotation(
-            x=event_date,
-            y=0.5,
-            yref="paper",
-            text=event_name,
-            showarrow=False,
-            xshift=-10,
-            yshift=10,
-            font=dict(size=12, color="black"),
-            textangle=270,
-            align="left",
-        )
-    fig.update_layout(
-        autosize=True,
-        font_size=14,
-        showlegend=True,
-        margin=dict(l=60, r=60, t=40, b=80),  # extra bottom room for the legend
-        plot_bgcolor="white",
-        hovermode="x unified",
-        legend=dict(
-            orientation="h",  # horizontal legend
-            yanchor="top",
-            y=-0.25,  # below the plot area
-            xanchor="center",
-            x=0.5,
-        ),
-    )
-    # Pin the x-axis to the requested window; an open side is left as None.
-    if start_time is None and end_time is None:
-        xaxis_range = None
-    else:
-        xaxis_range = [start_time, end_time]
-    fig.update_xaxes(
-        title_text="",
-        showgrid=True,
-        gridcolor="lightgray",
-        gridwidth=1,
-        range=xaxis_range,
-    )
-    # Primary (left) y-axis.
-    fig.update_yaxes(
-        title_text="National Concentration (%)",
-        showgrid=True,
-        gridcolor="lightgray",
-        gridwidth=1,
-        secondary_y=False,
-    )
-    # Secondary (right) y-axis.
-    fig.update_yaxes(
-        title_text="Concentration Indices", showgrid=False, secondary_y=True
-    )
-    return fig
-def create_world_map(
-    df, top_n_labels=20, created_after=None
-):
-    """Draw a choropleth of each country's share of total downloads.
-
-    Args:
-        df: Frame with "org_country_single" and "downloads" columns; the
-            "created" and "time" columns are also read when ``created_after``
-            is given. The frame itself is not modified.
-        top_n_labels: Currently unused; kept for caller compatibility.
-        created_after: When truthy, only rows whose "created" and "time" are
-            both strictly greater than this value are kept.
-
-    Returns:
-        A figure with one choropleth trace colored by log10 of the share.
-    """
-    # Keep only rows attributed to an actual country.
-    non_countries = ["HF", "Online", "International", "user"]
-    df = df[~df["org_country_single"].isin(non_countries)]
-    # Keep only models created, and downloads recorded, after the cutoff.
-    if created_after:
-        df = df[(df["created"] > created_after) & (df["time"] > created_after)]
-    # Country name -> ISO 3166-1 alpha-3 code used as the choropleth location.
-    country_code_map = {
-        "Germany": "DEU",
-        "United States of America": "USA",
-        "China": "CHN",
-        "France": "FRA",
-        "India": "IND",
-        "Israel": "ISR",
-        "South Korea": "KOR",
-        "United Kingdom": "GBR",
-        "Switzerland": "CHE",
-        "United Arab Emirates": "ARE",
-        "Vietnam": "VNM",
-        "Singapore": "SGP",
-        "Chile": "CHL",
-        "Hong Kong": "HKG",
-        "Japan": "JPN",
-        "Canada": "CAN",
-        "Spain": "ESP",
-        "Finland": "FIN",
-        "Indonesia": "IDN",
-        "Russia": "RUS",
-        "Iran": "IRN",
-        "Belarus": "BLR",
-        "Thailand": "THA",
-        "UAE": "ARE",
-        "Argentina": "ARG",
-        "Iceland": "ISL",
-        "Poland": "POL",
-        "Sweden": "SWE",
-        "Taiwan": "TWN",
-        "Lebanon": "LBN",
-        "Algeria": "DZA",
-        "Bulgaria": "BGR",
-        "Norway": "NOR",
-        "Netherlands": "NLD",
-        "Hungary": "HUN",
-        "Estonia": "EST",
-        "Qatar": "QAT",
-        "Brazil": "BRA",
-        "Morocco": "MAR",
-        "Slovenia": "SVN",
-        "Ghana": "GHA",
-        "Uganda": "UGA",
-        "Turkey": "TUR",
-    }
-    # assign() builds a new frame instead of writing into a filtered slice,
-    # which avoids pandas' SettingWithCopyWarning. Unmapped names become NaN
-    # and are dropped on the next line.
-    df = df.assign(country_code=df["org_country_single"].map(country_code_map))
-    df = df.dropna(subset=["country_code"])
-    fig = make_subplots(
-        rows=1,
-        cols=1,
-        specs=[[{"type": "geo"}]],
-    )
-    downloads_by_country = (
-        df.groupby(["org_country_single", "country_code"])["downloads"]
-        .sum()
-        .reset_index()
-    )
-    # log10 is undefined for a zero share (it would put -inf into the color
-    # data), so countries without downloads are left unshaded.
-    downloads_by_country = downloads_by_country[downloads_by_country["downloads"] > 0]
-    total_downloads = float(downloads_by_country["downloads"].sum())
-    downloads_by_country = downloads_by_country.assign(
-        pct=downloads_by_country["downloads"] / total_downloads * 100.0
-    )
-    # NOTE(review): the label says "Avg Downloads" but the value is the share
-    # of summed downloads; confirm the intended wording.
-    hover_text = [
-        f"<b>{country}</b><br>"
-        f"Avg Downloads: {pct:.1f}% of total<br>"
-        for country, pct in zip(
-            downloads_by_country["org_country_single"], downloads_by_country["pct"]
-        )
-    ]
-    linear_ticks = [0.01, 0.1, 10, 50, 100]  # percent values shown as labels
-    log_ticks = np.log10(linear_ticks)  # tick positions on the plotted log scale
-    fig.add_trace(
-        go.Choropleth(
-            locations=downloads_by_country["country_code"],
-            z=np.log10(downloads_by_country["pct"]),
-            text=hover_text,
-            hovertemplate="%{text}<extra></extra>",
-            colorscale=[
-                "#001219",
-                "#0a9396",
-                "#94d2bd",
-                "#e9d8a6",
-                "#ee9b00",
-                "#ca6702",
-                "#bb3e03",
-                "#9b2226",
-            ],
-            colorbar=dict(
-                title="Avg % of Total Downloads",
-                tickvals=log_ticks,  # positions in log space
-                ticktext=[f"{t}%" for t in linear_ticks],  # labels shown
-                tickfont=dict(size=12),
-                len=0.6,
-                x=1.02,
-                y=0.7,
-            ),
-            marker_line_color="#ffffff",
-            marker_line_width=1.5,
-            geo="geo",
-        ),
-        row=1,
-        col=1,
-    )
-    fig.update_layout(
-        title=dict(
-            text="Model Downloads by Country",
-            x=0.5,
-            font=dict(size=20),
-            pad=dict(t=10),
-        ),
-        width=1200,
-        height=700,
-        plot_bgcolor="#ffffff",
-        paper_bgcolor="#ffffff",
-    )
-    fig.update_geos(
-        showframe=False,
-        showland=True,
-        landcolor="#d0cfcf",
-        coastlinecolor="#b8b8b8",
-        projection_type="natural earth",
-        bgcolor="#ffffff",
-    )
-    return fig
-def create_range_slider(df):
-    """Return a 100px-high figure whose date x-axis spans the "time" values of ``df``.
-
-    An empty figure is returned when ``df`` has no rows or no "time" column.
-    """
-    has_time_data = (not df.empty) and "time" in df.columns
-    if not has_time_data:
-        return go.Figure()
-    timeline = sorted(df["time"].unique())
-    # Fully transparent flat line: it only exists to give the x-axis its extent.
-    placeholder = go.Scatter(
-        x=timeline,
-        y=[0 for _ in timeline],
-        mode="lines",
-        line={"color": "rgba(0,0,0,0)"},
-        hoverinfo="skip",
-        showlegend=False,
-    )
-    fig = go.Figure(data=[placeholder])
-    # NOTE(review): the axis range slider is explicitly hidden here despite the
-    # function name; confirm this is intended.
-    fig.update_layout(
-        xaxis={"rangeslider": {"visible": False}, "type": "date"},
-        yaxis={"visible": False},
-        margin={"t": 20, "b": 20, "l": 20, "r": 20},
-        height=100,
-    )
-    return fig

graphs/tree.py DELETED Viewed

@@ -1,142 +0,0 @@
-import plotly.express as px
-import pandas as pd
-# Five-color hex palette. generate_model_treemap below does not reference it
-# (it colors by the 'Viridis' continuous scale).
-PALETTE_0 = [
-    "#335C67",
-    "#FFF3B0",
-    "#E09F3E",
-    "#9E2A2B",
-    "#540B0E"
-]
-def generate_model_treemap(df, parent_col='merged_derived_from', child_col='model', value_col='downloads'):
-    """Render a two-level (parent -> child) treemap sized and colored by ``value_col``.
-
-    Args:
-        df: Ignored. The data is always loaded from
-            'data_frames/filtered_tree_df.pkl'; the parameter is kept so
-            existing callers do not break.
-        parent_col: Column holding each model's parent.
-        child_col: Column holding the model name.
-        value_col: Numeric column used for tile size and color.
-
-    Returns:
-        The plotly express treemap figure, 1200px tall.
-    """
-    # NOTE(review): the passed-in frame is replaced by the pickled one; confirm
-    # this is intended.
-    df = pd.read_pickle('data_frames/filtered_tree_df.pkl')
-    # Drop rows whose parent is one of these placeholder values. copy() makes
-    # the result an independent frame, so the column assignment below does not
-    # write into a slice (avoids pandas' SettingWithCopyWarning).
-    df = df[~df[parent_col].isin([None, "['Unsure']", 'nan'])].copy()
-    # Every model that appears as somebody's parent.
-    parent_models = set(df[parent_col].dropna())
-    # Use an empty-string parent only for rows that have no parent and are not
-    # themselves a parent; all other values are kept as they are.
-    df[parent_col] = df[parent_col].where(
-        df[parent_col].notna() | df[child_col].isin(parent_models),
-        other=""
-    )
-    fig = px.treemap(
-        df,
-        path=[parent_col, child_col],
-        values=value_col,
-        hover_data=['author', 'estimated_parameters', 'created'],
-        color=value_col,
-        color_continuous_scale='Viridis'
-    )
-    fig.update_layout(
-        height=1200,  # make the plot tall
-        margin=dict(t=50, l=25, r=25, b=25)  # add some breathing room
-    )
-    return fig
-# def generate_model_treemap(df, parent_col='merged_derived_from', child_col='model', value_col='downloads'):
-#     # iterate over the rows and stringify the lists in 'merged_derived_from'
-#     df.to_pickle('filtered_tree_df.pkl')
-#     fig = px.icicle(
-#         df,
-#         path=[parent_col, child_col],
-#         values=value_col,
-#         hover_data=['author', 'estimated_parameters', 'created'],
-#         color=value_col,
-#         color_continuous_scale='Viridis'
-#     )
-#     fig.update_layout(
-#         height=1400,
-#         margin=dict(t=50, l=25, r=25, b=25)
-#     )
-#     return fig
-# import plotly.graph_objects as go
-# import networkx as nx
-# import pandas as pd
-# def generate_model_treemap(df, parent_col='merged_derived_from', child_col='model',
-#                         value_col='downloads', top_n=1000):
-#     # Fill missing parents
-#     df[parent_col] = str(df[parent_col][0])
-#     # Keep only top_n by downloads
-#     df = df.sort_values(value_col, ascending=False).head(top_n)
-#     # Build directed graph
-#     G = nx.DiGraph()
-#     for _, row in df.iterrows():
-#         parent = row[parent_col]
-#         child = row[child_col]
-#         G.add_edge(parent, child, weight=row.get(value_col, 1))
-#     # Layout positions (smaller k → tighter graph)
-#     pos = nx.spring_layout(G, k=0.3, seed=42)
-#     # Edges
-#     edge_x, edge_y = [], []
-#     for parent, child in G.edges():
-#         x0, y0 = pos[parent]
-#         x1, y1 = pos[child]
-#         edge_x += [x0, x1, None]
-#         edge_y += [y0, y1, None]
-#     edge_trace = go.Scatter(
-#         x=edge_x, y=edge_y,
-#         line=dict(width=0.8, color="#888"),
-#         hoverinfo="none",
-#         mode="lines"
-#     )
-#     # Nodes
-#     node_x, node_y, sizes, texts = [], [], [], []
-#     for node in G.nodes():
-#         x, y = pos[node]
-#         node_x.append(x)
-#         node_y.append(y)
-#         downloads = df.loc[df[child_col] == node, value_col].sum()
-#         sizes.append(max(10, downloads**0.3))
-#         texts.append(f"{node}<br>Downloads: {downloads}")
-#     node_trace = go.Scatter(
-#         x=node_x, y=node_y,
-#         mode="markers+text",
-#         text=[n for n in G.nodes()],
-#         textposition="top center",
-#         hovertext=texts,
-#         hoverinfo="text",
-#         marker=dict(
-#             showscale=True,
-#             colorscale="Viridis",
-#             color=sizes,
-#             size=sizes,
-#             colorbar=dict(
-#                 thickness=15,
-#                 title=f"{value_col} (scaled)",
-#                 xanchor="left",
-#             ),
-#             line_width=2
-#         )
-#     )
-#     return go.Figure(data=[edge_trace, node_trace],
-#                      layout=go.Layout(
-#                          title=f"Model Tree (Top {top_n} by {value_col})",
-#                          showlegend=False,
-#                          hovermode="closest",
-#                          margin=dict(b=20, l=5, r=5, t=40),
-#                          xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
-#                          yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
-#                      ))