Spaces:

economies-open-ai
/

open-model-evolution

Running

App Files Files Community

emsesc committed on Aug 24

Commit

fd38574

0 Parent(s):

initial charts

Browse files

Files changed (21) hide show

.gitignore +2 -0
app.py +113 -0
data_frames/agg_devsize_downloads.pkl +0 -0
data_frames/agg_natsize_downloads.pkl +0 -0
data_frames/dev_gini_df.pkl +0 -0
data_frames/dev_hhi_df.pkl +0 -0
data_frames/dev_topk_df.pkl +0 -0
data_frames/download_arch_cumsum_df.pkl +0 -0
data_frames/download_license_cumsum_df.pkl +0 -0
data_frames/download_method_cumsum_df.pkl +0 -0
data_frames/download_openness_cumsum_df.pkl +0 -0
data_frames/language_concentration_df.pkl +0 -0
data_frames/model_gini_df.pkl +0 -0
data_frames/model_hhi_df.pkl +0 -0
data_frames/model_topk_df.pkl +0 -0
data_frames/nat_gini_df.pkl +0 -0
data_frames/nat_hhi_df.pkl +0 -0
data_frames/nat_topk_df.pkl +0 -0
graphs/model_characteristics.py +83 -0
graphs/model_market_share.py +143 -0
requirements.txt +3 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ venv/*
2	+ __pycache__/*

app.py ADDED Viewed

	@@ -0,0 +1,113 @@

+# Import packages
+from dash import Dash, html, dcc, callback, Input, Output
+import pandas as pd
+import pickle
+import plotly.express as px
+from graphs.model_market_share import create_plotly_stacked_area_chart
+from graphs.model_characteristics import create_plotly_language_concentration_chart
+# Incorporate data
+df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder2007.csv')
+# Initialize the app
+app = Dash()
+# Load all pickle files in data_frames/ as loop
+with open('data_frames/model_topk_df.pkl', 'rb') as f:
+    model_topk_df = pickle.load(f)
+with open('data_frames/model_gini_df.pkl', 'rb') as f:
+    model_gini_df = pickle.load(f)
+with open('data_frames/model_hhi_df.pkl', 'rb') as f:
+    model_hhi_df = pickle.load(f)
+with open('data_frames/language_concentration_df.pkl', 'rb') as f:
+    language_concentration_df = pickle.load(f)
+with open('data_frames/download_license_cumsum_df.pkl', 'rb') as f:
+    license_concentration_df = pickle.load(f)
+TEMP_MODEL_EVENTS = {
+    # "Yolo World Mirror": "2024-03-01",
+    "Llama 3": "2024-04-17",
+    "Stable Cascade": "2024-02-02",
+    "Stable Diffusion 3": "2024-05-30",
+    # "embed/upscale": "2023-03-24",
+    "DeepSeek-R1": "2025-01-20",
+    "Gemma-3 12B QAT": "2025-04-15", # gemma-3-12b-it-qat-4bit
+    # "Qwen": "2025-03-05",
+    # "Flux RedFlux": "2025-04-12",
+    # "DeepSeek-V3": "2025-03-24",
+    # "bloom": "2022-05-19",
+    "DALLE2-PyTorch": "2022-06-25",
+    "Stable Diffusion": "2022-08-10",
+    "CLIP ViT": "2021-01-05",
+    "YOLOv8": "2023-04-26",
+    "Sentence Transformer MiniLM v2": "2021-08-30",
+}
+PALETTE_0 = [
+    "#335C67",
+    "#FFF3B0",
+    "#E09F3E",
+    "#9E2A2B",
+    "#540B0E"
+]
+fig = create_plotly_stacked_area_chart(
+    model_topk_df, model_gini_df, model_hhi_df, TEMP_MODEL_EVENTS, PALETTE_0
+)
+LANG_SEGMENT_ORDER = [
+    'Monolingual: EN', 'Monolingual: HR', 'Monolingual: M/LR',
+    'Multilingual: HR', 'Multilingual', 'Unknown',
+]
+fig2 = create_plotly_language_concentration_chart(
+    language_concentration_df, 'time', 'metric', 'value', LANG_SEGMENT_ORDER, PALETTE_0
+)
+LICENSE_SEGMENT_ORDER = [
+    "Open Use", "Open Use (Acceptable Use Policy)", "Open Use (Non-Commercial Only)", "Attribution",
+    "Acceptable Use Policy", "Non-Commercial Only", "Undocumented", "Undocumented (Acceptable Use Policy)",
+]
+fig3 = create_plotly_language_concentration_chart(
+    license_concentration_df, 'period', 'status', 'percent', LICENSE_SEGMENT_ORDER, PALETTE_0
+)
+# Make global font family
+fig.update_layout(font_family="Inter")
+fig2.update_layout(font_family="Inter")
+fig3.update_layout(font_family="Inter")
+# App layout
+app.layout = html.Div(
+    [
+        html.Div(children='Visualizing the Open Model Ecosystem', style={'fontSize': 28, 'fontWeight': 'bold', 'marginBottom': 10}),
+        html.Div(children='An interactive dashboard to explore trends in open models on Hugging Face', style={'fontSize': 16, 'marginBottom': 20}),
+        html.Hr(),
+        dcc.Tabs([
+            dcc.Tab(label='Model Market Share', children=[
+                dcc.Graph(figure=fig, id='stacked-area-chart'),
+            ]),
+            dcc.Tab(label='Model Characteristics', children=[
+                dcc.Graph(id='language-concentration-chart'),
+                html.Div([
+                    dcc.Dropdown(['Language Concentration', 'Architecture', 'License', 'Method'], 'Language Concentration', id='dropdown'),
+                ]),
+            ]),
+        ])
+    ],
+    style={'fontFamily': 'Inter'}
+)
+# On dropdown change, update graph
+@app.callback(
+    Output('language-concentration-chart', 'figure'),
+    [Input('dropdown', 'value')]
+)
+def update_graph(selected_metric):
+    if selected_metric == 'Language Concentration':
+        return fig2
+    elif selected_metric == 'License':
+        return fig3
+# Run the app
+# Only starts the server when this file is executed directly, not on import.
+# NOTE(review): debug=True enables Dash's development mode -- confirm this is
+# intended if the same entry point also serves the hosted deployment.
+if __name__ == '__main__':
+    app.run(debug=True)

data_frames/agg_devsize_downloads.pkl ADDED Viewed

Binary file (24.1 kB). View file

data_frames/agg_natsize_downloads.pkl ADDED Viewed

Binary file (2.09 kB). View file

data_frames/dev_gini_df.pkl ADDED Viewed

Binary file (5.59 kB). View file

data_frames/dev_hhi_df.pkl ADDED Viewed

Binary file (5.59 kB). View file

data_frames/dev_topk_df.pkl ADDED Viewed

Binary file (28.3 kB). View file

data_frames/download_arch_cumsum_df.pkl ADDED Viewed

Binary file (78.6 kB). View file

data_frames/download_license_cumsum_df.pkl ADDED Viewed

Binary file (64.5 kB). View file

data_frames/download_method_cumsum_df.pkl ADDED Viewed

Binary file (78.6 kB). View file

data_frames/download_openness_cumsum_df.pkl ADDED Viewed

Binary file (29.3 kB). View file

data_frames/language_concentration_df.pkl ADDED Viewed

Binary file (28.4 kB). View file

data_frames/model_gini_df.pkl ADDED Viewed

Binary file (5.59 kB). View file

data_frames/model_hhi_df.pkl ADDED Viewed

Binary file (5.59 kB). View file

data_frames/model_topk_df.pkl ADDED Viewed

Binary file (28.3 kB). View file

data_frames/nat_gini_df.pkl ADDED Viewed

Binary file (5.59 kB). View file

data_frames/nat_hhi_df.pkl ADDED Viewed

Binary file (5.59 kB). View file

data_frames/nat_topk_df.pkl ADDED Viewed

Binary file (28.3 kB). View file

graphs/model_characteristics.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import plotly.graph_objects as go
+def create_plotly_language_concentration_chart(
+    language_concentration_df,
+    period_col,
+    metric_col,
+    value_col,
+    LANG_SEGMENT_ORDER,
+    PALETTE_0
+):
+    """
+    Convert the language concentration visualization to Plotly
+    """
+    # Create figure
+    fig = go.Figure()
+    # Get unique time periods
+    time_periods = sorted(language_concentration_df[period_col].unique())
+    # Create stacked area traces
+    for i, metric in enumerate(LANG_SEGMENT_ORDER):
+        metric_data = language_concentration_df[language_concentration_df[metric_col] == metric]
+        # Sort by time and get values
+        metric_data = metric_data.sort_values(period_col)
+        x_vals = metric_data[period_col]
+        y_vals = metric_data[value_col]
+        # Add area trace
+        fig.add_trace(
+            go.Scatter(
+                x=x_vals,
+                y=y_vals,
+                name=metric,
+                mode='lines',
+                line=dict(width=0),
+                fill='tonexty' if i > 0 else 'tozeroy',
+                fillcolor=PALETTE_0[i % len(PALETTE_0)],
+                stackgroup='one',
+                hovertemplate='<b>%{fullData.name}</b><br>' +
+                             'Time: %{x}<br>' +
+                             'Value: %{y}<extra></extra>'
+            )
+        )
+    # Update layout
+    fig.update_layout(
+        width=1000,
+        height=200,
+        font_family="Times New Roman",
+        font_size=14,
+        showlegend=True,  # Show legend for language concentration
+        legend=dict(
+            title="Language Concentration",
+            orientation="v",
+            yanchor="top",
+            y=1,
+            xanchor="left",
+            x=1.02
+        ),
+        margin=dict(l=60, r=150, t=40, b=60),  # Extra right margin for legend
+        plot_bgcolor='white',
+        hovermode='x unified'
+    )
+    # Update x-axis
+    fig.update_xaxes(
+        title_text="",
+        showgrid=True,
+        gridcolor='lightgray',
+        gridwidth=1
+    )
+    # Update y-axis
+    fig.update_yaxes(
+        title_text="",
+        showgrid=True,
+        gridcolor='lightgray',
+        gridwidth=1
+    )
+    return fig

graphs/model_market_share.py ADDED Viewed

	@@ -0,0 +1,143 @@

+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+def create_plotly_stacked_area_chart(
+    model_topk_df,
+    model_gini_df,
+    model_hhi_df,
+    TEMP_MODEL_EVENTS,
+    PALETTE_0
+):
+    """
+    Convert the visualization_util stacked area chart to Plotly
+    """
+    # Create subplot with secondary y-axis
+    fig = make_subplots(specs=[[{"secondary_y": True}]])
+    # Define metric order
+    metric_order = ['Top 1', 'Top 1 - 10', 'Top 10 - 100', 'Top 100 - 1000', 'Top 1000 - 10000', 'Rest']
+    # Get unique time periods
+    time_periods = sorted(model_topk_df['time'].unique())
+    # Create stacked area traces
+    for i, metric in enumerate(metric_order):
+        metric_data = model_topk_df[model_topk_df['metric'] == metric]
+        # Sort by time and get values
+        metric_data = metric_data.sort_values('time')
+        x_vals = metric_data['time']
+        y_vals = metric_data['value']
+        # Add area trace
+        fig.add_trace(
+            go.Scatter(
+                x=x_vals,
+                y=y_vals,
+                name=metric,
+                mode='lines',
+                line=dict(width=0, color=PALETTE_0[i % len(PALETTE_0)]),
+                fill='tonexty' if i > 0 else 'tozeroy',
+                fillcolor=PALETTE_0[i % len(PALETTE_0)],  # Add opacity
+                stackgroup='one',
+                hovertemplate='<b>%{fullData.name}</b><br>' +
+                             'Time: %{x}<br>' +
+                             'Value: %{y}<extra></extra>'
+            ),
+            secondary_y=False
+        )
+    # Add overlay lines
+    # Gini Coefficient
+    gini_data = model_gini_df.sort_values('time')
+    fig.add_trace(
+        go.Scatter(
+            x=gini_data['time'],
+            y=gini_data['value'],
+            name='Gini Coefficient',
+            mode='lines',
+            line=dict(color='#6b46c1', width=3),
+            yaxis='y2',
+            hovertemplate='<b>Gini Coefficient</b><br>' +
+                         'Time: %{x}<br>' +
+                         'Value: %{y:.3f}<extra></extra>'
+        ),
+        secondary_y=True
+    )
+    # HHI (×10)
+    hhi_data = model_hhi_df.sort_values('time')
+    fig.add_trace(
+        go.Scatter(
+            x=hhi_data['time'],
+            y=hhi_data['value'] * 10,  # Multiply by 10 as indicated
+            name='HHI (×10)',
+            mode='lines',
+            line=dict(color='#ec4899', width=3),
+            yaxis='y2',
+            hovertemplate='<b>HHI (×10)</b><br>' +
+                         'Time: %{x}<br>' +
+                         'Value: %{y:.3f}<extra></extra>'
+        ),
+        secondary_y=True
+    )
+    # Add vertical lines for events
+    for event_name, event_date in TEMP_MODEL_EVENTS.items():
+        fig.add_shape(
+            type="line",
+            x0=event_date, x1=event_date,
+            y0=0, y1=1,
+            yref="paper",
+            line=dict(color='#333333', width=2, dash='dash')
+        )
+        # Add annotation for the event
+        fig.add_annotation(
+            x=event_date,
+            y=1,
+            yref="paper",
+            text=event_name,
+            showarrow=False,
+            yshift=10,
+            font=dict(size=12)
+        )
+    # Update layout
+    fig.update_layout(
+        width=1000,
+        height=200,
+        font_family="Inter",
+        font_size=14,
+        showlegend=False,  # Set to True if you want to show legend
+        margin=dict(l=60, r=60, t=40, b=60),
+        plot_bgcolor='white',
+        hovermode='x unified'
+    )
+    # Update x-axis
+    fig.update_xaxes(
+        title_text="",
+        showgrid=True,
+        gridcolor='lightgray',
+        gridwidth=1
+    )
+    # Update primary y-axis (left)
+    fig.update_yaxes(
+        title_text="Model Market Share",
+        showgrid=True,
+        gridcolor='lightgray',
+        gridwidth=1,
+        secondary_y=False
+    )
+    # Update secondary y-axis (right)
+    fig.update_yaxes(
+        title_text="Concentration Indices",
+        showgrid=False,
+        secondary_y=True
+    )
+    return fig

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+pandas
+dash
+plotly