|
|
import plotly.graph_objects as go |
|
|
from plotly.subplots import make_subplots |
|
|
import pandas as pd |
|
|
|
|
|
# Module-level lookup table loaded once at import time; create_leaderboard reads
# its "author", "model" and "country" columns to attach country flags.
# The path is relative, so it resolves against the current working directory.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling --
# this file must come from a trusted source.
filtered_df = pd.read_pickle("data_frames/filtered_df.pkl")
|
|
|
|
|
def _clip_to_time_range(frame, start_time=None, end_time=None):
    """Return ``frame`` sorted by "time" and limited to [start_time, end_time].

    Both bounds are inclusive; a bound that is ``None`` is left open.
    """
    clipped = frame.sort_values("time")
    if start_time is not None:
        clipped = clipped[clipped["time"] >= start_time]
    if end_time is not None:
        clipped = clipped[clipped["time"] <= end_time]
    return clipped


def create_stacked_area_chart(
    topk_df, gini_df, hhi_df, events, palette, start_time=None, end_time=None
):
    """Build a stacked area chart of rank buckets with two index lines on top.

    The primary y-axis stacks one area per entry of a fixed bucket order
    ("Top 1" ... "Rest"), taken from the rows of ``topk_df`` whose "metric"
    equals that bucket. The secondary y-axis carries the "value" column of
    ``gini_df`` and the "value" column of ``hhi_df`` multiplied by 10.

    Args:
        topk_df: DataFrame with "time", "metric" and "value" columns.
        gini_df: DataFrame with "time" and "value" columns.
        hhi_df: DataFrame with "time" and "value" columns.
        events: Mapping of annotation text -> x position; each entry is drawn
            as a dashed vertical line with its text above the plot. ``None``
            is treated as "no events".
        palette: Non-empty sequence of colours; cycled if it is shorter than
            the number of buckets.
        start_time: Inclusive lower bound on "time", or ``None`` for no bound.
        end_time: Inclusive upper bound on "time", or ``None`` for no bound.

    Returns:
        The assembled plotly figure.
    """
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    metric_order = [
        "Top 1",
        "Top 1 - 10",
        "Top 10 - 100",
        "Top 100 - 1000",
        "Top 1000 - 10000",
        "Rest",
    ]

    # Primary axis: one stacked area per bucket, in the fixed order above.
    for i, metric in enumerate(metric_order):
        # Bounds are tested with "is not None" (inside the helper) so the data
        # filter always agrees with the x-axis range computed further down.
        metric_data = _clip_to_time_range(
            topk_df[topk_df["metric"] == metric], start_time, end_time
        )
        color = palette[i % len(palette)]

        fig.add_trace(
            go.Scatter(
                x=metric_data["time"],
                y=metric_data["value"],
                name=metric,
                mode="lines",
                line=dict(width=0, color=color),
                fill="tonexty" if i > 0 else "tozeroy",
                fillcolor=color,
                stackgroup="one",
                hovertemplate="<b>%{fullData.name}</b><br>"
                + "Time: %{x}<br>"
                + "Value: %{y}<extra></extra>",
            ),
            secondary_y=False,
        )

    def add_index_line(label, x_values, y_values, color):
        """Add one concentration-index line on the secondary y-axis."""
        fig.add_trace(
            go.Scatter(
                x=x_values,
                y=y_values,
                name=label,
                mode="lines",
                line=dict(color=color, width=3),
                yaxis="y2",
                hovertemplate="<b>" + label + "</b><br>"
                + "Time: %{x}<br>"
                + "Value: %{y:.3f}<extra></extra>",
            ),
            secondary_y=True,
        )

    gini_data = _clip_to_time_range(gini_df, start_time, end_time)
    add_index_line(
        "Gini Coefficient", gini_data["time"], gini_data["value"], "#6b46c1"
    )

    hhi_data = _clip_to_time_range(hhi_df, start_time, end_time)
    # HHI is plotted at ten times its stored value, as the trace name states.
    add_index_line("HHI (×10)", hhi_data["time"], hhi_data["value"] * 10, "#ec4899")

    # Event markers: full-height dashed line plus a text label above the plot.
    for event_name, event_date in (events or {}).items():
        fig.add_shape(
            type="line",
            x0=event_date,
            x1=event_date,
            y0=0,
            y1=1,
            yref="paper",
            line=dict(color="#333333", width=2, dash="dash"),
        )
        fig.add_annotation(
            x=event_date,
            y=1,
            yref="paper",
            text=event_name,
            showarrow=False,
            yshift=10,
            font=dict(size=12),
        )

    fig.update_layout(
        autosize=True,
        font_size=14,
        showlegend=True,
        margin=dict(l=60, r=60, t=40, b=60),
        plot_bgcolor="white",
        hovermode="x unified",
    )

    # Pin only the bounds that were supplied; None leaves that side to plotly.
    if start_time is None and end_time is None:
        xaxis_range = None
    else:
        xaxis_range = [start_time, end_time]

    fig.update_xaxes(
        title_text="",
        showgrid=True,
        gridcolor="lightgray",
        gridwidth=1,
        range=xaxis_range,
    )
    fig.update_yaxes(
        title_text="Model Market Share",
        showgrid=True,
        gridcolor="lightgray",
        gridwidth=1,
        secondary_y=False,
    )
    fig.update_yaxes(
        title_text="Concentration Indices", showgrid=False, secondary_y=True
    )

    return fig
|
|
|
|
|
|
|
|
def create_world_map(
    df, time_col="time", metric_col="metric", value_col="value", top_n_labels=10, start_time=None, end_time=None
):
    """Build a choropleth of the mean ``value_col`` per country over a time window.

    Rows of ``df`` are matched to ISO-3 country codes through a fixed
    name -> code table applied to ``metric_col``; rows whose name is not in the
    table are dropped. The remaining rows inside [start_time, end_time] are
    averaged per (name, code) pair and the mean is multiplied by 100 for the
    colour scale.

    Args:
        df: Source DataFrame. It is not modified.
        time_col: Name of the time column.
        metric_col: Name of the column holding country names.
        value_col: Name of the numeric column that is averaged.
        top_n_labels: Currently unused; kept for interface compatibility.
        start_time: Inclusive lower bound; defaults to the smallest time in ``df``.
        end_time: Inclusive upper bound; defaults to the largest time in ``df``.

    Returns:
        The assembled plotly figure. An empty ``df`` yields a map with no
        coloured countries instead of raising.
    """
    country_code_map = {
        "Germany": "DEU",
        "United States of America": "USA",
        "China": "CHN",
        "France": "FRA",
        "India": "IND",
        "Israel": "ISR",
        "South Korea": "KOR",
        "United Kingdom": "GBR",
        "Switzerland": "CHE",
        "United Arab Emirates": "ARE",
        "Vietnam": "VNM",
        "Singapore": "SGP",
        "Chile": "CHL",
        "Hong Kong": "HKG",
        "Japan": "JPN",
        "Canada": "CAN",
        "Spain": "ESP",
        "Finland": "FIN",
        "Indonesia": "IDN",
        "Russia": "RUS",
        "Iran": "IRN",
        "Belarus": "BLR",
        "Thailand": "THA",
        "UAE": "ARE",
        "Argentina": "ARG",
        "Iceland": "ISL",
        "Poland": "POL",
        "Sweden": "SWE",
        "Taiwan": "TWN",
        "Lebanon": "LBN",
        "Algeria": "DZA",
        "Bulgaria": "BGR",
        "Norway": "NOR",
        "Netherlands": "NLD",
        "Hungary": "HUN",
        "Estonia": "EST",
        "Qatar": "QAT",
        "Brazil": "BRA",
        "Morocco": "MAR",
        "Slovenia": "SVN",
        "Ghana": "GHA",
        "Uganda": "UGA",
        "Turkey": "TUR",
    }

    # assign() returns a new frame, so the caller's DataFrame does not
    # silently gain a "country_code" column.
    mapped_data = df.assign(
        country_code=df[metric_col].map(country_code_map)
    ).dropna(subset=["country_code"])

    fig = make_subplots(
        rows=1,
        cols=1,
        specs=[[{"type": "geo"}]],
    )

    def aggregate_time_range(lower, upper):
        """Mean value per (name, code) inside [lower, upper], largest first."""
        window = mapped_data
        # A bound can only still be None here when df had no rows at all.
        if lower is not None:
            window = window[window[time_col] >= lower]
        if upper is not None:
            window = window[window[time_col] <= upper]

        agg_data = (
            window.groupby([metric_col, "country_code"])[value_col]
            .mean()
            .reset_index()
        )
        agg_data["percentage"] = agg_data[value_col] * 100
        return agg_data.sort_values("percentage", ascending=False)

    # Default the window to the full span of the data; skip this when there
    # is no data so an empty frame does not raise IndexError.
    times = sorted(df[time_col].unique())
    if times:
        if start_time is None:
            start_time = times[0]
        if end_time is None:
            end_time = times[-1]
    initial_data = aggregate_time_range(start_time, end_time)

    hover_text = [
        f"<b>{name}</b><br>"
        f"Avg Downloads: {percentage:.1f}% of total<br>"
        f"Avg Value: {value:.6f}"
        for name, percentage, value in zip(
            initial_data[metric_col],
            initial_data["percentage"],
            initial_data[value_col],
        )
    ]

    fig.add_trace(
        go.Choropleth(
            locations=initial_data["country_code"],
            z=initial_data["percentage"],
            text=hover_text,
            hovertemplate="%{text}<extra></extra>",
            colorscale=[
                "#001219",
                "#0a9396",
                "#94d2bd",
                "#e9d8a6",
                "#ee9b00",
                "#ca6702",
                "#bb3e03",
                "#9b2226",
            ],
            colorbar=dict(
                title="Avg % of Total Downloads",
                tickfont=dict(size=12),
                len=0.6,
                x=1.02,
                y=0.7,
            ),
            marker_line_color="#ffffff",
            marker_line_width=1.5,
            geo="geo",
        ),
        row=1,
        col=1,
    )

    fig.update_layout(
        title=dict(
            text="Model Downloads by Country",
            x=0.5,
            font=dict(size=20),
        ),
        width=1200,
        height=800,
        plot_bgcolor="#ffffff",
        paper_bgcolor="#ffffff",
        margin=dict(l=0, r=120, t=100, b=60),
    )

    fig.update_geos(
        showframe=False,
        showland=True,
        landcolor="#d0cfcf",
        coastlinecolor="#b8b8b8",
        projection_type="natural earth",
        bgcolor="#ffffff",
    )

    return fig
|
|
|
|
|
def create_range_slider(df):
    """Build a short, axis-only figure spanning the distinct "time" values of ``df``.

    The figure holds a single fully transparent line at y=0 with hover
    disabled, a hidden y-axis and a date-typed x-axis whose rangeslider is
    set to not visible. An empty ``df``, or one without a "time" column,
    yields a blank figure.
    """
    usable = (not df.empty) and "time" in df.columns
    if not usable:
        return go.Figure()

    timeline = sorted(df["time"].unique())

    # Transparent placeholder trace: it exists only to give the x-axis a span.
    placeholder = go.Scatter(
        x=timeline,
        y=[0 for _ in timeline],
        mode="lines",
        line={"color": "rgba(0,0,0,0)"},
        hoverinfo="skip",
        showlegend=False,
    )

    fig = go.Figure()
    fig.add_trace(placeholder)

    x_axis = {"rangeslider": {"visible": False}, "type": "date"}
    y_axis = {"visible": False}
    margins = {"t": 20, "b": 20, "l": 20, "r": 20}
    fig.update_layout(xaxis=x_axis, yaxis=y_axis, margin=margins, height=100)

    return fig
|
|
|
|
|
def create_leaderboard(country_df, developer_df, model_df, start_time=None, end_time=None, top_n=10):
    """Build three side-by-side tables: top countries, developers and models.

    Each input frame needs "time", "metric" and "value" columns. Rows inside
    [start_time, end_time] are summed per "metric"; the ``top_n`` largest sums
    are shown. The "% of total" column is each row's share of the summed
    value of the *displayed* rows (not of the whole dataset).

    The "Attributes" column holds flag emoji. For countries it is looked up
    from the country name directly; for developers and models the countries
    come from the module-level ``filtered_df`` ("author"/"model" -> "country").
    An entity linked to several countries gets all of their flags in one
    cell. Totals are computed before any country information is attached, so
    such entities are not double-counted and appear once.

    Args:
        country_df: Per-country values. Not modified.
        developer_df: Per-developer values. Not modified.
        model_df: Per-model values. Not modified.
        start_time: Inclusive lower bound; defaults to the earliest time in
            ``country_df``.
        end_time: Inclusive upper bound; defaults to the latest time in
            ``country_df``.
        top_n: Maximum number of rows per table.

    Returns:
        The assembled plotly figure, or a blank figure when all three frames
        are empty inside the window.
    """
    country_emoji_map = {
        "United States of America": "🇺🇸",
        "China": "🇨🇳",
        "Germany": "🇩🇪",
        "France": "🇫🇷",
        "India": "🇮🇳",
        "Italy": "🇮🇹",
        "Japan": "🇯🇵",
        "South Korea": "🇰🇷",
        "United Kingdom": "🇬🇧",
        "Canada": "🇨🇦",
        "Brazil": "🇧🇷",
        "Australia": "🇦🇺",
        "Unknown": "❓",
        "Finland": "🇫🇮",
        "Lebanon": "🇱🇧 ",
    }

    def with_parsed_time(frame):
        """Return a copy of ``frame`` whose "time" column is datetime-typed."""
        # assign() builds a new frame, leaving the caller's DataFrame untouched.
        return frame.assign(time=pd.to_datetime(frame["time"]))

    country_df = with_parsed_time(country_df)
    developer_df = with_parsed_time(developer_df)
    model_df = with_parsed_time(model_df)

    def flags_by(key_col):
        """Series mapping each ``filtered_df[key_col]`` value to its joined flags."""
        pairs = filtered_df[[key_col, "country"]].dropna().drop_duplicates()
        flags = pairs["country"].map(country_emoji_map).fillna("")
        # One entry per key, so mapping it onto a table can never add rows.
        return flags.groupby(pairs[key_col]).agg("".join)

    if start_time is None:
        start_time = country_df["time"].min()
    if end_time is None:
        end_time = country_df["time"].max()

    def within_window(frame):
        """Rows of ``frame`` whose time lies in [start_time, end_time]."""
        return frame[(frame["time"] >= start_time) & (frame["time"] <= end_time)]

    country_df_filtered = within_window(country_df)
    developer_df_filtered = within_window(developer_df)
    model_df_filtered = within_window(model_df)

    if country_df_filtered.empty and developer_df_filtered.empty and model_df_filtered.empty:
        return go.Figure()

    def get_top_n_leaderboard(frame, label, flag_lookup):
        """Top ``top_n`` rows of ``frame`` by summed value, with flags and share."""
        top = (
            frame.groupby("metric")["value"]
            .sum()
            .sort_values(ascending=False)
            .head(top_n)
            .reset_index()
            .rename(columns={"metric": label, "value": "Total Value"})
        )
        # Share is relative to the rows shown, i.e. the top-N sum.
        total_value = top["Total Value"].sum()
        if total_value > 0:
            top["% of total"] = top["Total Value"] / total_value * 100
        else:
            top["% of total"] = 0
        top["Attributes"] = top[label].map(flag_lookup).fillna("")
        return top[[label, "Attributes", "% of total"]]

    top_countries = get_top_n_leaderboard(country_df_filtered, "Country", country_emoji_map)
    top_developers = get_top_n_leaderboard(developer_df_filtered, "Developer", flags_by("author"))
    top_models = get_top_n_leaderboard(model_df_filtered, "Model", flags_by("model"))

    fig = make_subplots(
        rows=1, cols=3,
        subplot_titles=("Top Countries", "Top Developers", "Top Models"),
        specs=[[{"type": "table"}, {"type": "table"}, {"type": "table"}]]
    )

    # One table per leaderboard, left to right.
    for col_index, table in enumerate((top_countries, top_developers, top_models), start=1):
        fig.add_trace(
            go.Table(
                header=dict(values=list(table.columns),
                            fill_color="lightgrey", align="left"),
                cells=dict(values=[table[col] for col in table.columns],
                           fill_color="white", align="left"),
            ),
            row=1, col=col_index
        )

    fig.update_layout(
        height=400,
        showlegend=False,
        title_text="Leaderboards"
    )

    return fig
|
|
|
|
|
|