BI-dashboard / charts.py
antonymilne's picture
Clean up imports and CSS formatting
a72697d
raw
history blame
12.4 kB
"""Collection of custom charts."""
import calendar
import pandas as pd
import plotly.graph_objects as go
import vizro.plotly.express as px
from data_processing import COLUMN_TO_AGGFUNC, COLUMN_TO_METRIC, LAST_YEAR, THIS_YEAR, make_customer_sales_pareto_df
from vizro.models.types import capture
# Years covered by the dashboard.
CURRENT_YEAR = 2017
PREVIOUS_YEAR = 2016

# Base palette shared by the charts in this module.
PRIMARY_COLOR = "#2251ff"
SECONDARY_COLOR = "#A0A2A8"
ORANGE_COLOR = "#f6c343"
GREEN_COLOR = "#60c96c"
RED_COLOR = "#f17e7e"

# Line/bar colour for each human-readable year label.
YEAR_COLOR_MAP = {
    "This year": PRIMARY_COLOR,
    "Last year": SECONDARY_COLOR,
}

# Nine-step diverging scales; entries run from the lowest value to the highest value.
DIVERGING_RED_GREEN = [
    "#a84545",  # lowest value
    "#b82525",
    "#e33b3b",
    "#f17e7e",
    "#e6d7bc",
    "#75f0e7",
    "#0bdacb",
    "#13b8ab",
    "#108980",  # highest value
]
DIVERGING_RED_BLUE = [
    "#a84545",  # lowest value
    "#b82525",
    "#e33b3b",
    "#f17e7e",
    "#e6e6e6",
    "#99c4ff",
    "#2251ff",
    "#061f79",
    "#051c2c",  # highest value
]
# TODO: modify Vizro chart template instead of putting colors here? Put colors in colors SimpleNamespace?
# Helper functions
def _aggregate_data(data_frame, group_columns, column):
    """Group ``data_frame`` and reduce ``column`` with its registered aggregation.

    Args:
        data_frame: Frame to aggregate.
        group_columns: Column name, or list of column names, to group by.
        column: Column to aggregate; its aggregation is looked up in ``COLUMN_TO_AGGFUNC``.

    Returns:
        A new frame holding the group columns (as regular columns, not as the index) plus ``column``.
    """
    grouped = data_frame.groupby(group_columns, as_index=False)
    aggregation = {column: COLUMN_TO_AGGFUNC[column]}
    return grouped.agg(aggregation)
# TODO: move to data_processing?
def _relabel_years(data_frame):
    """Swap the values of the "Year" column for display labels.

    ``THIS_YEAR`` becomes "This year" and ``LAST_YEAR`` becomes "Last year"; any other value has no
    entry in the mapping and is therefore turned into NaN by ``Series.map``.

    Note that the "Year" column of ``data_frame`` is overwritten in place; the same frame is returned.
    """
    year_labels = {THIS_YEAR: "This year", LAST_YEAR: "Last year"}
    data_frame["Year"] = data_frame["Year"].map(year_labels)
    return data_frame
def _create_year_comparison_bar_chart(data_frame, group_column, column, title_suffix):
    """Build a grouped bar chart with one bar per year for every value of ``group_column``.

    Args:
        data_frame: Source data; must contain ``group_column``, "Year" and ``column``.
        group_column: Column whose values are placed on the x-axis.
        column: Metric column to aggregate and plot on the y-axis.
        title_suffix: Text placed after the metric name in the chart title.

    Returns:
        The bar figure, with axis titles and the legend hidden.
    """
    yearly_totals = _relabel_years(_aggregate_data(data_frame, [group_column, "Year"], column))
    chart_title = f"{COLUMN_TO_METRIC[column]} | {title_suffix}"

    figure = px.bar(
        yearly_totals,
        x=group_column,
        y=column,
        color="Year",
        barmode="group",
        title=chart_title,
        color_discrete_map=YEAR_COLOR_MAP,
    )
    figure.update_layout(
        xaxis_title=None,
        yaxis_title=None,
        bargap=0.4,
        showlegend=False,
    )
    return figure
# Chart functions in order of usage in app.py
@capture("graph")
def overview_by_month(data_frame, column):
    """Line chart of ``column`` per month with one line per year.

    Args:
        data_frame: Source data; must contain "Year", "Month" and ``column``. "Month" values are
            looked up as the integers 1-12; anything else becomes NaN.
        column: Metric column to aggregate (via ``COLUMN_TO_AGGFUNC``) and plot.

    Returns:
        The line figure. The "This year" line is additionally filled down to zero.
    """
    grouped_df = _aggregate_data(data_frame, ["Year", "Month"], column)
    # Relabel for plotting in human-readable way.
    month_labels = {i: calendar.month_abbr[i] for i in range(1, 13)}
    grouped_df["Month"] = grouped_df["Month"].map(month_labels)
    grouped_df = _relabel_years(grouped_df)
    fig = px.line(
        grouped_df,
        x="Month",
        color="Year",
        y=column,
        markers=True,
        title=f"{COLUMN_TO_METRIC[column]} | By Month",
        color_discrete_map=YEAR_COLOR_MAP,
    )
    # Pick the trace by name rather than by position: when the (filtered) data holds a single year
    # there is only one trace, so a positional lookup would raise IndexError or style the wrong line.
    # NOTE(review): assumes the trace that used to sit at index 1 is "This year", i.e. that
    # LAST_YEAR sorts before THIS_YEAR - confirm against data_processing.
    fig.update_traces(line_width=2, fill="tozeroy", selector={"name": "This year"})
    fig.update_layout(
        # The x-axis is categorical; the range presumably frames the 12 month positions 0..11.
        xaxis={"showgrid": False, "title": None, "range": [-0.1, 11.1]},
        yaxis_title=None,
        legend={"yanchor": "top", "y": 1.2, "xanchor": "right", "x": 1, "title": None},
    )
    return fig
@capture("graph")
def overview_by_order_status(data_frame, column):
    """Donut chart showing how ``column`` is split across order statuses.

    Args:
        data_frame: Source data; must contain "Order Status" and ``column``.
        column: Metric column to aggregate (via ``COLUMN_TO_AGGFUNC``) and plot.

    Returns:
        The pie figure with a 0.6 hole and a vertical legend.
    """
    status_totals = _aggregate_data(data_frame, "Order Status", column)
    status_colors = {
        "In Transit": PRIMARY_COLOR,
        "Processing": ORANGE_COLOR,
        "Delivered": GREEN_COLOR,
    }
    legend_settings = {"yanchor": "bottom", "y": -0.2, "xanchor": "right", "orientation": "v"}

    figure = px.pie(
        status_totals,
        names="Order Status",
        values=column,
        color="Order Status",
        title=f"{COLUMN_TO_METRIC[column]} | By Order Status",
        color_discrete_map=status_colors,
        hole=0.6,
    )
    figure.update_layout(legend=legend_settings)
    return figure
@capture("graph")
def overview_by_region(data_frame, column):
    """Horizontal lollipop chart of ``column`` per region.

    Thin horizontal bars form the sticks and a marker-only scatter trace forms the heads.

    Args:
        data_frame: Source data; must contain "Region" and ``column``.
        column: Metric column to aggregate (via ``COLUMN_TO_AGGFUNC``) and plot.

    Returns:
        The figure, with regions sorted by ascending value and hover disabled on the bars.
    """
    region_totals = _aggregate_data(data_frame, "Region", column)
    region_totals = region_totals.sort_values(column, ascending=True)

    figure = px.bar(
        region_totals,
        x=column,
        y="Region",
        orientation="h",
        title=f"{COLUMN_TO_METRIC[column]} | By Region",
        color_discrete_sequence=[PRIMARY_COLOR],
    )

    # Lollipop heads: one marker at the end of every bar.
    head_marker = {
        "size": 14,
        "color": PRIMARY_COLOR,
        "line": {"color": PRIMARY_COLOR, "width": 1.5},
    }
    figure.add_scatter(
        x=region_totals[column],
        y=region_totals["Region"],
        mode="markers",
        marker=head_marker,
        showlegend=False,
    )

    # A large bar gap leaves only thin sticks behind the markers.
    figure.update_layout(xaxis_title=None, yaxis_title=None, bargap=0.8, showlegend=False)
    # Only the markers should respond to hover.
    figure.update_traces(hoverinfo="skip", selector={"type": "bar"})
    return figure
@capture("graph")
def overview_by_customer_segment(data_frame, column):
    """Grouped bar chart of ``column`` per customer segment, this year next to last year."""
    return _create_year_comparison_bar_chart(
        data_frame=data_frame,
        group_column="Segment",
        column=column,
        title_suffix="By Customer Segment",
    )
@capture("graph")
def overview_by_product_category(data_frame, column):
    """Grouped bar chart of ``column`` per product category, this year next to last year."""
    return _create_year_comparison_bar_chart(
        data_frame=data_frame,
        group_column="Category",
        column=column,
        title_suffix="By Product Category",
    )
# TODO: label with dollars, change color scale depending on metric.
@capture("graph")
def regions_map_chart(data_frame, custom_data, column):
    """Choropleth map of US states coloured by ``column``.

    Args:
        data_frame: Source data; must contain "State Code", "Region" and ``column``.
        custom_data: Columns forwarded to Plotly Express as ``custom_data``. They are read from the
            aggregated frame, which only holds "State Code", "Region" and ``column``.
        column: Metric column to aggregate (via ``COLUMN_TO_AGGFUNC``) and map to colour.

    Returns:
        The choropleth figure with a thin horizontal colour bar.
    """
    state_totals = _aggregate_data(data_frame, ["State Code", "Region"], column)

    figure = px.choropleth(
        state_totals,
        locations="State Code",
        locationmode="USA-states",
        color=column,
        hover_name="Region",
        hover_data=column,
        scope="usa",
        custom_data=custom_data,
        color_continuous_scale=DIVERGING_RED_BLUE,
        # Centre the diverging scale on zero so that sign decides the hue.
        color_continuous_midpoint=0,
    )

    tight_margin = {"l": 10, "r": 10, "t": 10, "b": 10}
    figure.update_layout(margin=tight_margin)

    colorbar_settings = {
        "thickness": 10,
        "title": {"side": "bottom"},
        "orientation": "h",
        "x": 0.5,
        "y": 0,
    }
    figure.update_coloraxes(colorbar=colorbar_settings)
    return figure
# TODO: label with dollars
@capture("graph")
def regions_top_n_chart(data_frame, x, y, n):
    """Horizontal bar chart of the ``n`` groups of ``y`` with the largest aggregated ``x``.

    Args:
        data_frame: Source data; must contain the ``x`` and ``y`` columns.
        x: Metric column, aggregated per group with ``COLUMN_TO_AGGFUNC[x]``.
        y: Column to group by; its values label the bars.
        n: Number of groups to show.

    Returns:
        The bar figure titled "Top {n} by {x}".
    """
    totals = data_frame.groupby(y).agg({x: COLUMN_TO_AGGFUNC[x]})
    # Sort ascending and keep the *last* n rows: these are the n largest groups (taking the first n
    # rows would select the smallest ones, contradicting the "Top n" title). The ascending order is
    # kept so that the largest bar is drawn at the top of the horizontal chart.
    top_df = totals.sort_values(x).tail(n).reset_index()
    fig = px.bar(
        top_df,
        x=x,
        y=y,
        orientation="h",
        color_discrete_sequence=[PRIMARY_COLOR],
        title=f"Top {n} by {x}",
    )
    return fig
# TODO: check and refactor
@capture("graph")
def bar_chart_by_category(data_frame, custom_data):
    """Bar chart of "Sales" by category, or by sub-category once a single category is left.

    Args:
        data_frame: Source data; must contain "Category", "Sub-Category" and "Sales".
        custom_data: Columns forwarded to Plotly Express as ``custom_data``.

    Returns:
        The bar figure with the y-axis and legend hidden.
    """
    categories = data_frame["Category"]
    # Drill down to sub-categories only when every row belongs to the same category. The emptiness
    # check keeps `.iloc[0]` from raising IndexError when filters leave no rows at all; in that case
    # the top-level "Category" view is used.
    drilled_down = not categories.empty and categories.eq(categories.iloc[0]).all()
    x = "Sub-Category" if drilled_down else "Category"

    fig = px.bar(
        data_frame,
        x=x,
        y="Sales",
        title=f"Sales | By {x} <br><sup> 💡 Click on the category to drill-down to sub-category. "
        f"Reset by using reset button next to the theme switch.</sup>",
        custom_data=custom_data,
        color_discrete_sequence=[PRIMARY_COLOR],
    )
    fig.update_layout(
        yaxis={"visible": False},
        showlegend=False,
        bargap=0.6,
        xaxis_title=None,
        yaxis_title=None,
    )
    return fig
# TODO: check and refactor
@capture("graph")
def scatter_with_quadrants(
    data_frame: pd.DataFrame,
    x: str,
    y: str,
    custom_data: list[str],
    highlight_sub_category=None,
):
    """Scatter plot of sub-categories drawn over four shaded quadrants.

    Args:
        data_frame: Source data; must contain ``x``, ``y``, "Sub-Category", "Profit", "Sales",
            "Profit Margin" and "Profit Absolute" (used for marker size).
        x: Column plotted on the x-axis (labelled "Sales" in the hover text).
        y: Column plotted on the y-axis (labelled "Profit" in the hover text).
        custom_data: Columns attached to every point as ``customdata``; the first one is shown as
            the heading of the hover text.
        highlight_sub_category: Optional "Sub-Category" value to emphasise with an extra labelled
            marker. Ignored when it matches no row.

    Returns:
        The scatter figure with reference lines, quadrant shading and a shared hover template.
    """
    fig = px.scatter(
        data_frame=data_frame,
        x=x,
        y=y,
        custom_data=custom_data,
        color="Sub-Category",
        color_discrete_sequence=[SECONDARY_COLOR],
        size="Profit Absolute",
        size_max=20,
        opacity=0.7,
        hover_data=["Sub-Category", "Profit", "Sales", "Profit Margin"],
        title="Profit vs. Sales by Sub-Category",
    )

    if highlight_sub_category is not None:
        mask = data_frame["Sub-Category"] == highlight_sub_category
        if mask.any():
            highlighted = data_frame.loc[mask]
            match_count = len(highlighted)
            profit_values = highlighted["Profit Absolute"]
            max_profit = data_frame["Profit Absolute"].max()
            if max_profit > 0:
                marker_sizes = (profit_values / max_profit * 40).clip(lower=6)
            else:
                # Nothing to scale against (all zero / NaN): fall back to the minimum marker size
                # instead of dividing by zero.
                marker_sizes = [6] * match_count
            fig.add_scatter(
                x=highlighted[x],
                y=highlighted[y],
                mode="markers+text",
                # One label per highlighted point, so no point is left unlabelled when several
                # rows match.
                text=[highlight_sub_category] * match_count,
                # The hover template applied below reads customdata[0]; without this the
                # highlighted marker would have nothing to substitute.
                customdata=highlighted[custom_data].to_numpy() if custom_data else None,
                marker={"color": ORANGE_COLOR, "size": marker_sizes, "line": {"width": 2, "color": "black"}},
                hovertext=highlight_sub_category,
                textposition="bottom center",
            )

    # Reference lines (based on all data): median of x, 20th percentile of y.
    x_reference_line = data_frame[x].quantile(0.5)
    y_reference_line = data_frame[y].quantile(0.2)
    fig.add_hline(y=y_reference_line, line_dash="dash", line_color=SECONDARY_COLOR)
    fig.add_vline(x=x_reference_line, line_dash="dash", line_color=SECONDARY_COLOR)

    # Quadrant shading. The rectangles extend a fixed distance past the data on the top, bottom and
    # right-hand sides; the left-hand edge stops at the smallest x value.
    padding = 700
    x_min, x_max = data_frame[x].min(), data_frame[x].max()
    y_min, y_max = data_frame[y].min(), data_frame[y].max()
    quadrants = [
        # (x0, y0, x1, y1, fillcolor, opacity)
        (x_reference_line, y_max + padding, x_max + padding, y_reference_line, PRIMARY_COLOR, 0.8),  # top right
        (x_min, y_reference_line, x_reference_line, y_max + padding, PRIMARY_COLOR, 0.4),  # top left
        (x_min, y_min - padding, x_reference_line, y_reference_line, RED_COLOR, 0.8),  # bottom left
        (x_reference_line, y_min - padding, x_max + padding, y_reference_line, RED_COLOR, 0.4),  # bottom right
    ]
    for x0, y0, x1, y1, fillcolor, opacity in quadrants:
        fig.add_shape(
            type="rect",
            xref="x",
            yref="y",
            x0=x0,
            y0=y0,
            x1=x1,
            y1=y1,
            fillcolor=fillcolor,
            line_width=0,
            opacity=opacity,
            layer="below",
        )

    # Hover and legend tweaks (applied to every trace, including the highlight marker).
    fig.update_traces(
        showlegend=False,
        hovertemplate="<br>".join(
            [
                "%{customdata[0]}",
                "Profit: %{y:$,.2f}",
                "Sales: %{x:$,.2f}",
            ]
        )
        + "<extra></extra>",
    )
    fig.update_layout(
        title_pad_t=24,
        legend_title_text="Sub-Category",
        legend={"itemsizing": "trace", "orientation": "h", "yanchor": "bottom", "y": -0.3},
    )
    return fig
@capture("graph")
def pareto_customers_chart(data_frame, highlight_customer=None):
    """Pareto curve: cumulative share of sales against share of customers.

    Args:
        data_frame: Source data, passed to ``make_customer_sales_pareto_df``.
        highlight_customer: Optional "Customer Name" to mark on the curve with a labelled marker.
            Ignored when no row carries that name.

    Returns:
        The line figure over three shaded bands (0-20, 20-50 and 50-100 on the x-axis).
    """
    x, y = "% of Total Customers", "Cumulative % of Sales"

    # The Pareto frame is derived here rather than passed in because it has to be computed after
    # Filters have been applied to the data.
    pareto_df = make_customer_sales_pareto_df(data_frame)

    # Prepend an origin point (0, 0) so the line joins to the origin.
    origin_row = pd.DataFrame(
        {x: [0], y: [0], "Customer Name": [""], "Sales": [0], "Cumulative Sales": [0], "Rank": [0]}
    )
    pareto_df = pd.concat([origin_row, pareto_df], ignore_index=True)

    figure = px.line(
        pareto_df,
        x=x,
        y=y,
        markers=True,
        title="Pareto Analysis of Customer Sales",
        color_discrete_sequence=[ORANGE_COLOR],
    )

    # Extra trace marking the highlighted customer (first matching row only).
    if highlight_customer is not None:
        matching_rows = pareto_df[pareto_df["Customer Name"] == highlight_customer]
        if not matching_rows.empty:
            customer_row = matching_rows.iloc[0]
            figure.add_scatter(
                x=[customer_row[x]],
                y=[customer_row[y]],
                mode="markers+text",
                textposition="top right",
                text=[customer_row["Customer Name"]],
                marker={"size": 12, "color": ORANGE_COLOR, "line": {"width": 2}},
            )

    # Coloured background bands, fading out towards the right.
    thresholds = [0, 20, 50, 100]
    opacities = [0.6, 0.3, 0.1]
    for band_start, band_end, opacity in zip(thresholds[:-1], thresholds[1:], opacities):
        figure.add_shape(
            type="rect",
            x0=band_start,
            x1=band_end,
            y0=0,
            y1=100,
            opacity=opacity,
            fillcolor=PRIMARY_COLOR,
            layer="below",
            line_width=0,
        )

    figure.update_layout(showlegend=False, yaxis_range=[0, 102], xaxis_range=[0, 102])
    return figure