Spaces:
Running
Running
| """Collection of custom charts.""" | |
| import calendar | |
| import pandas as pd | |
| import plotly.graph_objects as go | |
| import vizro.plotly.express as px | |
| from data_processing import COLUMN_TO_AGGFUNC, COLUMN_TO_METRIC, LAST_YEAR, THIS_YEAR, make_customer_sales_pareto_df | |
| from vizro.models.types import capture | |
# Reporting years.
# NOTE(review): the helpers in this module relabel years via THIS_YEAR / LAST_YEAR imported from
# data_processing; these two constants are not referenced in the visible code - confirm they are still needed.
CURRENT_YEAR = 2017
PREVIOUS_YEAR = 2016

# Base palette shared by the charts in this module.
PRIMARY_COLOR = "#2251ff"
SECONDARY_COLOR = "#A0A2A8"
ORANGE_COLOR = "#f6c343"
GREEN_COLOR = "#60c96c"
RED_COLOR = "#f17e7e"

# Color per relabelled "Year" value (the labels are produced by _relabel_years).
YEAR_COLOR_MAP = {"This year": PRIMARY_COLOR, "Last year": SECONDARY_COLOR}

# Nine-step diverging scale from reds to greens (presumably lowest value first, like DIVERGING_RED_BLUE).
DIVERGING_RED_GREEN = [
    "#a84545",
    "#b82525",
    "#e33b3b",
    "#f17e7e",
    "#e6d7bc",
    "#75f0e7",
    "#0bdacb",
    "#13b8ab",
    "#108980",
]

# Nine-step diverging scale from reds to blues.
DIVERGING_RED_BLUE = [
    "#a84545",
    "#b82525",
    "#e33b3b",
    "#f17e7e",
    "#e6e6e6",
    "#99c4ff",
    "#2251ff",
    "#061f79",
    "#051c2c",  # highest value
]
# TODO: modify the Vizro chart template instead of defining colors here? Or put the colors in a SimpleNamespace?
# Helper functions
def _aggregate_data(data_frame, group_columns, column):
    """Group ``data_frame`` and reduce ``column`` with its configured aggregation.

    Args:
        data_frame: Frame containing the grouping column(s) and ``column``.
        group_columns: Column name, or list of column names, to group by.
        column: Column to aggregate; it is looked up in ``COLUMN_TO_AGGFUNC``.

    Returns:
        A frame with the grouping columns kept as regular columns (``as_index=False``)
        plus the aggregated ``column``.
    """
    aggregation = {column: COLUMN_TO_AGGFUNC[column]}
    grouped = data_frame.groupby(group_columns, as_index=False)
    return grouped.agg(aggregation)
# TODO: move to data_processing?
def _relabel_years(data_frame):
    """Return a copy of ``data_frame`` with human-readable "Year" labels.

    ``THIS_YEAR`` is mapped to "This year" and ``LAST_YEAR`` to "Last year". Any other
    value has no entry in the mapping, so ``Series.map`` turns it into NaN.

    The input frame is not modified: the relabelled column is attached with
    ``DataFrame.assign``, which returns a new frame. The previous implementation
    overwrote the caller's "Year" column in place.

    Args:
        data_frame: Frame with a "Year" column.

    Returns:
        A new frame, identical to ``data_frame`` except for the relabelled "Year" column.
    """
    year_labels = {THIS_YEAR: "This year", LAST_YEAR: "Last year"}
    return data_frame.assign(Year=data_frame["Year"].map(year_labels))
def _create_year_comparison_bar_chart(data_frame, group_column, column, title_suffix):
    """Build a grouped bar chart of ``column`` per ``group_column``, one bar per year.

    Args:
        data_frame: Frame with ``group_column``, "Year" and ``column``.
        group_column: Column shown on the x axis.
        column: Metric column to aggregate and plot on the y axis.
        title_suffix: Text placed after the metric name in the chart title.

    Returns:
        The bar figure, colored by year, with axis titles and legend hidden.
    """
    per_group_and_year = _relabel_years(_aggregate_data(data_frame, [group_column, "Year"], column))
    chart_title = f"{COLUMN_TO_METRIC[column]} | {title_suffix}"

    figure = px.bar(
        per_group_and_year,
        x=group_column,
        y=column,
        color="Year",
        barmode="group",
        title=chart_title,
        color_discrete_map=YEAR_COLOR_MAP,
    )
    figure.update_layout(xaxis_title=None, yaxis_title=None, bargap=0.4, showlegend=False)
    return figure
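# Chart functions, in order of usage in app.py.
# NOTE(review): `capture` is imported at the top of this file but none of the chart functions visible here is
# decorated with it - confirm that no `@capture(...)` decorators were lost.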
def overview_by_month(data_frame, column):
    """Line chart of ``column`` by month, with one line per year.

    Args:
        data_frame: Frame with "Year", "Month" (month numbers 1-12) and ``column``.
        column: Metric column to aggregate and plot.

    Returns:
        The line figure; the "This year" line is additionally filled down to zero.
    """
    grouped_df = _aggregate_data(data_frame, ["Year", "Month"], column)

    # Relabel for plotting in human-readable way.
    month_labels = {month: calendar.month_abbr[month] for month in range(1, 13)}
    grouped_df["Month"] = grouped_df["Month"].map(month_labels)
    grouped_df = _relabel_years(grouped_df)

    fig = px.line(
        grouped_df,
        x="Month",
        color="Year",
        y=column,
        markers=True,
        title=f"{COLUMN_TO_METRIC[column]} | By Month",
        color_discrete_map=YEAR_COLOR_MAP,
    )

    # Emphasise the current year. The trace is selected by name instead of by position
    # (previously ``fig.data[1]``): when the filtered data contains a single year there is
    # only one trace and positional access raised IndexError.
    # NOTE(review): this assumes index 1 was always the "This year" trace, i.e. LAST_YEAR sorts
    # before THIS_YEAR - confirm.
    fig.update_traces(line_width=2, fill="tozeroy", selector={"name": "This year"})

    fig.update_layout(
        # The x range spans the 12 month categories with a small margin on each side.
        xaxis={"showgrid": False, "title": None, "range": [-0.1, 11.1]},
        yaxis_title=None,
        legend={"yanchor": "top", "y": 1.2, "xanchor": "right", "x": 1, "title": None},
    )
    return fig
def overview_by_order_status(data_frame, column):
    """Donut chart of ``column`` split by order status.

    Args:
        data_frame: Frame with "Order Status" and ``column``.
        column: Metric column to aggregate and use as slice size.

    Returns:
        The pie figure (drawn with a hole) with its legend anchored bottom-right.
    """
    status_colors = {"In Transit": PRIMARY_COLOR, "Processing": ORANGE_COLOR, "Delivered": GREEN_COLOR}
    per_status = _aggregate_data(data_frame, "Order Status", column)

    figure = px.pie(
        per_status,
        names="Order Status",
        values=column,
        color="Order Status",
        title=f"{COLUMN_TO_METRIC[column]} | By Order Status",
        color_discrete_map=status_colors,
        hole=0.6,
    )
    legend_settings = {"yanchor": "bottom", "y": -0.2, "xanchor": "right", "orientation": "v"}
    figure.update_layout(legend=legend_settings)
    return figure
def overview_by_region(data_frame, column):
    """Horizontal lollipop chart of ``column`` per region.

    Thin bars form the sticks and a marker trace drawn at the bar ends forms the heads.

    Args:
        data_frame: Frame with "Region" and ``column``.
        column: Metric column to aggregate and plot along the x axis.

    Returns:
        The figure with regions sorted by ascending value, no axis titles and no legend.
    """
    per_region = _aggregate_data(data_frame, "Region", column).sort_values(column)

    figure = px.bar(
        per_region,
        x=column,
        y="Region",
        orientation="h",
        title=f"{COLUMN_TO_METRIC[column]} | By Region",
        color_discrete_sequence=[PRIMARY_COLOR],
    )
    # Hover is disabled on the bars only; the markers added below keep theirs.
    figure.update_traces(hoverinfo="skip", selector={"type": "bar"})

    # Add scatter markers on top to create lollipop effect
    lollipop_head = {"size": 14, "color": PRIMARY_COLOR, "line": {"color": PRIMARY_COLOR, "width": 1.5}}
    figure.add_trace(
        go.Scatter(
            x=per_region[column],
            y=per_region["Region"],
            mode="markers",
            marker=lollipop_head,
            showlegend=False,
        )
    )

    # A large bargap keeps the bars thin so that they read as sticks.
    figure.update_layout(xaxis_title=None, yaxis_title=None, bargap=0.8, showlegend=False)
    return figure
def overview_by_customer_segment(data_frame, column):
    """Grouped bar chart of ``column`` per customer segment, this year vs last year."""
    return _create_year_comparison_bar_chart(
        data_frame=data_frame,
        group_column="Segment",
        column=column,
        title_suffix="By Customer Segment",
    )
def overview_by_product_category(data_frame, column):
    """Grouped bar chart of ``column`` per product category, this year vs last year."""
    return _create_year_comparison_bar_chart(
        data_frame=data_frame,
        group_column="Category",
        column=column,
        title_suffix="By Product Category",
    )
# TODO: label with dollars; change the color scale depending on the metric.
def regions_map_chart(data_frame, custom_data, column):
    """Choropleth map of US states colored by the aggregated ``column``.

    Args:
        data_frame: Frame with "State Code", "Region" and ``column``.
        custom_data: Passed through to ``px.choropleth`` as ``custom_data``. The figure is
            built from the aggregated frame, so presumably only "State Code", "Region"
            and ``column`` can be referenced - verify against the caller.
        column: Metric column to aggregate per state and map to color.

    Returns:
        The choropleth figure with tight margins and a thin horizontal colorbar.
    """
    per_state = _aggregate_data(data_frame, ["State Code", "Region"], column)

    figure = px.choropleth(
        per_state,
        locations="State Code",
        locationmode="USA-states",
        color=column,
        hover_name="Region",
        hover_data=column,
        scope="usa",
        custom_data=custom_data,
        color_continuous_scale=DIVERGING_RED_BLUE,
        # The diverging scale is centred on zero.
        color_continuous_midpoint=0,
    )

    figure.update_layout(margin={"l": 10, "r": 10, "t": 10, "b": 10})

    colorbar_settings = {"thickness": 10, "title": {"side": "bottom"}, "orientation": "h", "x": 0.5, "y": 0}
    figure.update_coloraxes(colorbar=colorbar_settings)
    return figure
# TODO: label with dollars.
def regions_top_n_chart(data_frame, x, y, n):
    """Horizontal bar chart of the ``n`` groups of ``y`` with the largest aggregated ``x``.

    Args:
        data_frame: Frame with the columns ``x`` and ``y``.
        x: Metric column; aggregated with the function registered in ``COLUMN_TO_AGGFUNC``.
        y: Column whose values form the groups (one bar per group).
        n: Number of groups to show.

    Returns:
        The bar figure titled "Top {n} by {x}".
    """
    aggregated = data_frame.groupby(y).agg({x: COLUMN_TO_AGGFUNC[x]})

    # The frame is sorted ascending, so the n largest groups are the *last* n rows. The previous
    # ``.head(n)`` returned the n smallest groups, contradicting the "Top n" title. Ascending order
    # is kept because Plotly lays out horizontal bars bottom-up, which puts the largest bar on top.
    top_df = aggregated.sort_values(x).tail(n).reset_index()

    fig = px.bar(
        top_df,
        x=x,
        y=y,
        orientation="h",
        color_discrete_sequence=[PRIMARY_COLOR],
        title=f"Top {n} by {x}",
    )
    return fig
# TODO: check and refactor
def bar_chart_by_category(data_frame, custom_data):
    """Bar chart of "Sales" by category, or by sub-category once a single category remains.

    While ``data_frame`` contains several distinct "Category" values the x axis shows
    "Category"; when every row shares the same category it shows "Sub-Category".

    An empty frame (for example after filtering) falls back to the "Category" view.
    The previous implementation read ``iloc[0]`` unconditionally and raised IndexError
    on an empty frame.

    Args:
        data_frame: Frame with "Category", "Sub-Category" and "Sales".
        custom_data: Passed through to ``px.bar`` as ``custom_data``.

    Returns:
        The bar figure with the y axis, the legend and the axis titles hidden.
    """
    categories = data_frame["Category"]
    single_category = not categories.empty and categories.eq(categories.iloc[0]).all()
    x = "Sub-Category" if single_category else "Category"

    fig = px.bar(
        data_frame,
        x=x,
        y="Sales",
        title=f"Sales | By {x} <br><sup> 💡 Click on the category to drill-down to sub-category. "
        "Reset by using reset button next to the theme switch.</sup>",
        custom_data=custom_data,
        color_discrete_sequence=[PRIMARY_COLOR],
    )
    fig.update_layout(
        yaxis={"visible": False},
        showlegend=False,
        bargap=0.6,
        xaxis_title=None,
        yaxis_title=None,
    )
    return fig
# TODO: check and refactor
def scatter_with_quadrants(
    data_frame: pd.DataFrame,
    x: str,
    y: str,
    custom_data: list[str],
    highlight_sub_category=None,
):
    """Scatter plot of sub-categories over four shaded quadrants.

    The quadrants are separated by dashed reference lines at the median of ``x`` and the
    20th percentile of ``y``. Areas above the horizontal line are shaded in the primary
    color, areas below it in red.

    Args:
        data_frame: Frame with ``x``, ``y``, "Sub-Category", "Profit Absolute" (marker size)
            and the hover columns "Profit", "Sales" and "Profit Margin".
        x: Column for the x axis. The hover text labels it "Sales".
        y: Column for the y axis. The hover text labels it "Profit".
        custom_data: Columns attached to every point; the hover text shows the first one.
        highlight_sub_category: Optional "Sub-Category" value to emphasise with an extra,
            labelled marker trace. Ignored when it matches no row.

    Returns:
        The scatter figure with reference lines, quadrant shading and a shared hover template.
    """
    fig = px.scatter(
        data_frame=data_frame,
        x=x,
        y=y,
        custom_data=custom_data,
        color="Sub-Category",
        color_discrete_sequence=[SECONDARY_COLOR],
        size="Profit Absolute",
        size_max=20,
        opacity=0.7,
        hover_data=["Sub-Category", "Profit", "Sales", "Profit Margin"],
        title="Profit vs. Sales by Sub-Category",
    )

    if highlight_sub_category is not None:
        mask = data_frame["Sub-Category"] == highlight_sub_category
        if mask.any():
            highlighted = data_frame.loc[mask]
            profit_values = highlighted["Profit Absolute"]
            max_profit = data_frame["Profit Absolute"].max()
            if max_profit:
                marker_sizes = (profit_values / max_profit * 40).clip(lower=6)
            else:
                # A zero maximum would make the division produce NaN sizes; fall back to the minimum size.
                marker_sizes = profit_values.clip(lower=6)

            # The hover template applied to all traces below reads ``customdata[0]``. The highlight
            # trace is not created by px, so it needs the same custom data attached explicitly;
            # previously it had none and the template could not be resolved for this point.
            highlight_kwargs = {}
            if custom_data:
                custom_columns = [custom_data] if isinstance(custom_data, str) else list(custom_data)
                highlight_kwargs["customdata"] = highlighted[custom_columns].to_numpy()

            fig.add_scatter(
                x=highlighted[x],
                y=highlighted[y],
                mode="markers+text",
                # Single-element list: only the first highlighted point receives the text label.
                text=[highlight_sub_category],
                marker={"color": ORANGE_COLOR, "size": marker_sizes, "line": {"width": 2, "color": "black"}},
                hovertext=highlight_sub_category,
                textposition="bottom center",
                **highlight_kwargs,
            )

    # Reference lines (based on all data)
    x_reference_line = data_frame[x].quantile(0.5)
    y_reference_line = data_frame[y].quantile(0.2)
    fig.add_hline(y=y_reference_line, line_dash="dash", line_color=SECONDARY_COLOR)
    fig.add_vline(x=x_reference_line, line_dash="dash", line_color=SECONDARY_COLOR)

    # Quadrant shading
    x_min, x_max = data_frame[x].min(), data_frame[x].max()
    y_min, y_max = data_frame[y].min(), data_frame[y].max()
    # Fixed margin (in data units) by which the shading extends past the data extremes.
    # NOTE(review): the left edge is deliberately kept at ``x_min`` without padding, as in the
    # original code - confirm whether it should be padded like the other three edges.
    padding = 700
    quadrants = [
        # (x0, y0, x1, y1, fillcolor, opacity)
        (x_reference_line, y_max + padding, x_max + padding, y_reference_line, PRIMARY_COLOR, 0.8),  # top right
        (x_min, y_reference_line, x_reference_line, y_max + padding, PRIMARY_COLOR, 0.4),  # top left
        (x_min, y_min - padding, x_reference_line, y_reference_line, RED_COLOR, 0.8),  # bottom left
        (x_reference_line, y_min - padding, x_max + padding, y_reference_line, RED_COLOR, 0.4),  # bottom right
    ]
    for x0, y0, x1, y1, fillcolor, opacity in quadrants:
        fig.add_shape(
            type="rect",
            xref="x",
            yref="y",
            x0=x0,
            y0=y0,
            x1=x1,
            y1=y1,
            fillcolor=fillcolor,
            line_width=0,
            opacity=opacity,
            layer="below",
        )

    # Hover and legend tweaks
    hover_lines = [
        "%{customdata[0]}",
        "Profit: %{y:$,.2f}",
        "Sales: %{x:$,.2f}",
    ]
    fig.update_traces(
        showlegend=False,
        hovertemplate="<br>".join(hover_lines) + "<extra></extra>",
    )
    fig.update_layout(
        title_pad_t=24,
        legend_title_text="Sub-Category",
        legend={"itemsizing": "trace", "orientation": "h", "yanchor": "bottom", "y": -0.3},
    )
    return fig
def pareto_customers_chart(data_frame, highlight_customer=None):
    """Pareto curve of cumulative sales share against customer share.

    Args:
        data_frame: Frame handed to ``make_customer_sales_pareto_df``. The Pareto frame is
            built inside this chart so that it is computed after Filters are applied.
        highlight_customer: Optional "Customer Name" to mark with a labelled point.
            Ignored when no row carries that name.

    Returns:
        The line figure with shaded bands for 0-20 %, 20-50 % and 50-100 % of customers.
    """
    x, y = "% of Total Customers", "Cumulative % of Sales"

    # Prepend an origin point (0, 0) so that the line starts at the origin.
    origin = pd.DataFrame({x: [0], y: [0], "Customer Name": [""], "Sales": [0], "Cumulative Sales": [0], "Rank": [0]})
    pareto_df = pd.concat([origin, make_customer_sales_pareto_df(data_frame)], ignore_index=True)

    figure = px.line(
        pareto_df,
        x=x,
        y=y,
        markers=True,
        title="Pareto Analysis of Customer Sales",
        color_discrete_sequence=[ORANGE_COLOR],
    )

    # Extra trace marking the highlighted customer (first matching row only).
    if highlight_customer is not None:
        matches = pareto_df[pareto_df["Customer Name"] == highlight_customer]
        if not matches.empty:
            customer = matches.iloc[0]
            figure.add_scatter(
                x=[customer[x]],
                y=[customer[y]],
                mode="markers+text",
                textposition="top right",
                text=[customer["Customer Name"]],
                marker={"size": 12, "color": ORANGE_COLOR, "line": {"width": 2}},
            )

    # Coloured background bands: (x0, x1, opacity), fading out towards the right.
    bands = [(0, 20, 0.6), (20, 50, 0.3), (50, 100, 0.1)]
    for band_start, band_end, band_opacity in bands:
        figure.add_shape(type="rect", x0=band_start, x1=band_end, opacity=band_opacity)
    # Properties shared by every band are applied in one pass.
    figure.update_shapes(y0=0, y1=100, fillcolor=PRIMARY_COLOR, layer="below", line_width=0)

    figure.update_layout(showlegend=False, yaxis={"range": [0, 102]}, xaxis={"range": [0, 102]})
    return figure