| from collections import namedtuple |
| import altair as alt |
| import plotly.express as px |
| import math |
| import polars as pl |
| import streamlit as st |
| |
|
|
| @st.cache_resource |
| def get_values(_df, col_name): |
| return _df.collect()[col_name].unique() |
|
|
|
|
| @st.cache_resource |
| def filter_df(_df, stores:list, weeks:list, categories=[]) -> pl.DataFrame: |
| return _df.filter((pl.col("STORE_ID").is_in(stores)) & |
| (pl.col("comp_week").is_in(weeks)) & |
| (pl.col("SCAN_TYPE")==("GTIN")) & |
| (pl.col("CATEGORY").is_in(categories))) |
|
|
|
|
| def get_weeks(week_num, year, prior_weeks): |
| return [i + (year-2022) * 52 for i in range(week_num - prior_weeks, week_num +1)] |
|
|
|
|
| |
| def get_rank(_df, group_by=["comp_week", "SKUPOS_DESCRIPTION"], rank_by="QUANTITY", rank_to=5, desc=True): |
| return _df.group_by(group_by).agg([pl.col(rank_by).sum().alias(rank_by), pl.col("BRAND").first()])\ |
| .with_columns(pl.col(rank_by).rank("dense", descending=desc).over("comp_week").alias("rank"))\ |
| .filter(pl.col("rank")<=rank_to) |
|
|
|
|
| def get_brand(_df, desc=True): |
| return _df.filter(pl.col("SKUPOS_DESCRIPTION").is_not_null()).group_by("SKUPOS_DESCRIPTION").agg(pl.col("QUANTITY").sum(), pl.col("TOTAL_REVENUE_AMOUNT").sum())\ |
| .sort("TOTAL_REVENUE_AMOUNT", descending=desc).collect() |
|
|
|
|
| def get_avg_sales(weeks, brand): |
| daily.filter(pl.col("comp_week").is_in(weeks) & (pl.col("BRAND")==brand)).group_by(brand).agg(pl.col("QUANTITY").mean()) |
|
|
| @st.cache_data |
| def get_avg_sales_delta(df, week_start, week_end, brand): |
| values = df.filter((pl.col("comp_week").is_in([week_start, week_end])) & (pl.col("BRAND")==brand)).group_by("BRAND", "comp_week").agg(pl.col("QUANTITY").mean()).collect()["QUANTITY"] |
| if len(values)<2: |
| return "invalid" |
| else: |
| return values[1] - values[0] |
|
|
| @st.cache_data |
| def get_unsold_weeks(_df, count_weeks, week, year): |
| return _df.group_by("GTIN", "comp_week").agg(pl.col("SKUPOS_DESCRIPTION").first())\ |
| .group_by("GTIN").agg(pl.col("comp_week").count().alias("weeks_with_sales"), pl.col("SKUPOS_DESCRIPTION").first())\ |
| .with_columns(pl.lit(count_weeks).sub(pl.col("weeks_with_sales")).alias("num_0_weeks")) |
|
|
|
|
|
|
|
|
| if 'filter_cats' not in st.session_state: |
| st.session_state.filter_cats = False |
| if 'categories' not in st.session_state: |
| st.session_state.categories = [] |
|
|
|
|
|
|
| daily = pl.scan_parquet('data/cstore_transactions_daily_agg.parquet') |
| |
| daily = daily.with_columns(pl.col("DATE").dt.year().sub(2022).mul(pl.lit(52)).add(pl.col("WEEk")).alias("comp_week")) |
| category_types = get_values(daily, "CATEGORY") |
| store_ids = get_values(daily, "STORE_ID") |
|
|
|
|
|
|
|
|
|
|
| tab1, tab2 = st.tabs(["Products", "Customers"]) |
|
|
| with tab1: |
| main, side = st.columns([4, 1]) |
| main.header("Top Performers") |
| |
| |
| |
| side.markdown("Filter By") |
| stores = side.multiselect("Stores", store_ids) |
| year = side.slider("Year", min_value=2022, max_value=2024, value=2023) |
| week = side.slider("Week", min_value=1, max_value=52, value=36) |
| weekly_range = side.slider("Previous Weeks", min_value=0, max_value=20, value=4) |
| top_products = side.slider("Top # Products", min_value=1, max_value=10, value=3) |
| categories = side.multiselect("Categories", category_types) |
|
|
|
|
| if stores != []: |
|
|
|
|
| weeks = get_weeks(week, year, weekly_range) |
| if categories == []: |
| categories = category_types |
| df = filter_df(daily, stores, weeks, categories) |
|
|
|
|
| ranked = get_rank(df, rank_to=top_products) |
| col1, col2, col3 = main.columns([1, 1, 1]) |
| topbrand = get_brand(df) |
| if len(topbrand): |
| col1.metric(label=f"Top Product", value=topbrand["SKUPOS_DESCRIPTION"][0]) |
| if weekly_range: |
| col2.metric(label=f"Avg Weekly Sales", value=topbrand["QUANTITY"][0]//weekly_range) |
| col3.metric(label="Avg Weekly Revenue", value=f"${round(topbrand['TOTAL_REVENUE_AMOUNT'][0]//weekly_range, 2)}") |
| else: |
| col2.metric(label=f"Avg Weekly Quanitity Sold", value=topbrand["QUANTITY"][0]) |
| col3.metric(label="Avg Weekly Revenue", value=f"${round(topbrand['TOTAL_REVENUE_AMOUNT'][0], 2)}") |
|
|
| fig = px.line( |
| ranked.sort(["comp_week", "SKUPOS_DESCRIPTION"]).collect().to_pandas(), |
| x="comp_week", |
| y="QUANTITY", |
| line_group="SKUPOS_DESCRIPTION", |
| color="SKUPOS_DESCRIPTION", |
| markers=True, |
| hover_data=["BRAND"] |
| ) |
|
|
| fig.update_xaxes(dtick=1) |
| main.plotly_chart(fig) |
| |
| else: |
| main.markdown("We don't seem to have data for that week/store(s). Please choose another week or store.") |
|
|
| |
|
|
| |
| |
| else: |
| main.markdown("Please Select at least 1 Store Id") |
|
|
| main.header("Under Performing Products") |
| unsold = get_unsold_weeks(df, weekly_range, week, year).collect() |
|
|
| main.metric("Count of Items with no Sales", unsold.filter(pl.col("num_0_weeks")==0).shape[0]) |
| |
|
|
| fig2 = px.histogram( |
| unsold, |
| x="num_0_weeks" |
| ) |
|
|
| main.plotly_chart(fig2) |
|
|
| see_zero = main.checkbox("See list of Unsold Items") |
| if see_zero: |
| main.markdown([i for i in unsold.filter(pl.col("num_0_weeks")==0)["SKUPOS_DESCRIPTION"]]) |
| |
| |
|
|
|
|
|
|
|
|
|
|