jimmiewoo's picture
Update streamlit.py
b9d5fcf verified
from collections import namedtuple
import altair as alt
import plotly.express as px
import math
import polars as pl
import streamlit as st
# from great_tables import GT, md, html
@st.cache_resource
def get_values(_df, col_name):
return _df.collect()[col_name].unique()
@st.cache_resource
def filter_df(_df, stores:list, weeks:list, categories=[]) -> pl.DataFrame:
return _df.filter((pl.col("STORE_ID").is_in(stores)) &
(pl.col("comp_week").is_in(weeks)) &
(pl.col("SCAN_TYPE")==("GTIN")) &
(pl.col("CATEGORY").is_in(categories)))
def get_weeks(week_num, year, prior_weeks):
return [i + (year-2022) * 52 for i in range(week_num - prior_weeks, week_num +1)]
# @st.cache_resource
def get_rank(_df, group_by=["comp_week", "SKUPOS_DESCRIPTION"], rank_by="QUANTITY", rank_to=5, desc=True):
return _df.group_by(group_by).agg([pl.col(rank_by).sum().alias(rank_by), pl.col("BRAND").first()])\
.with_columns(pl.col(rank_by).rank("dense", descending=desc).over("comp_week").alias("rank"))\
.filter(pl.col("rank")<=rank_to)
def get_brand(_df, desc=True):
return _df.filter(pl.col("SKUPOS_DESCRIPTION").is_not_null()).group_by("SKUPOS_DESCRIPTION").agg(pl.col("QUANTITY").sum(), pl.col("TOTAL_REVENUE_AMOUNT").sum())\
.sort("TOTAL_REVENUE_AMOUNT", descending=desc).collect()
def get_avg_sales(weeks, brand):
daily.filter(pl.col("comp_week").is_in(weeks) & (pl.col("BRAND")==brand)).group_by(brand).agg(pl.col("QUANTITY").mean())
@st.cache_data
def get_avg_sales_delta(df, week_start, week_end, brand):
values = df.filter((pl.col("comp_week").is_in([week_start, week_end])) & (pl.col("BRAND")==brand)).group_by("BRAND", "comp_week").agg(pl.col("QUANTITY").mean()).collect()["QUANTITY"]
if len(values)<2:
return "invalid"
else:
return values[1] - values[0]
@st.cache_data
def get_unsold_weeks(_df, count_weeks, week, year):
return _df.group_by("GTIN", "comp_week").agg(pl.col("SKUPOS_DESCRIPTION").first())\
.group_by("GTIN").agg(pl.col("comp_week").count().alias("weeks_with_sales"), pl.col("SKUPOS_DESCRIPTION").first())\
.with_columns(pl.lit(count_weeks).sub(pl.col("weeks_with_sales")).alias("num_0_weeks"))
if 'filter_cats' not in st.session_state:
st.session_state.filter_cats = False
if 'categories' not in st.session_state:
st.session_state.categories = []
daily = pl.scan_parquet('data/cstore_transactions_daily_agg.parquet')
# adding a "" column
daily = daily.with_columns(pl.col("DATE").dt.year().sub(2022).mul(pl.lit(52)).add(pl.col("WEEk")).alias("comp_week"))
category_types = get_values(daily, "CATEGORY")
store_ids = get_values(daily, "STORE_ID")
tab1, tab2 = st.tabs(["Products", "Customers"])
with tab1:
main, side = st.columns([4, 1])
main.header("Top Performers")
side.markdown("Filter By")
stores = side.multiselect("Stores", store_ids)
year = side.slider("Year", min_value=2022, max_value=2024, value=2023)
week = side.slider("Week", min_value=1, max_value=52, value=36)
weekly_range = side.slider("Previous Weeks", min_value=0, max_value=20, value=4)
top_products = side.slider("Top # Products", min_value=1, max_value=10, value=3)
categories = side.multiselect("Categories", category_types)
if stores != []:
weeks = get_weeks(week, year, weekly_range)
if categories == []:
categories = category_types
df = filter_df(daily, stores, weeks, categories)
ranked = get_rank(df, rank_to=top_products)
col1, col2, col3 = main.columns([1, 1, 1])
topbrand = get_brand(df)
if len(topbrand):
col1.metric(label=f"Top Product", value=topbrand["SKUPOS_DESCRIPTION"][0])
if weekly_range:
col2.metric(label=f"Avg Weekly Sales", value=topbrand["QUANTITY"][0]//weekly_range)
col3.metric(label="Avg Weekly Revenue", value=f"${round(topbrand['TOTAL_REVENUE_AMOUNT'][0]//weekly_range, 2)}")
else:
col2.metric(label=f"Avg Weekly Quanitity Sold", value=topbrand["QUANTITY"][0])
col3.metric(label="Avg Weekly Revenue", value=f"${round(topbrand['TOTAL_REVENUE_AMOUNT'][0], 2)}")
fig = px.line(
ranked.sort(["comp_week", "SKUPOS_DESCRIPTION"]).collect().to_pandas(),
x="comp_week",
y="QUANTITY",
line_group="SKUPOS_DESCRIPTION",
color="SKUPOS_DESCRIPTION",
markers=True,
hover_data=["BRAND"]
)
fig.update_xaxes(dtick=1)
main.plotly_chart(fig)
else:
main.markdown("We don't seem to have data for that week/store(s). Please choose another week or store.")
# main.dataframe(df.sort(["comp_week", "GTIN"]))
else:
main.markdown("Please Select at least 1 Store Id")
main.header("Under Performing Products")
unsold = get_unsold_weeks(df, weekly_range, week, year).collect()
main.metric("Count of Items with no Sales", unsold.filter(pl.col("num_0_weeks")==0).shape[0])
fig2 = px.histogram(
unsold,
x="num_0_weeks"
)
main.plotly_chart(fig2)
see_zero = main.checkbox("See list of Unsold Items")
if see_zero:
main.markdown([i for i in unsold.filter(pl.col("num_0_weeks")==0)["SKUPOS_DESCRIPTION"]])