| | import streamlit as st |
| | import polars as pl |
| | import plotly.express as px |
| | import plotly.graph_objects as go |
| | import altair as alt |
| |
|
| | @st.cache_data |
| | def load_data(): |
| | gtin = pl.read_parquet('data/cstore_master_ctin.parquet') |
| | discounts = pl.read_parquet('data/cstore_discounts.parquet') |
| | stores = pl.read_parquet('data/cstore_stores.parquet') |
| | payments = pl.read_parquet('data/cstore_payments.parquet') |
| | daily = pl.read_parquet('data/cstore_transactions_daily_agg.parquet') |
| | shopper = pl.read_parquet('data/cstore_shopper.parquet') |
| | sets = pl.read_parquet('data/cstore_transaction_sets.parquet') |
| | status = pl.read_parquet('data/cstore_store_status.parquet') |
| | items = pl.scan_parquet("data/transaction_items/*.parquet").collect() |
| |
|
| | daily = daily.with_columns([ |
| | pl.col("DATE").dt.week().alias("WEEK_NUM"), |
| | pl.col("DATE").dt.year().alias("YEAR"), |
| | pl.col("DATE").dt.month().alias("MONTH"), |
| | pl.col("DATE").dt.strftime("%Y-%m").alias("YEAR_MONTH") |
| | ]) |
| |
|
| | items = items.with_columns([ |
| | pl.col("DATE_TIME").dt.year().alias("YEAR"), |
| | pl.col("DATE_TIME").dt.month().alias("MONTH"), |
| | pl.col("DATE_TIME").dt.week().alias("WEEK_NUM"), |
| | pl.col("DATE_TIME").dt.strftime("%Y-%m").alias("YEAR_MONTH") |
| | ]) |
| |
|
| | return gtin, discounts, stores, payments, daily, shopper, sets, status, items |
| |
|
| | gtin, discounts, stores, payments, daily, shopper, sets, status, items = load_data() |
| |
|
| | st.title("C-Store Analytics Dashboard") |
| |
|
| | available_months = sorted(daily["MONTH"].unique().to_list()) |
| | selected_months = st.sidebar.multiselect("Filter by Month:", available_months, default=available_months) |
| |
|
| | available_years_daily = sorted(daily["YEAR"].unique().to_list()) |
| | selected_years_daily = st.sidebar.multiselect("Filter by Year (Tab 1 & 2):", available_years_daily, default=available_years_daily) |
| |
|
| | available_years_items = sorted(items["YEAR"].unique().to_list()) |
| | selected_years_items = st.sidebar.multiselect("Filter by Year (Tab 3):", available_years_items, default=available_years_items) |
| |
|
| | selected_categories = st.sidebar.multiselect("Filter by Category:", daily["CATEGORY"].unique().to_list(), default=daily["CATEGORY"].unique().to_list()) |
| | selected_brands = st.sidebar.multiselect("Filter by Brand:", daily["BRAND"].unique().to_list(), default=daily["BRAND"].unique().to_list()) |
| |
|
| | interest_level_tab1 = st.sidebar.slider("Interest Level (Tab 1):", 0, 10, 5) |
| | interest_level_tab2 = st.sidebar.slider("Interest Level (Tab 2):", 0, 10, 5) |
| | interest_level_tab3 = st.sidebar.slider("Interest Level (Tab 3):", 0, 10, 5) |
| |
|
| | tabs = st.tabs(["Top 5 Products by Weekly Sales (Excl. Fuels)", "Brands to Drop in Packaged Beverages", "Cash vs Credit Customer Comparison"]) |
| |
|
| | with tabs[0]: |
| | filtered_daily = daily.filter(pl.col("MONTH").is_in(selected_months) & pl.col("YEAR").is_in(selected_years_daily) & pl.col("CATEGORY").is_in(selected_categories) & pl.col("BRAND").is_in(selected_brands)) |
| | dsmerged = filtered_daily.join(gtin.select(["GTIN", "CATEGORY", "SKUPOS_DESCRIPTION"]), on="GTIN", how="left") |
| | filtered = dsmerged.filter(pl.col("CATEGORY").str.to_lowercase() != "fuel") |
| |
|
| | weekly_sales = ( |
| | filtered |
| | .group_by(["SKUPOS_DESCRIPTION", "WEEK_NUM", "YEAR"]) |
| | .agg(pl.col("TOTAL_REVENUE_AMOUNT").sum().alias("WEEKLY_SALES")) |
| | ) |
| |
|
| | top_products = ( |
| | weekly_sales |
| | .group_by("SKUPOS_DESCRIPTION") |
| | .agg(pl.col("WEEKLY_SALES").sum().alias("TOTAL_SALES")) |
| | .sort("TOTAL_SALES", descending=True) |
| | .head(5) |
| | ) |
| |
|
| | top_product_names = top_products["SKUPOS_DESCRIPTION"].to_list() |
| |
|
| | top_weekly_sales = weekly_sales.filter(pl.col("SKUPOS_DESCRIPTION").is_in(top_product_names)).to_pandas() |
| |
|
| | table = top_weekly_sales.groupby("SKUPOS_DESCRIPTION")["WEEKLY_SALES"].sum().sort_values(ascending=False).reset_index() |
| |
|
| | kpi_1 = top_weekly_sales["WEEKLY_SALES"].sum() |
| | kpi_2 = top_products["TOTAL_SALES"].sum() |
| | kpi_3 = top_products["SKUPOS_DESCRIPTION"].n_unique() |
| | kpi_4 = top_weekly_sales["WEEKLY_SALES"].mean() |
| | kpi_5 = top_weekly_sales["WEEKLY_SALES"].std() |
| |
|
| | col1, col2, col3, col4, col5 = st.columns(5) |
| | col1.metric("Total Sales for Top 5 Products", f"${kpi_1:,.2f}") |
| | col2.metric("Total Sales Value (Top Products)", f"${kpi_2:,.2f}") |
| | col3.metric("Unique Products in Top 5", kpi_3) |
| | col4.metric("Avg Weekly Sales", f"${kpi_4:,.2f}") |
| | col5.metric("Weekly Sales Std Dev", f"${kpi_5:,.2f}") |
| |
|
| | st.write(table) |
| |
|
| | fig_line = px.line( |
| | top_weekly_sales, |
| | x="WEEK_NUM", |
| | y="WEEKLY_SALES", |
| | color="SKUPOS_DESCRIPTION", |
| | title="Weekly Sales Over Time - Top 5 Products (Excl. Fuel)", |
| | labels={"WEEK_NUM": "Week Number", "WEEKLY_SALES": "Weekly Sales"}, |
| | ) |
| | st.plotly_chart(fig_line) |
| |
|
| | box_chart = alt.Chart(top_weekly_sales).mark_boxplot().encode( |
| | x="SKUPOS_DESCRIPTION:N", |
| | y="WEEKLY_SALES:Q" |
| | ).properties( |
| | title="Distribution of Weekly Sales - Top 5 Products" |
| | ) |
| | st.altair_chart(box_chart) |
| |
|
| | fig_overlay = px.histogram( |
| | top_weekly_sales, |
| | x="WEEKLY_SALES", |
| | color="SKUPOS_DESCRIPTION", |
| | nbins=20, |
| | title="Distribution of Weekly Sales - Top 5 Products (Excl. Fuel)", |
| | labels={"WEEKLY_SALES": "Weekly Sales", "count": "Frequency"}, |
| | opacity=0.7, |
| | barmode="overlay" |
| | ) |
| | st.plotly_chart(fig_overlay) |
| |
|
| | with tabs[1]: |
| | filtered_daily = daily.filter(pl.col("MONTH").is_in(selected_months) & pl.col("YEAR").is_in(selected_years_daily) & pl.col("CATEGORY").is_in(selected_categories) & pl.col("BRAND").is_in(selected_brands)) |
| | dsmerged = filtered_daily.join(gtin.select(["GTIN", "CATEGORY", "SKUPOS_DESCRIPTION"]), on="GTIN", how="left") |
| | beverages = dsmerged.filter(pl.col("CATEGORY").str.contains("Beverage")) |
| |
|
| | weekly_sales = ( |
| | beverages |
| | .group_by(["SKUPOS_DESCRIPTION", "WEEK_NUM", "YEAR"]) |
| | .agg(pl.col("TOTAL_REVENUE_AMOUNT").sum().alias("WEEKLY_SALES")) |
| | ) |
| |
|
| | bottom_products = ( |
| | weekly_sales |
| | .group_by("SKUPOS_DESCRIPTION") |
| | .agg(pl.col("WEEKLY_SALES").sum().alias("TOTAL_SALES")) |
| | .sort("TOTAL_SALES") |
| | .head(5) |
| | ) |
| |
|
| | bottom_product_names = bottom_products["SKUPOS_DESCRIPTION"].to_list() |
| |
|
| | bottom_weekly_sales = weekly_sales.filter(pl.col("SKUPOS_DESCRIPTION").is_in(bottom_product_names)).to_pandas() |
| |
|
| | table = bottom_weekly_sales.groupby("SKUPOS_DESCRIPTION")["WEEKLY_SALES"].sum().sort_values().reset_index() |
| |
|
| | kpi_1 = bottom_weekly_sales["WEEKLY_SALES"].sum() |
| | kpi_2 = bottom_products["TOTAL_SALES"].sum() |
| | kpi_3 = bottom_products["SKUPOS_DESCRIPTION"].n_unique() |
| | kpi_4 = bottom_weekly_sales["WEEKLY_SALES"].mean() |
| | kpi_5 = bottom_weekly_sales["WEEKLY_SALES"].std() |
| |
|
| | col1, col2, col3, col4, col5 = st.columns(5) |
| | col1.metric("Total Sales for Bottom Products", f"${kpi_1:,.2f}") |
| | col2.metric("Total Sales Value (Bottom Products)", f"${kpi_2:,.2f}") |
| | col3.metric("Unique Products in Bottom 5", kpi_3) |
| | col4.metric("Avg Weekly Sales", f"${kpi_4:,.2f}") |
| | col5.metric("Weekly Sales Std Dev", f"${kpi_5:,.2f}") |
| |
|
| | st.write("Least Popular Packaged Beverages:") |
| | st.write(table) |
| |
|
| | fig = px.treemap( |
| | table, |
| | path=["SKUPOS_DESCRIPTION"], |
| | values="WEEKLY_SALES", |
| | title="Treemap - Total Sales for Least Popular Packaged Beverages" |
| | ) |
| | st.plotly_chart(fig) |
| |
|
| | fig = px.density_contour( |
| | bottom_weekly_sales, |
| | x="WEEKLY_SALES", |
| | y="SKUPOS_DESCRIPTION", |
| | title="Density Contour - Sales Concentration by Product" |
| | ) |
| | fig.update_traces(contours_coloring="fill", showscale=True) |
| | st.plotly_chart(fig) |
| |
|
| | fig_violin = px.violin( |
| | bottom_weekly_sales, |
| | y="WEEKLY_SALES", |
| | x="SKUPOS_DESCRIPTION", |
| | box=True, |
| | points="all", |
| | title="Violin Plot - Weekly Sales Distribution (Bottom 5 Beverages)" |
| | ) |
| | fig_violin.update_layout( |
| | xaxis_title="Product", |
| | yaxis_title="Weekly Sales" |
| | ) |
| | st.plotly_chart(fig_violin) |
| |
|
| | with tabs[2]: |
| | filtered_items = items.filter(pl.col("MONTH").is_in(selected_months) & pl.col("YEAR").is_in(selected_years_items)) |
| | merged = filtered_items.join( |
| | sets.select(["TRANSACTION_SET_ID", "PAYMENT_TYPE", "GRAND_TOTAL_AMOUNT"]), |
| | on="TRANSACTION_SET_ID", |
| | how="left" |
| | ).join( |
| | gtin.select(["GTIN", "SKUPOS_DESCRIPTION"]), |
| | on="GTIN", |
| | how="left" |
| | ) |
| |
|
| | grouped = merged.group_by(["PAYMENT_TYPE", "SKUPOS_DESCRIPTION"]).agg([ |
| | pl.col("UNIT_QUANTITY").sum().alias("TOTAL_ITEMS"), |
| | pl.col("GRAND_TOTAL_AMOUNT").sum().alias("TOTAL_SPENT") |
| | ]) |
| |
|
| | top_products_by_payment = ( |
| | grouped.sort("TOTAL_ITEMS", descending=True) |
| | .group_by("PAYMENT_TYPE") |
| | .head(5) |
| | ) |
| |
|
| | total_metrics = merged.group_by("PAYMENT_TYPE").agg([ |
| | pl.col("UNIT_QUANTITY").sum().alias("TOTAL_ITEMS"), |
| | pl.col("GRAND_TOTAL_AMOUNT").sum().alias("TOTAL_SPENT") |
| | ]) |
| |
|
| | kpi_1 = total_metrics["TOTAL_SPENT"].sum() |
| | kpi_2 = total_metrics["TOTAL_ITEMS"].sum() |
| | kpi_3 = total_metrics["PAYMENT_TYPE"].n_unique() |
| | kpi_4 = total_metrics["TOTAL_ITEMS"].mean() |
| | kpi_5 = total_metrics["TOTAL_SPENT"].std() |
| |
|
| | col1, col2, col3, col4, col5 = st.columns(5) |
| | col1.metric("Total Spend", f"${kpi_1:,.2f}") |
| | col2.metric("Total Items Sold", kpi_2) |
| | col3.metric("Unique Payment Methods", kpi_3) |
| | col4.metric("Avg Items Sold", kpi_4) |
| | col5.metric("Spend Std Dev", f"${kpi_5:,.2f}") |
| |
|
| | st.write("Top Products for Each Payment Type:") |
| | st.write(top_products_by_payment) |
| |
|
| | st.write("\nTotal Comparison - Cash vs Credit:") |
| | st.write(total_metrics) |
| |
|
| | time_series = merged.group_by(["YEAR_MONTH", "PAYMENT_TYPE"]).agg( |
| | pl.col("GRAND_TOTAL_AMOUNT").sum().alias("TOTAL_SPENT") |
| | ).sort("YEAR_MONTH").to_pandas() |
| |
|
| | fig = px.line( |
| | time_series, |
| | x="YEAR_MONTH", |
| | y="TOTAL_SPENT", |
| | color="PAYMENT_TYPE", |
| | title="Monthly Spend by Payment Type" |
| | ) |
| | st.plotly_chart(fig) |
| |
|
| | treemap_data = ( |
| | top_products_by_payment |
| | .filter( |
| | pl.col("PAYMENT_TYPE").is_not_null() & |
| | pl.col("SKUPOS_DESCRIPTION").is_not_null() & |
| | (pl.col("SKUPOS_DESCRIPTION") != "") |
| | ) |
| | .to_pandas() |
| | ) |
| |
|
| | fig = px.treemap( |
| | treemap_data, |
| | path=["PAYMENT_TYPE", "SKUPOS_DESCRIPTION"], |
| | values="TOTAL_SPENT", |
| | title="Treemap: Product Spend by Payment Type" |
| | ) |
| | st.plotly_chart(fig) |
| |
|
| | heatmap_data = top_products_by_payment.to_pandas().pivot( |
| | index="SKUPOS_DESCRIPTION", |
| | columns="PAYMENT_TYPE", |
| | values="TOTAL_ITEMS" |
| | ) |
| |
|
| | fig = go.Figure(data=go.Heatmap( |
| | z=heatmap_data.values, |
| | x=heatmap_data.columns, |
| | y=heatmap_data.index, |
| | colorscale='YlGnBu', |
| | colorbar=dict(title="Total Items"), |
| | )) |
| |
|
| | fig.update_layout( |
| | title="Top Products by Total Items Purchased (Cash vs Credit)", |
| | xaxis_title="Payment Type", |
| | yaxis_title="Product", |
| | xaxis=dict(tickmode='array', tickvals=list(range(len(heatmap_data.columns))), ticktext=heatmap_data.columns), |
| | yaxis=dict(tickmode='array', tickvals=list(range(len(heatmap_data.index))), ticktext=heatmap_data.index), |
| | height=600, |
| | width=1000 |
| | ) |
| |
|
| | st.plotly_chart(fig) |