# Sam0398's picture
# update
# 2395bff verified
import streamlit as st
import polars as pl
import plotly.express as px
from great_tables import GT
import streamlit as st
# Page configuration is issued before any other Streamlit element.
st.set_page_config(page_title="Test App", layout="wide")

# Landing banner confirming that the script started.
st.title("🚀 Hello from Sam Anderson's Streamlit App")
st.write("✅ The app launched successfully on Hugging Face!")

# Minimal placeholder content: a collapsible checklist of follow-up tasks.
next_steps_panel = st.expander("Next steps")
next_steps_panel.markdown("""
- ✅ Now that it works, you can uncomment your data loading lines
- 🧠 Add store-level filters and plots again
- 🪄 Make sure at least one thing shows immediately when the app starts
""")

##############
# Second banner shown ahead of the data-dependent sections below.
st.title(" App Is Running!")
st.write("If you see this, the app is launching correctly. Now we’ll debug below.")
# Load local Parquet files from the data folder
@st.cache_data
def load_data():
    """Load the product master, a sample of transaction items, and payments.

    All three tables are read from Parquet files under the local ``data``
    folder. The result is cached by Streamlit via ``st.cache_data``.

    Returns:
        A tuple ``(gtin, items, payments)`` of polars DataFrames. In ``gtin``
        and ``items`` the ``GTIN`` column is renamed to ``gtin``; ``items``
        holds only the first 1000 rows of the transaction-item files.
    """
    gtin = pl.read_parquet('data/cstore_master_ctin.parquet').rename({'GTIN': 'gtin'})
    # Scan lazily so the 1000-row limit is applied while reading, instead of
    # loading every transaction file into memory and discarding most of it.
    items = (
        pl.scan_parquet('data/transaction_items/*.parquet')
        .head(1000)
        .rename({'GTIN': 'gtin'})
        .collect()
    )
    payments = pl.read_parquet('data/cstore_payments.parquet')
    return gtin, items, payments
# Fetch the three tables (cached by load_data) for use by every tab below.
gtin, items, payments = load_data()

# Dashboard heading, followed by one tab per analysis.
st.title("Convenience Store Dashboard")
tab_labels = [
    "Top Products",
    "Packaged Beverages",
    "Cash vs Credit Customers",
]
tab1, tab2, tab3 = st.tabs(tab_labels)
# ---------------- Tab 1 ----------------
# Ranks non-fuel products by summed UNIT_QUANTITY and shows the top five as a
# KPI, a bar chart, and a table.
with tab1:
    st.header("Top 5 Products by Weekly Sales (excluding fuels)")
    try:
        # Join items to the GTIN master on "gtin" and drop the "Fuels" category
        non_fuel_items = items.join(gtin, on="gtin", how="inner").filter(
            pl.col("CATEGORY") != "Fuels"
        )

        # NOTE(review): this counts distinct DATE_TIME values, not weeks, so
        # "weekly_sales" is quantity per distinct DATE_TIME value. The ranking
        # is unaffected (constant divisor), but the displayed magnitude is only
        # a weekly average if DATE_TIME has weekly granularity - confirm.
        unique_dates = non_fuel_items.select(pl.col("DATE_TIME").n_unique()).item()

        # Total quantity per product, scaled by the divisor above; keep the top 5
        top_products = (
            non_fuel_items.group_by("POS_DESCRIPTION")
            .agg(pl.sum("UNIT_QUANTITY").alias("total_quantity"))
            .with_columns(
                (pl.col("total_quantity") / unique_dates).alias("weekly_sales")
            )
            .sort("weekly_sales", descending=True)
            .head(5)
        )

        if top_products.shape[0] > 0:
            # KPI: the first row is the best seller because of the descending sort
            st.metric(
                "Top Selling Product",
                top_products[0, "POS_DESCRIPTION"],
                f"{top_products[0, 'weekly_sales']:.1f} per week"
            )

            # Bar chart
            fig = px.bar(
                top_products.to_pandas(),
                x="POS_DESCRIPTION",
                y="weekly_sales",
                labels={"weekly_sales": "Avg Weekly Sales", "POS_DESCRIPTION": "Product"},
                title="Top 5 Products by Weekly Sales"
            )
            st.plotly_chart(fig, use_container_width=True)

            # Table
            gt_table = GT(top_products.to_pandas())
            st.markdown(gt_table.render("streamlit"), unsafe_allow_html=True)
        else:
            # Without this guard the row-0 lookup above would raise on an
            # empty result and surface as a generic error message.
            st.warning("No non-fuel product sales available.")
    except Exception as e:
        # UI boundary: report the failure inside the tab instead of crashing the app
        st.error(f"Error loading or processing data (Top Products): {e}")
# ---------------- Tab 2 ----------------
# Lists the lowest-selling brands within the "Packaged Beverages" category.
with tab2:
    st.header("Brands to Consider Dropping (Packaged Beverage)")
    try:
        # Join items to the GTIN master once; the result feeds both the
        # category listing and the packaged-beverage filter below.
        all_beverages = items.join(gtin, on="gtin", how="inner")
        beverage_items = all_beverages.filter(
            pl.col("CATEGORY") == "Packaged Beverages"
        )
        st.write("Unique CATEGORY values:", all_beverages["CATEGORY"].unique().to_list())

        # Aggregate total units sold by brand, lowest first
        brand_sales = beverage_items.group_by("BRAND").agg(
            pl.sum("UNIT_QUANTITY").alias("total_units")
        ).sort("total_units")

        # User slider for number of brands to view (min 5, max 20, default 10)
        num_brands = st.slider("How many low-performing brands to show?", 5, 20, 10)

        # Get lowest-selling brands; may hold fewer rows than requested
        low_brands = brand_sales.head(num_brands)
        shown_brands = low_brands.shape[0]

        if shown_brands > 0:
            st.metric(
                "Lowest Selling Brand",
                low_brands[0, "BRAND"],
                f"{low_brands[0, 'total_units']} units"
            )
            fig = px.bar(
                low_brands.to_pandas(),
                x="BRAND",
                y="total_units",
                labels={"total_units": "Units Sold", "BRAND": "Brand"},
                # Use the number of brands actually plotted so the title is
                # accurate when fewer brands exist than the slider requests.
                title=f"Lowest Selling {shown_brands} Brands in Packaged Beverages"
            )
            st.plotly_chart(fig, use_container_width=True)

            gt_table = GT(low_brands.to_pandas())
            st.markdown(gt_table.render("streamlit"), unsafe_allow_html=True)
        else:
            st.warning("No packaged beverage brand data available.")
    except Exception as e:
        # UI boundary: report the failure inside the tab instead of crashing the app
        st.error(f"Error loading or processing data (Packaged Beverages): {e}")
# ---------------- Tab 3 ----------------
# Compares spend, item counts, and top products across TENDER_TYPE values.
with tab3:
    st.header("Comparison of Cash vs Credit Customers")
    try:
        # Check available columns
        st.write("Items columns:", items.columns)

        # TEMP: simulate tender type only when the column is missing (for
        # structure testing), so a real TENDER_TYPE column is never
        # overwritten with random values.
        if "TENDER_TYPE" not in items.columns:
            import random
            items = items.with_columns([
                pl.Series(
                    "TENDER_TYPE",
                    [random.choice(["Cash", "Credit"]) for _ in range(items.shape[0])],
                )
            ])

        # Total spend by tender type, largest first
        amount_summary = items.group_by("TENDER_TYPE").agg(
            pl.sum("GRAND_TOTAL_AMOUNT").alias("total_amount")
        ).sort("total_amount", descending=True)

        # Total item count by tender type, largest first
        quantity_summary = items.group_by("TENDER_TYPE").agg(
            pl.sum("UNIT_QUANTITY").alias("total_items")
        ).sort("total_items", descending=True)

        # Top 20 (tender type, product) pairs with a positive quantity
        top_products = items.group_by(["TENDER_TYPE", "POS_DESCRIPTION"]).agg(
            pl.sum("UNIT_QUANTITY").alias("total_qty")
        ).sort("total_qty", descending=True).filter(pl.col("total_qty") > 0).head(20)

        # KPIs: the summaries are sorted by value, so row order does not say
        # which tender type a row belongs to. Label each metric from the
        # row's own TENDER_TYPE rather than assuming row 0 is "Credit".
        if amount_summary.shape[0] >= 2:
            st.subheader("💰 Total Spend")
            for row in range(2):
                st.metric(
                    str(amount_summary[row, "TENDER_TYPE"]),
                    f"${amount_summary[row, 'total_amount']:.2f}"
                )

        if quantity_summary.shape[0] >= 2:
            st.subheader("🛒 Total Items Purchased")
            for row in range(2):
                st.metric(
                    str(quantity_summary[row, "TENDER_TYPE"]),
                    f"{int(quantity_summary[row, 'total_items'])} items"
                )

        # Bar chart
        st.subheader("Top Products by Payment Method")
        fig = px.bar(
            top_products.to_pandas(),
            x="POS_DESCRIPTION",
            y="total_qty",
            color="TENDER_TYPE",
            barmode="group",
            title="Top Products by Payment Method"
        )
        st.plotly_chart(fig, use_container_width=True)

        # Table
        st.subheader("Spend Summary Table")
        gt_table = GT(amount_summary.to_pandas())
        st.markdown(gt_table.render("streamlit"), unsafe_allow_html=True)
    except Exception as e:
        # UI boundary: report the failure inside the tab instead of crashing the app
        st.error(f"Error processing Tab 3 (Cash vs Credit): {e}")