Spaces:
Runtime error
Runtime error
File size: 6,936 Bytes
e930f2e f563244 e930f2e 2395bff e930f2e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
import streamlit as st
import polars as pl
import plotly.express as px
from great_tables import GT
import streamlit as st
st.set_page_config(page_title="Test App", layout="wide")
st.title("🚀 Hello from Sam Anderson's Streamlit App")
st.write("✅ The app launched successfully on Hugging Face!")
# Minimal placeholder content
with st.expander("Next steps"):
st.markdown("""
- ✅ Now that it works, you can uncomment your data loading lines
- 🧠 Add store-level filters and plots again
- 🪄 Make sure at least one thing shows immediately when the app starts
""")
##############
st.title(" App Is Running!")
st.write("If you see this, the app is launching correctly. Now we’ll debug below.")
# Load local Parquet files from the data folder
@st.cache_data
def load_data():
gtin = pl.read_parquet('data/cstore_master_ctin.parquet').rename({'GTIN': 'gtin'})
items = pl.read_parquet('data/transaction_items/*.parquet').head(1000).rename({'GTIN': 'gtin'})
payments = pl.read_parquet('data/cstore_payments.parquet')
return gtin, items, payments
# Load data
gtin, items, payments = load_data()
# Streamlit UI
st.title("Convenience Store Dashboard")
tab1, tab2, tab3 = st.tabs([
"Top Products",
"Packaged Beverages",
"Cash vs Credit Customers"
])
# ---------------- Tab 1 ----------------
with tab1:
st.header("Top 5 Products by Weekly Sales (excluding fuels)")
try:
# Join and filter
non_fuel_items = items.join(gtin, on="gtin", how="inner").filter(
pl.col("CATEGORY") != "Fuels"
)
# Count unique dates
unique_dates = non_fuel_items.select(pl.col("DATE_TIME").n_unique()).item()
# Group by product and calculate weekly sales
top_products = non_fuel_items.group_by("POS_DESCRIPTION").agg(
pl.sum("UNIT_QUANTITY").alias("total_quantity")
).with_columns(
(pl.col("total_quantity") / unique_dates).alias("weekly_sales")
).sort("weekly_sales", descending=True).head(5)
# KPI
st.metric(
"Top Selling Product",
top_products[0, "POS_DESCRIPTION"],
f"{top_products[0, 'weekly_sales']:.1f} per week"
)
# Bar chart
fig = px.bar(
top_products.to_pandas(),
x="POS_DESCRIPTION",
y="weekly_sales",
labels={"weekly_sales": "Avg Weekly Sales", "POS_DESCRIPTION": "Product"},
title="Top 5 Products by Weekly Sales"
)
st.plotly_chart(fig, use_container_width=True)
# Table
gt_table = GT(top_products.to_pandas())
st.markdown(gt_table.render("streamlit"), unsafe_allow_html=True)
except Exception as e:
st.error(f"Error loading or processing data (Top Products): {e}")
# ---------------- Tab 2 ----------------
with tab2:
st.header("Brands to Consider Dropping (Packaged Beverage)")
try:
# Join and filter for packaged beverages
beverage_items = items.join(gtin, on="gtin", how="inner").filter(
pl.col("CATEGORY") == "Packaged Beverages"
)
all_beverages = items.join(gtin, on="gtin", how="inner")
st.write("Unique CATEGORY values:", all_beverages["CATEGORY"].unique().to_list())
# Aggregate total units sold by brand
brand_sales = beverage_items.group_by("BRAND").agg(
pl.sum("UNIT_QUANTITY").alias("total_units")
).sort("total_units")
# User slider for number of brands to view
num_brands = st.slider("How many low-performing brands to show?", 5, 20, 10)
# Get lowest-selling brands
low_brands = brand_sales.head(num_brands)
if low_brands.shape[0] > 0:
st.metric(
"Lowest Selling Brand",
low_brands[0, "BRAND"],
f"{low_brands[0, 'total_units']} units"
)
fig = px.bar(
low_brands.to_pandas(),
x="BRAND",
y="total_units",
labels={"total_units": "Units Sold", "BRAND": "Brand"},
title=f"Lowest Selling {num_brands} Brands in Packaged Beverages"
)
st.plotly_chart(fig, use_container_width=True)
gt_table = GT(low_brands.to_pandas())
st.markdown(gt_table.render("streamlit"), unsafe_allow_html=True)
else:
st.warning("No packaged beverage brand data available.")
except Exception as e:
st.error(f"Error loading or processing data (Packaged Beverages): {e}")
# ---------------- Tab 3 ----------------
with tab3:
st.header("Comparison of Cash vs Credit Customers")
try:
# Check available columns
st.write("Items columns:", items.columns)
# TEMP: simulate tender type if it's missing (for structure testing)
# You can remove this in real use if you have TENDER_TYPE elsewhere
import random
items = items.with_columns([
pl.Series("TENDER_TYPE", [random.choice(["Cash", "Credit"]) for _ in range(items.shape[0])])
])
# Total spend by tender type
amount_summary = items.group_by("TENDER_TYPE").agg(
pl.sum("GRAND_TOTAL_AMOUNT").alias("total_amount")
).sort("total_amount", descending=True)
# Total item count by tender type
quantity_summary = items.group_by("TENDER_TYPE").agg(
pl.sum("UNIT_QUANTITY").alias("total_items")
).sort("total_items", descending=True)
# Top products by tender type
top_products = items.group_by(["TENDER_TYPE", "POS_DESCRIPTION"]).agg(
pl.sum("UNIT_QUANTITY").alias("total_qty")
).sort("total_qty", descending=True).filter(pl.col("total_qty") > 0).head(20)
# KPIs
if amount_summary.shape[0] >= 2:
st.subheader("💰 Total Spend")
st.metric("Credit", f"${amount_summary[0, 'total_amount']:.2f}")
st.metric("Cash", f"${amount_summary[1, 'total_amount']:.2f}")
if quantity_summary.shape[0] >= 2:
st.subheader("🛒 Total Items Purchased")
st.metric("Credit", f"{int(quantity_summary[0, 'total_items'])} items")
st.metric("Cash", f"{int(quantity_summary[1, 'total_items'])} items")
# Bar chart
st.subheader("Top Products by Payment Method")
fig = px.bar(
top_products.to_pandas(),
x="POS_DESCRIPTION",
y="total_qty",
color="TENDER_TYPE",
barmode="group",
title="Top Products by Payment Method"
)
st.plotly_chart(fig, use_container_width=True)
# Table
st.subheader("Spend Summary Table")
gt_table = GT(amount_summary.to_pandas())
st.markdown(gt_table.render("streamlit"), unsafe_allow_html=True)
except Exception as e:
st.error(f"Error processing Tab 3 (Cash vs Credit): {e}")
|