"""Streamlit dashboard for convenience-store transaction data.

The page shows a launch banner followed by three tabs:

1. Top five non-fuel products by average sales.
2. Lowest-selling brands within the "Packaged Beverages" category.
3. A comparison of spend and item counts by tender type.

Data is read from local Parquet files under ``data/``.
"""

import random

import plotly.express as px
import polars as pl
import streamlit as st
from great_tables import GT

st.set_page_config(page_title="Test App", layout="wide")

st.title("🚀 Hello from Sam Anderson's Streamlit App")
st.write("✅ The app launched successfully on Hugging Face!")

# Minimal placeholder content
with st.expander("Next steps"):
    st.markdown(
        "- ✅ Now that it works, you can uncomment your data loading lines\n"
        "- 🧠 Add store-level filters and plots again\n"
        "- 🪄 Make sure at least one thing shows immediately when the app starts\n"
    )

##############
st.title(" App Is Running!")
st.write("If you see this, the app is launching correctly. Now we’ll debug below.")


@st.cache_data
def load_data() -> tuple[pl.DataFrame, pl.DataFrame, pl.DataFrame]:
    """Load the local Parquet files from the ``data`` folder.

    Returns:
        A ``(gtin, items, payments)`` tuple of polars DataFrames. In ``gtin``
        and ``items`` the ``GTIN`` column is renamed to ``gtin`` so the two
        frames can be joined on it. ``items`` is limited to its first 1000
        rows.
    """
    gtin = pl.read_parquet("data/cstore_master_ctin.parquet").rename({"GTIN": "gtin"})
    items = (
        pl.read_parquet("data/transaction_items/*.parquet")
        .head(1000)
        .rename({"GTIN": "gtin"})
    )
    payments = pl.read_parquet("data/cstore_payments.parquet")
    return gtin, items, payments


def _show_table(frame: pl.DataFrame) -> None:
    """Render a polars DataFrame as a great_tables table in the page."""
    st.markdown(GT(frame.to_pandas()).render("streamlit"), unsafe_allow_html=True)


# Load data
gtin, items, payments = load_data()

# Streamlit UI
st.title("Convenience Store Dashboard")

tab1, tab2, tab3 = st.tabs([
    "Top Products",
    "Packaged Beverages",
    "Cash vs Credit Customers",
])

# ---------------- Tab 1 ----------------
with tab1:
    st.header("Top 5 Products by Weekly Sales (excluding fuels)")

    try:
        # Join items to the product master and drop fuel rows.
        non_fuel_items = items.join(gtin, on="gtin", how="inner").filter(
            pl.col("CATEGORY") != "Fuels"
        )

        # NOTE(review): this counts distinct DATE_TIME values, not weeks. If
        # DATE_TIME is a timestamp, "weekly_sales" is really units per
        # distinct timestamp - confirm the column's dtype and intended period.
        unique_dates = non_fuel_items.select(pl.col("DATE_TIME").n_unique()).item()

        # Total units per product, scaled by the period count above.
        top_products = (
            non_fuel_items.group_by("POS_DESCRIPTION")
            .agg(pl.sum("UNIT_QUANTITY").alias("total_quantity"))
            .with_columns(
                (pl.col("total_quantity") / unique_dates).alias("weekly_sales")
            )
            .sort("weekly_sales", descending=True)
            .head(5)
        )

        if top_products.is_empty():
            # Without this guard, indexing row 0 below raises on an empty frame.
            st.warning("No non-fuel product sales data available.")
        else:
            # KPI
            st.metric(
                "Top Selling Product",
                top_products[0, "POS_DESCRIPTION"],
                f"{top_products[0, 'weekly_sales']:.1f} per week",
            )

            # Bar chart
            fig = px.bar(
                top_products.to_pandas(),
                x="POS_DESCRIPTION",
                y="weekly_sales",
                labels={
                    "weekly_sales": "Avg Weekly Sales",
                    "POS_DESCRIPTION": "Product",
                },
                title="Top 5 Products by Weekly Sales",
            )
            st.plotly_chart(fig, use_container_width=True)

            # Table
            _show_table(top_products)

    except Exception as e:
        st.error(f"Error loading or processing data (Top Products): {e}")

# ---------------- Tab 2 ----------------
with tab2:
    st.header("Brands to Consider Dropping (Packaged Beverage)")

    try:
        # Join once and reuse the result for both the debug output and the
        # packaged-beverage filter.
        joined_items = items.join(gtin, on="gtin", how="inner")
        beverage_items = joined_items.filter(
            pl.col("CATEGORY") == "Packaged Beverages"
        )

        # Debug aid: list the categories present in the joined data.
        st.write(
            "Unique CATEGORY values:",
            joined_items["CATEGORY"].unique().to_list(),
        )

        # Total units sold per brand, ascending so the weakest come first.
        brand_sales = (
            beverage_items.group_by("BRAND")
            .agg(pl.sum("UNIT_QUANTITY").alias("total_units"))
            .sort("total_units")
        )

        # User slider for number of brands to view
        num_brands = st.slider("How many low-performing brands to show?", 5, 20, 10)

        # Lowest-selling brands
        low_brands = brand_sales.head(num_brands)

        if low_brands.shape[0] > 0:
            st.metric(
                "Lowest Selling Brand",
                low_brands[0, "BRAND"],
                f"{low_brands[0, 'total_units']} units",
            )

            fig = px.bar(
                low_brands.to_pandas(),
                x="BRAND",
                y="total_units",
                labels={"total_units": "Units Sold", "BRAND": "Brand"},
                title=f"Lowest Selling {num_brands} Brands in Packaged Beverages",
            )
            st.plotly_chart(fig, use_container_width=True)

            _show_table(low_brands)
        else:
            st.warning("No packaged beverage brand data available.")

    except Exception as e:
        st.error(f"Error loading or processing data (Packaged Beverages): {e}")

# ---------------- Tab 3 ----------------
with tab3:
    st.header("Comparison of Cash vs Credit Customers")

    try:
        # Debug aid: show which columns the items frame carries.
        st.write("Items columns:", items.columns)

        # TEMP: simulate the tender type only when it is missing (for
        # structure testing), so a real TENDER_TYPE column is never
        # overwritten. A fixed seed keeps the placeholder values stable
        # across Streamlit reruns.
        if "TENDER_TYPE" not in items.columns:
            rng = random.Random(0)
            items = items.with_columns(
                pl.Series(
                    "TENDER_TYPE",
                    rng.choices(["Cash", "Credit"], k=items.height),
                )
            )

        # Total spend by tender type, largest first.
        amount_summary = (
            items.group_by("TENDER_TYPE")
            .agg(pl.sum("GRAND_TOTAL_AMOUNT").alias("total_amount"))
            .sort("total_amount", descending=True)
        )

        # Total item count by tender type, largest first.
        quantity_summary = (
            items.group_by("TENDER_TYPE")
            .agg(pl.sum("UNIT_QUANTITY").alias("total_items"))
            .sort("total_items", descending=True)
        )

        # Top 20 (tender type, product) pairs with a positive quantity.
        tender_top_products = (
            items.group_by(["TENDER_TYPE", "POS_DESCRIPTION"])
            .agg(pl.sum("UNIT_QUANTITY").alias("total_qty"))
            .sort("total_qty", descending=True)
            .filter(pl.col("total_qty") > 0)
            .head(20)
        )

        # KPIs. Each metric is labelled with the row's own tender type: the
        # summaries are sorted by value, so row order says nothing about
        # which row is Cash and which is Credit.
        if not amount_summary.is_empty():
            st.subheader("💰 Total Spend")
            spend_rows = amount_summary.select("TENDER_TYPE", "total_amount")
            for tender, amount in spend_rows.iter_rows():
                st.metric(str(tender), f"${amount:.2f}")

        if not quantity_summary.is_empty():
            st.subheader("🛒 Total Items Purchased")
            count_rows = quantity_summary.select("TENDER_TYPE", "total_items")
            for tender, count in count_rows.iter_rows():
                st.metric(str(tender), f"{int(count)} items")

        # Bar chart
        st.subheader("Top Products by Payment Method")
        fig = px.bar(
            tender_top_products.to_pandas(),
            x="POS_DESCRIPTION",
            y="total_qty",
            color="TENDER_TYPE",
            barmode="group",
            title="Top Products by Payment Method",
        )
        st.plotly_chart(fig, use_container_width=True)

        # Table
        st.subheader("Spend Summary Table")
        _show_table(amount_summary)

    except Exception as e:
        st.error(f"Error processing Tab 3 (Cash vs Credit): {e}")