Spaces:

Sam0398
/

streamlit_docker_wi25_challenge_SamAnderson

Runtime error

streamlit_docker_wi25_challenge_SamAnderson

File size: 6,936 Bytes

import streamlit as st
import polars as pl
import plotly.express as px
from great_tables import GT

import streamlit as st

# Configure the page, then render a static banner so something is visible
# before any data loading happens further down the script.
st.set_page_config(layout="wide", page_title="Test App")

st.title("🚀 Hello from Sam Anderson's Streamlit App")
st.write("✅ The app launched successfully on Hugging Face!")

# Placeholder checklist shown inside a collapsible section.
_NEXT_STEPS_MD = """
    - ✅ Now that it works, you can uncomment your data loading lines
    - 🧠 Add store-level filters and plots again
    - 🪄 Make sure at least one thing shows immediately when the app starts
    """
with st.expander("Next steps"):
    st.markdown(_NEXT_STEPS_MD)

# Second banner: confirms the script got past the placeholder section.
st.title(" App Is Running!")
st.write("If you see this, the app is launching correctly. Now we’ll debug below.")


# Load local Parquet files from the data folder
@st.cache_data
def load_data():
    """Load the parquet datasets used by the dashboard.

    The result is memoised by ``st.cache_data``, so the files are read once
    and reused across Streamlit reruns.

    Returns:
        A ``(gtin, items, payments)`` tuple of polars DataFrames:

        * ``gtin``  - product master table, with ``GTIN`` renamed to ``gtin``.
        * ``items`` - the first 1000 transaction-item rows found under
          ``data/transaction_items/``, with ``GTIN`` renamed to ``gtin``.
        * ``payments`` - the payments table, unmodified.
    """
    # NOTE(review): file name says "ctin" while the column is GTIN - confirm
    # this matches the file actually shipped in data/.
    gtin = pl.read_parquet('data/cstore_master_ctin.parquet').rename({'GTIN': 'gtin'})

    # Only a 1000-row sample is used. Passing n_rows lets the reader stop
    # early instead of materialising every file matched by the glob and
    # discarding all but the head afterwards.
    items = pl.read_parquet(
        'data/transaction_items/*.parquet',
        n_rows=1000,
    ).rename({'GTIN': 'gtin'})

    payments = pl.read_parquet('data/cstore_payments.parquet')
    return gtin, items, payments


# Load data
# load_data is wrapped in st.cache_data, so this call is cheap on reruns.
gtin, items, payments = load_data()

# Dashboard heading followed by one tab per analysis.
st.title("Convenience Store Dashboard")

_TAB_LABELS = [
    "Top Products",
    "Packaged Beverages",
    "Cash vs Credit Customers",
]
tab1, tab2, tab3 = st.tabs(_TAB_LABELS)

# ---------------- Tab 1 ----------------
# Tab 1: the five best-selling non-fuel products, shown as a KPI, a bar
# chart and a table. Any failure is reported inside the tab via st.error.
with tab1:
    st.header("Top 5 Products by Weekly Sales (excluding fuels)")

    try:
        # Attach product attributes to each sold item and drop fuel sales.
        non_fuel_items = items.join(gtin, on="gtin", how="inner").filter(
            pl.col("CATEGORY") != "Fuels"
        )

        # NOTE(review): this is the number of distinct DATE_TIME values, not
        # a number of weeks, so "weekly_sales" is really "units per distinct
        # timestamp". Confirm the DATE_TIME dtype and derive a week count
        # from the date span if a true weekly average is intended.
        unique_dates = non_fuel_items.select(pl.col("DATE_TIME").n_unique()).item()

        # Total units per product, scaled by the denominator above; keep top 5.
        top_products = non_fuel_items.group_by("POS_DESCRIPTION").agg(
            pl.sum("UNIT_QUANTITY").alias("total_quantity")
        ).with_columns(
            (pl.col("total_quantity") / unique_dates).alias("weekly_sales")
        ).sort("weekly_sales", descending=True).head(5)

        # Guard against an empty result (no matching GTINs, or only fuel
        # rows) so the user gets a clear message instead of an index error.
        if top_products.shape[0] > 0:
            # KPI
            st.metric(
                "Top Selling Product",
                top_products[0, "POS_DESCRIPTION"],
                f"{top_products[0, 'weekly_sales']:.1f} per week"
            )

            # Bar chart
            fig = px.bar(
                top_products.to_pandas(),
                x="POS_DESCRIPTION",
                y="weekly_sales",
                labels={"weekly_sales": "Avg Weekly Sales", "POS_DESCRIPTION": "Product"},
                title="Top 5 Products by Weekly Sales"
            )
            st.plotly_chart(fig, use_container_width=True)

            # Table
            gt_table = GT(top_products.to_pandas())
            st.markdown(gt_table.render("streamlit"), unsafe_allow_html=True)

        else:
            st.warning("No non-fuel product data available.")

    except Exception as e:
        st.error(f"Error loading or processing data (Top Products): {e}")

# ---------------- Tab 2 ----------------
# Tab 2: the lowest-selling brands in the "Packaged Beverages" category,
# with a slider controlling how many brands are listed.
with tab2:
    st.header("Brands to Consider Dropping (Packaged Beverage)")

    try:
        # Join items to the product master once and reuse the result for
        # both the category filter and the debug listing below.
        joined_items = items.join(gtin, on="gtin", how="inner")
        beverage_items = joined_items.filter(
            pl.col("CATEGORY") == "Packaged Beverages"
        )

        # Debug aid: show which CATEGORY values exist in the joined data.
        st.write("Unique CATEGORY values:", joined_items["CATEGORY"].unique().to_list())

        # Total units sold per brand, ascending so the weakest come first.
        brand_sales = beverage_items.group_by("BRAND").agg(
            pl.sum("UNIT_QUANTITY").alias("total_units")
        ).sort("total_units")

        # User slider for number of brands to view (min 5, max 20, default 10).
        num_brands = st.slider("How many low-performing brands to show?", 5, 20, 10)

        # Lowest-selling brands.
        low_brands = brand_sales.head(num_brands)

        if low_brands.shape[0] > 0:
            st.metric(
                "Lowest Selling Brand",
                low_brands[0, "BRAND"],
                f"{low_brands[0, 'total_units']} units"
            )

            fig = px.bar(
                low_brands.to_pandas(),
                x="BRAND",
                y="total_units",
                labels={"total_units": "Units Sold", "BRAND": "Brand"},
                title=f"Lowest Selling {num_brands} Brands in Packaged Beverages"
            )
            st.plotly_chart(fig, use_container_width=True)

            gt_table = GT(low_brands.to_pandas())
            st.markdown(gt_table.render("streamlit"), unsafe_allow_html=True)

        else:
            st.warning("No packaged beverage brand data available.")

    except Exception as e:
        st.error(f"Error loading or processing data (Packaged Beverages): {e}")

# ---------------- Tab 3 ----------------
# Tab 3: spend and item counts split by tender type, plus the top products
# per tender type. Any failure is reported inside the tab via st.error.
with tab3:
    st.header("Comparison of Cash vs Credit Customers")

    try:
        # Debug aid: show which columns the items table actually has.
        st.write("Items columns:", items.columns)

        # TEMP: simulate a tender type ONLY when the column is missing, so a
        # real TENDER_TYPE column is never overwritten with random values.
        # NOTE(review): simulated values change on every rerun; replace with
        # the real tender data once it is available.
        if "TENDER_TYPE" not in items.columns:
            import random
            items = items.with_columns([
                pl.Series(
                    "TENDER_TYPE",
                    [random.choice(["Cash", "Credit"]) for _ in range(items.shape[0])],
                    dtype=pl.Utf8,
                )
            ])

        # Total spend by tender type, largest first.
        amount_summary = items.group_by("TENDER_TYPE").agg(
            pl.sum("GRAND_TOTAL_AMOUNT").alias("total_amount")
        ).sort("total_amount", descending=True)

        # Total item count by tender type, largest first.
        quantity_summary = items.group_by("TENDER_TYPE").agg(
            pl.sum("UNIT_QUANTITY").alias("total_items")
        ).sort("total_items", descending=True)

        # Top 20 (tender type, product) pairs with a positive quantity.
        top_products = items.group_by(["TENDER_TYPE", "POS_DESCRIPTION"]).agg(
            pl.sum("UNIT_QUANTITY").alias("total_qty")
        ).sort("total_qty", descending=True).filter(pl.col("total_qty") > 0).head(20)

        # KPIs. Each metric is labelled with the tender type taken from its
        # own row: the summaries are sorted by value, so row 0 is not
        # guaranteed to be "Credit" nor row 1 "Cash".
        if amount_summary.shape[0] > 0:
            st.subheader("💰 Total Spend")
            for tender, total_amount in amount_summary.select(
                ["TENDER_TYPE", "total_amount"]
            ).rows():
                st.metric(str(tender), f"${total_amount:.2f}")

        if quantity_summary.shape[0] > 0:
            st.subheader("🛒 Total Items Purchased")
            for tender, total_items in quantity_summary.select(
                ["TENDER_TYPE", "total_items"]
            ).rows():
                st.metric(str(tender), f"{int(total_items)} items")

        # Bar chart
        st.subheader("Top Products by Payment Method")
        fig = px.bar(
            top_products.to_pandas(),
            x="POS_DESCRIPTION",
            y="total_qty",
            color="TENDER_TYPE",
            barmode="group",
            title="Top Products by Payment Method"
        )
        st.plotly_chart(fig, use_container_width=True)

        # Table
        st.subheader("Spend Summary Table")
        gt_table = GT(amount_summary.to_pandas())
        st.markdown(gt_table.render("streamlit"), unsafe_allow_html=True)

    except Exception as e:
        st.error(f"Error processing Tab 3 (Cash vs Credit): {e}")