File size: 6,936 Bytes
e930f2e
 
 
 
 
f563244
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e930f2e
 
 
 
 
 
 
 
 
2395bff
 
e930f2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
import streamlit as st
import polars as pl
import plotly.express as px
from great_tables import GT

import streamlit as st

st.set_page_config(page_title="Test App", layout="wide")

st.title("🚀 Hello from Sam Anderson's Streamlit App")
st.write("✅ The app launched successfully on Hugging Face!")

# Minimal placeholder content
with st.expander("Next steps"):
    st.markdown("""
    - ✅ Now that it works, you can uncomment your data loading lines
    - 🧠 Add store-level filters and plots again
    - 🪄 Make sure at least one thing shows immediately when the app starts
    """)
 ##############

st.title(" App Is Running!")
st.write("If you see this, the app is launching correctly. Now we’ll debug below.")


# Load local Parquet files from the data folder
@st.cache_data
def load_data():
    gtin = pl.read_parquet('data/cstore_master_ctin.parquet').rename({'GTIN': 'gtin'})
    items = pl.read_parquet('data/transaction_items/*.parquet').head(1000).rename({'GTIN': 'gtin'})
    payments = pl.read_parquet('data/cstore_payments.parquet')
    return gtin, items, payments


# Load data
gtin, items, payments = load_data()

# Streamlit UI
st.title("Convenience Store Dashboard")

tab1, tab2, tab3 = st.tabs([
    "Top Products",
    "Packaged Beverages",
    "Cash vs Credit Customers"
])

# ---------------- Tab 1 ----------------
with tab1:
    st.header("Top 5 Products by Weekly Sales (excluding fuels)")

    try:
        # Join and filter
        non_fuel_items = items.join(gtin, on="gtin", how="inner").filter(
            pl.col("CATEGORY") != "Fuels"
        )

        # Count unique dates
        unique_dates = non_fuel_items.select(pl.col("DATE_TIME").n_unique()).item()

        # Group by product and calculate weekly sales
        top_products = non_fuel_items.group_by("POS_DESCRIPTION").agg(
            pl.sum("UNIT_QUANTITY").alias("total_quantity")
        ).with_columns(
            (pl.col("total_quantity") / unique_dates).alias("weekly_sales")
        ).sort("weekly_sales", descending=True).head(5)

        # KPI
        st.metric(
            "Top Selling Product",
            top_products[0, "POS_DESCRIPTION"],
            f"{top_products[0, 'weekly_sales']:.1f} per week"
        )

        # Bar chart
        fig = px.bar(
            top_products.to_pandas(),
            x="POS_DESCRIPTION",
            y="weekly_sales",
            labels={"weekly_sales": "Avg Weekly Sales", "POS_DESCRIPTION": "Product"},
            title="Top 5 Products by Weekly Sales"
        )
        st.plotly_chart(fig, use_container_width=True)

        # Table
        gt_table = GT(top_products.to_pandas())
        st.markdown(gt_table.render("streamlit"), unsafe_allow_html=True)

    except Exception as e:
        st.error(f"Error loading or processing data (Top Products): {e}")

# ---------------- Tab 2 ----------------
with tab2:
    st.header("Brands to Consider Dropping (Packaged Beverage)")

    try:
        # Join and filter for packaged beverages
        beverage_items = items.join(gtin, on="gtin", how="inner").filter(
            pl.col("CATEGORY") == "Packaged Beverages"
        )

        all_beverages = items.join(gtin, on="gtin", how="inner")
        st.write("Unique CATEGORY values:", all_beverages["CATEGORY"].unique().to_list())


        # Aggregate total units sold by brand
        brand_sales = beverage_items.group_by("BRAND").agg(
            pl.sum("UNIT_QUANTITY").alias("total_units")
        ).sort("total_units")

        # User slider for number of brands to view
        num_brands = st.slider("How many low-performing brands to show?", 5, 20, 10)

        # Get lowest-selling brands
        low_brands = brand_sales.head(num_brands)

        if low_brands.shape[0] > 0:
            st.metric(
    "Lowest Selling Brand",
    low_brands[0, "BRAND"],
    f"{low_brands[0, 'total_units']} units"
)



            fig = px.bar(
                low_brands.to_pandas(),
                x="BRAND",
                y="total_units",
                labels={"total_units": "Units Sold", "BRAND": "Brand"},
                title=f"Lowest Selling {num_brands} Brands in Packaged Beverages"
            )
            st.plotly_chart(fig, use_container_width=True)

            gt_table = GT(low_brands.to_pandas())
            st.markdown(gt_table.render("streamlit"), unsafe_allow_html=True)

        else:
            st.warning("No packaged beverage brand data available.")

    except Exception as e:
        st.error(f"Error loading or processing data (Packaged Beverages): {e}")

# ---------------- Tab 3 ----------------
with tab3:
    st.header("Comparison of Cash vs Credit Customers")

    try:
        # Check available columns
        st.write("Items columns:", items.columns)

        # TEMP: simulate tender type if it's missing (for structure testing)
        # You can remove this in real use if you have TENDER_TYPE elsewhere
        import random
        items = items.with_columns([
            pl.Series("TENDER_TYPE", [random.choice(["Cash", "Credit"]) for _ in range(items.shape[0])])
        ])

        # Total spend by tender type
        amount_summary = items.group_by("TENDER_TYPE").agg(
            pl.sum("GRAND_TOTAL_AMOUNT").alias("total_amount")
        ).sort("total_amount", descending=True)

        # Total item count by tender type
        quantity_summary = items.group_by("TENDER_TYPE").agg(
            pl.sum("UNIT_QUANTITY").alias("total_items")
        ).sort("total_items", descending=True)

        # Top products by tender type
        top_products = items.group_by(["TENDER_TYPE", "POS_DESCRIPTION"]).agg(
            pl.sum("UNIT_QUANTITY").alias("total_qty")
        ).sort("total_qty", descending=True).filter(pl.col("total_qty") > 0).head(20)

        # KPIs
        if amount_summary.shape[0] >= 2:
            st.subheader("💰 Total Spend")
            st.metric("Credit", f"${amount_summary[0, 'total_amount']:.2f}")
            st.metric("Cash", f"${amount_summary[1, 'total_amount']:.2f}")

        if quantity_summary.shape[0] >= 2:
            st.subheader("🛒 Total Items Purchased")
            st.metric("Credit", f"{int(quantity_summary[0, 'total_items'])} items")
            st.metric("Cash", f"{int(quantity_summary[1, 'total_items'])} items")

        # Bar chart
        st.subheader("Top Products by Payment Method")
        fig = px.bar(
            top_products.to_pandas(),
            x="POS_DESCRIPTION",
            y="total_qty",
            color="TENDER_TYPE",
            barmode="group",
            title="Top Products by Payment Method"
        )
        st.plotly_chart(fig, use_container_width=True)

        # Table
        st.subheader("Spend Summary Table")
        gt_table = GT(amount_summary.to_pandas())
        st.markdown(gt_table.render("streamlit"), unsafe_allow_html=True)

    except Exception as e:
        st.error(f"Error processing Tab 3 (Cash vs Credit): {e}")