# mrciomnl's picture
# initial commit
# 728589d
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mlxtend.frequent_patterns import apriori, association_rules
# Load dataset
def load_data(file_path="Groceries_dataset.csv"):
    """Read the grocery transactions CSV into a DataFrame.

    Args:
        file_path: Location of the CSV file, passed straight to
            ``pandas.read_csv``. Defaults to ``"Groceries_dataset.csv"``
            (resolved relative to the working directory), which is the
            file the app loads when called without arguments.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``; no columns
        are renamed, parsed or filtered here.
    """
    return pd.read_csv(file_path)
# Preprocess dataset for Apriori Algorithm
def preprocess_data(df):
    """Build the member-by-item 0/1 matrix consumed by the Apriori step.

    Args:
        df: Transactions DataFrame with the columns ``'Member_number'``,
            ``'itemDescription'`` and ``'Date'``.

    Returns:
        A DataFrame indexed by ``Member_number`` with one column per
        distinct ``itemDescription``. A cell is 1 when the member has at
        least one row with a non-null ``Date`` for that item, else 0.
    """
    # Count the (non-null) Date rows per member/item pair; pairs that never
    # occur are filled with 0 when the item level is pivoted into columns.
    purchase_counts = (
        df.groupby(['Member_number', 'itemDescription'])['Date']
        .count()
        .unstack(fill_value=0)
    )
    # A vectorized comparison replaces the per-cell ``applymap`` lambda
    # (``DataFrame.applymap`` is deprecated since pandas 2.1) and yields the
    # same 0/1 integer encoding.
    return (purchase_counts > 0).astype(int)
# Train Apriori Model
def train_apriori(basket, min_support=0.01, metric="lift", min_threshold=1):
    """Mine frequent itemsets and derive association rules from them.

    Args:
        basket: One-hot encoded DataFrame (rows are baskets, columns are
            items). Any non-zero cell is treated as "item present".
        min_support: Minimum support passed to ``apriori``.
        metric: Rule metric passed to ``association_rules``. Defaults to
            ``"lift"``, the value this function previously hard-coded.
        min_threshold: Minimum value of ``metric`` for a rule to be kept.
            Defaults to ``1``, the value previously hard-coded.

    Returns:
        A ``(frequent_itemsets, rules)`` tuple: the DataFrame returned by
        ``apriori`` (with item names, because ``use_colnames=True``) and
        the DataFrame returned by ``association_rules``.
    """
    # mlxtend recommends a boolean frame and warns on other dtypes; casting
    # here keeps callers that pass a 0/1 integer matrix working unchanged.
    frequent_itemsets = apriori(
        basket.astype(bool), min_support=min_support, use_colnames=True
    )
    rules = association_rules(
        frequent_itemsets, metric=metric, min_threshold=min_threshold
    )
    return frequent_itemsets, rules
# Streamlit UI
st.title("Market Basket Analysis using Apriori Algorithm")

# Sidebar Navigation
st.sidebar.title("Navigation")
page = st.sidebar.radio("Go to", ["Introduction", "Data Exploration", "Results", "Testing"])


# Load and preprocess data
@st.cache_data
def _prepare_model():
    """Load the dataset, encode it and mine the rules exactly once.

    Streamlit re-executes this script on every widget interaction; caching
    the whole pipeline avoids re-reading the CSV and re-running Apriori on
    each rerun.

    Returns:
        A ``(df, basket, frequent_itemsets, rules)`` tuple: the raw
        transactions, the one-hot basket matrix, and the two DataFrames
        returned by ``train_apriori``.
    """
    raw_transactions = load_data()
    encoded_basket = preprocess_data(raw_transactions)
    itemsets, mined_rules = train_apriori(encoded_basket)
    return raw_transactions, encoded_basket, itemsets, mined_rules


df, basket, frequent_itemsets, rules = _prepare_model()
# Introduction Page
if page == "Introduction":
    st.header("Introduction")
    st.write("This application uses the Apriori algorithm to find associations between different grocery items.")
    st.write("### How to Use:")
    st.write("- Navigate using the sidebar.")
    st.write("- Explore the dataset and model results.")
    st.write("- Test the model by selecting an item to get predictions.")
# Data Exploration Page
elif page == "Data Exploration":
    st.header("Dataset Overview")
    st.write("### Sample Data")
    st.write(df.head())
    st.write("### Unique Items")
    st.write(len(df['itemDescription'].unique()))
    # Plot Most Frequent Items
    st.write("### Most Frequent Items")
    item_counts = df['itemDescription'].value_counts().head(10)
    # Draw on an explicit Figure/Axes instead of pyplot's global state, and
    # close the figure after rendering so figures do not pile up across
    # Streamlit reruns.
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.barplot(x=item_counts.values, y=item_counts.index, ax=ax)
    ax.set_xlabel("Count")
    ax.set_ylabel("Item")
    ax.set_title("Top 10 Most Purchased Items")
    st.pyplot(fig)
    plt.close(fig)
# Results Page
elif page == "Results":
    st.header("Apriori Model Results")
    st.write("### Frequent Itemsets")
    st.write(frequent_itemsets)
    st.write("### Association Rules")
    st.write(rules)
    # Plot Lift Distribution
    st.write("### Lift Distribution")
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.histplot(rules['lift'], bins=20, kde=True, ax=ax)
    ax.set_xlabel("Lift")
    ax.set_title("Distribution of Lift Values in Association Rules")
    st.pyplot(fig)
    plt.close(fig)
# Testing Page
elif page == "Testing":
    st.header("Test the Model")
    # Drop missing descriptions first: sorted() cannot order NaN against str.
    item_list = sorted(df['itemDescription'].dropna().unique())
    selected_item = st.selectbox("Select an item to find associated items:", item_list)
    if selected_item:
        # Antecedents are item sets, so test set membership. The previous
        # substring test against str(antecedents) also matched items whose
        # name merely contains the selection (e.g. "milk" in "whole milk").
        has_selected_item = rules['antecedents'].apply(
            lambda antecedent: selected_item in antecedent
        ).astype(bool)
        recommendations = rules[has_selected_item]
        st.write("### Recommended Items")
        if not recommendations.empty:
            st.write(recommendations[['antecedents', 'consequents', 'confidence', 'lift']])
        else:
            st.write("No strong associations found.")