Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from mlxtend.frequent_patterns import apriori, association_rules | |
| # Load dataset | |
def load_data(file_path: str = "Groceries_dataset.csv") -> pd.DataFrame:
    """Load the groceries transaction data from a CSV file.

    Args:
        file_path: Location of the CSV file, passed straight to
            ``pandas.read_csv``. Defaults to ``"Groceries_dataset.csv"``
            (resolved relative to the current working directory), which is
            the path this function always used before it became a parameter.

    Returns:
        The parsed CSV as a ``pandas.DataFrame``.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist (raised by
            ``pandas.read_csv``).
    """
    return pd.read_csv(file_path)
| # Preprocess dataset for Apriori Algorithm | |
def preprocess_data(df):
    """Build the member-by-item basket matrix consumed by the Apriori step.

    Args:
        df: Transactions with at least the columns ``Member_number``,
            ``itemDescription`` and ``Date``.

    Returns:
        A boolean ``DataFrame`` indexed by ``Member_number`` with one column
        per ``itemDescription``. A cell is ``True`` when that member has at
        least one row with a non-null ``Date`` for that item, else ``False``.
        ``True``/``False`` compare equal to the ``1``/``0`` flags produced
        previously.
    """
    # Number of (non-null) purchase dates per member/item pair.
    purchase_counts = df.groupby(['Member_number', 'itemDescription'])['Date'].count()
    # Pivot items into columns; pairs that never occur count as zero.
    count_matrix = purchase_counts.unstack(fill_value=0)
    # Vectorised presence test: replaces the deprecated per-cell
    # ``applymap`` lambda and yields bool dtype directly.
    return count_matrix > 0
| # Train Apriori Model | |
def train_apriori(basket, min_support=0.01, metric="lift", min_threshold=1):
    """Mine frequent itemsets and derive association rules from a basket matrix.

    Args:
        basket: One-hot basket matrix (rows are baskets, columns are items),
            as produced by ``preprocess_data``.
        min_support: Minimum support passed to ``apriori``.
        metric: Rule-quality metric passed to ``association_rules``.
            Defaults to ``"lift"``, the value that used to be hard-coded.
        min_threshold: Minimum value of ``metric`` for a rule to be kept.
            Defaults to ``1``, the value that used to be hard-coded.

    Returns:
        A ``(frequent_itemsets, rules)`` tuple of DataFrames. When no itemset
        reaches ``min_support``, ``rules`` is an empty DataFrame that still
        carries the rule columns the rest of the app reads.
    """
    frequent_itemsets = apriori(basket, min_support=min_support, use_colnames=True)

    if frequent_itemsets.empty:
        # association_rules raises ValueError when handed an empty itemset
        # table, so return an empty rules frame instead of crashing.
        # NOTE(review): column list mirrors the usual mlxtend rule columns;
        # extend it if callers need further metrics on the empty frame.
        empty_rules = pd.DataFrame(
            columns=[
                "antecedents",
                "consequents",
                "antecedent support",
                "consequent support",
                "support",
                "confidence",
                "lift",
            ]
        )
        return frequent_itemsets, empty_rules

    rules = association_rules(
        frequent_itemsets, metric=metric, min_threshold=min_threshold
    )
    return frequent_itemsets, rules
| # Streamlit UI | |
# Page title shown above every view.
st.title("Market Basket Analysis using Apriori Algorithm")

# Sidebar: heading plus a radio selector that decides which page is rendered.
sidebar = st.sidebar
sidebar.title("Navigation")
page = sidebar.radio(
    "Go to",
    ["Introduction", "Data Exploration", "Results", "Testing"],
)

# Run the full pipeline up front (raw data -> basket matrix -> itemsets and
# rules) so that every page below can read the results.
# NOTE(review): this executes each time the script runs; if Streamlit reruns
# become slow, consider caching these three steps.
df = load_data()
basket = preprocess_data(df)
mining_output = train_apriori(basket)
frequent_itemsets, rules = mining_output
| # Introduction Page | |
# Render the page chosen in the sidebar. Reads the module-level ``page``,
# ``df``, ``frequent_itemsets`` and ``rules`` prepared earlier in the script.
if page == "Introduction":
    st.header("Introduction")
    st.write("This application uses the Apriori algorithm to find associations between different grocery items.")
    st.write("### How to Use:")
    st.write("- Navigate using the sidebar.")
    st.write("- Explore the dataset and model results.")
    st.write("- Test the model by selecting an item to get predictions.")

# Data Exploration Page
elif page == "Data Exploration":
    st.header("Dataset Overview")
    st.write("### Sample Data")
    st.write(df.head())
    st.write("### Unique Items")
    st.write(len(df['itemDescription'].unique()))

    # Plot Most Frequent Items
    st.write("### Most Frequent Items")
    item_counts = df['itemDescription'].value_counts().head(10)
    # Draw on an explicit Figure/Axes rather than passing the global pyplot
    # module to Streamlit, and close the figure afterwards so figures do not
    # pile up in memory across script reruns.
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.barplot(x=item_counts.values, y=item_counts.index, ax=ax)
    ax.set_xlabel("Count")
    ax.set_ylabel("Item")
    ax.set_title("Top 10 Most Purchased Items")
    st.pyplot(fig)
    plt.close(fig)

# Results Page
elif page == "Results":
    st.header("Apriori Model Results")
    st.write("### Frequent Itemsets")
    st.write(frequent_itemsets)
    st.write("### Association Rules")
    st.write(rules)

    # Plot Lift Distribution
    st.write("### Lift Distribution")
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.histplot(rules['lift'], bins=20, kde=True, ax=ax)
    ax.set_xlabel("Lift")
    ax.set_title("Distribution of Lift Values in Association Rules")
    st.pyplot(fig)
    plt.close(fig)

# Testing Page
elif page == "Testing":
    st.header("Test the Model")
    # Drop missing descriptions first: sorting a mix of NaN and str raises.
    item_list = sorted(df['itemDescription'].dropna().unique())
    selected_item = st.selectbox("Select an item to find associated items:", item_list)
    if selected_item:
        # Exact membership in the antecedent itemset. A substring test on
        # the set's string form would make "milk" match "whole milk" too.
        has_item = rules['antecedents'].apply(
            lambda itemset: selected_item in itemset
        ).astype(bool)
        recommendations = rules[has_item]
        st.write("### Recommended Items")
        if not recommendations.empty:
            st.write(recommendations[['antecedents', 'consequents', 'confidence', 'lift']])
        else:
            st.write("No strong associations found.")