Spaces:
Build error
Build error
| import streamlit as st | |
| import pandas as pd | |
| from mlxtend.frequent_patterns import fpgrowth | |
| import itertools | |
| import seaborn as sns | |
| import matplotlib.pyplot as plt | |
| # Load the dataset | |
def load_data():
    """Read ``retail_sales_dataset.csv`` from the current working directory.

    Returns:
        The parsed ``pandas.DataFrame`` on success. If reading or parsing
        raises any exception, the message is shown in the app through
        ``st.error`` and ``None`` is returned instead.
    """
    try:
        dataset = pd.read_csv("retail_sales_dataset.csv")
    except Exception as e:
        # Surface the failure in the UI instead of crashing the whole app.
        st.error(f"Error loading dataset: {e}")
        return None
    else:
        return dataset
def preprocess_data(df):
    """Group rows into transactions and one-hot encode them for itemset mining.

    Args:
        df: DataFrame expected to contain the columns ``'Transaction ID'``
            and ``'Product Category'``. May be ``None``.

    Returns:
        A ``(transactions, basket_encoded)`` tuple.

        ``transactions`` is a Series indexed by transaction ID whose values
        are the lists of product categories belonging to that transaction.

        ``basket_encoded`` is a boolean DataFrame with one row per
        transaction and one column per distinct category; a cell is ``True``
        when the transaction contains that category. Columns are ordered by
        the string form of the category so the layout is the same on every
        run.

        ``(None, None)`` is returned, and an error is shown via ``st.error``,
        when ``df`` is ``None`` or one of the required columns is missing.
    """
    required_columns = ('Transaction ID', 'Product Category')
    if df is None or any(column not in df.columns for column in required_columns):
        st.error("Dataset does not contain expected columns: 'Transaction ID' and 'Product Category'.")
        return None, None

    transactions = df.groupby('Transaction ID')['Product Category'].apply(list)

    # Iterating a bare set yields an arbitrary order that can change between
    # interpreter runs; sorting keeps the encoded columns (and anything built
    # from them, such as UI option lists) stable. ``key=str`` keeps the sort
    # from failing if the column holds mixed value types.
    unique_items = sorted(set(itertools.chain.from_iterable(transactions)), key=str)

    # Build the boolean matrix directly; converting each transaction to a set
    # makes every membership test O(1).
    rows = [
        [item in members for item in unique_items]
        for members in map(set, transactions)
    ]
    basket_encoded = pd.DataFrame(
        rows,
        index=transactions.index,
        columns=unique_items,
        dtype=bool,
    )
    return transactions, basket_encoded
# Load and preprocess data
# One support threshold shared by every section, so the "Results" table and
# the "Testing" lookup always work on the same set of frequent itemsets.
MIN_SUPPORT = 0.02

df = load_data()
transactions, basket_data = (None, None)
if df is not None:
    transactions, basket_data = preprocess_data(df)

# NOTE: the user-facing text refers to the Eclat algorithm, but the itemsets
# are mined with mlxtend's FP-Growth implementation.
frequent_itemsets = (
    fpgrowth(basket_data, min_support=MIN_SUPPORT, use_colnames=True)
    if basket_data is not None
    else None
)

title_text = "Eclat Algorithm - Buying Pattern Discovery"
st.title(title_text)
st.sidebar.title("Navigation")
st.sidebar.write("Use the sidebar to navigate through different sections of the app.")
section = st.sidebar.radio("Go to", ("Introduction", "Data Exploration", "Results", "Testing"))

if section == "Introduction":
    st.header("Introduction")
    st.write("This application analyzes retail sales data to discover hidden buying patterns using the Eclat Algorithm.")
    st.write("Navigate using the sidebar to explore the dataset, view results, or test the model with your own inputs.")
    st.write("### How to Use This App:")
    st.write("1. Explore the dataset in the 'Data Exploration' section.")
    st.write("2. View frequent product purchase patterns in the 'Results' section.")
    st.write("3. Test the model by entering products in the 'Testing' section.")

elif section == "Data Exploration":
    if df is None:
        # Without this branch the page would simply render empty.
        st.error("No transaction data available. Please check the dataset.")
    else:
        st.header("Data Exploration")
        st.write("### First 5 Rows of the Dataset")
        st.write(df.head())
        st.write("### Data Overview")
        st.write(df.describe())

        # Visualizations
        if 'Product Category' in df.columns:
            st.write("### Product Category Distribution in Transactions")
            fig, ax = plt.subplots(figsize=(10, 5))
            df['Product Category'].value_counts().plot(kind='bar', ax=ax)
            ax.tick_params(axis='x', labelrotation=90)
            ax.set_ylabel("Count")
            ax.set_title("Distribution of Product Categories")
            st.pyplot(fig)
            # The script reruns on every interaction; close the figure so
            # pyplot does not keep accumulating open figures.
            plt.close(fig)
        else:
            st.warning("Column 'Product Category' not found; skipping the distribution chart.")

elif section == "Results":
    st.header("Results")
    if frequent_itemsets is not None:
        st.write("### Frequent Itemsets")
        st.dataframe(frequent_itemsets)
    else:
        st.error("No transaction data available. Please check the dataset.")

elif section == "Testing":
    if basket_data is None or frequent_itemsets is None:
        st.error("No transaction data available. Please check the dataset structure.")
    else:
        st.header("Test the Model")
        unique_items = list(basket_data.columns)
        user_input = st.multiselect("Select products to see associated patterns:", unique_items)
        if user_input:
            user_set = set(user_input)
            # Keep every frequent itemset that contains all selected products.
            contains_selection = frequent_itemsets["itemsets"].apply(user_set.issubset)
            matching_sets = frequent_itemsets[contains_selection]
            st.write("### Matching Itemsets")
            st.dataframe(matching_sets)
        else:
            st.write("Please select at least one product to see associations.")