Spaces:

mrciomnl
/

association_rule_learning

Sleeping

App Files Files Community

association_rule_learning / app.py

mrciomnl

initial commit

728589d about 1 year ago

raw

history blame contribute delete

3.16 kB

	import streamlit as st
	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	from mlxtend.frequent_patterns import apriori, association_rules

	# Load dataset
	def load_data(file_path: str = "Groceries_dataset.csv") -> pd.DataFrame:
	    """Read the grocery transactions CSV into a DataFrame.

	    Args:
	        file_path: Path of the CSV file to read. Defaults to
	            ``"Groceries_dataset.csv"``, which is resolved relative to the
	            current working directory.

	    Returns:
	        The DataFrame produced by ``pandas.read_csv``; no columns are
	        renamed, parsed or filtered here.
	    """
	    return pd.read_csv(file_path)

	# Preprocess dataset for Apriori Algorithm
	def preprocess_data(df):
	    """Build the one-hot member-by-item matrix consumed by Apriori.

	    Rows are grouped by ``Member_number`` and ``itemDescription`` only, so
	    everything a member bought ends up in a single row regardless of date.

	    Args:
	        df: Transactions with ``Member_number``, ``itemDescription`` and
	            ``Date`` columns.

	    Returns:
	        DataFrame indexed by ``Member_number`` with one column per item.
	        A cell is 1 when the member has at least one row for that item with
	        a non-null ``Date``, otherwise 0.
	    """
	    purchase_counts = (
	        df.groupby(['Member_number', 'itemDescription'])['Date']
	        .count()
	        .unstack(fill_value=0)
	    )
	    # A vectorized comparison replaces the per-cell ``applymap`` lambda, which
	    # is deprecated in pandas >= 2.1 and slow on a wide member x item matrix.
	    return (purchase_counts > 0).astype(int)

	# Train Apriori Model
	def train_apriori(basket, min_support=0.01):
	    """Mine frequent itemsets and derive association rules from them.

	    Args:
	        basket: One-hot encoded DataFrame (0/1 or bool cells), one row per
	            basket and one column per item.
	        min_support: Minimum support passed to ``apriori``.

	    Returns:
	        A ``(frequent_itemsets, rules)`` tuple: the ``apriori`` output with
	        item names as column labels, and the ``association_rules`` output
	        filtered with ``metric="lift"`` and ``min_threshold=1``.
	    """
	    # mlxtend emits a DeprecationWarning for non-bool input (slower, and
	    # support may be dropped), so hand it booleans. For a 0/1 matrix this
	    # does not change the mined itemsets.
	    encoded = basket.astype(bool)
	    frequent_itemsets = apriori(encoded, min_support=min_support, use_colnames=True)
	    rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
	    return frequent_itemsets, rules

	# Streamlit UI
	st.title("Market Basket Analysis using Apriori Algorithm")

	# Sidebar Navigation
	st.sidebar.title("Navigation")
	page = st.sidebar.radio("Go to", ["Introduction", "Data Exploration", "Results", "Testing"])


	# Streamlit re-executes this whole script on every widget interaction.
	# Caching keeps the CSV read and the Apriori training from being repeated on
	# each rerun. The cache is keyed on this function only, so a changed CSV
	# requires clearing the cache or restarting the app to be picked up.
	@st.cache_data(show_spinner=False)
	def _build_model():
	    """Load the dataset, one-hot encode it and train Apriori once."""
	    transactions = load_data()
	    encoded = preprocess_data(transactions)
	    itemsets, mined_rules = train_apriori(encoded)
	    return transactions, encoded, itemsets, mined_rules


	# Load and preprocess data
	df, basket, frequent_itemsets, rules = _build_model()

	# Render the page chosen in the sidebar. Each branch only reads the
	# module-level ``df``, ``frequent_itemsets`` and ``rules`` objects.

	# Introduction Page
	if page == "Introduction":
	    st.header("Introduction")
	    st.write("This application uses the Apriori algorithm to find associations between different grocery items.")
	    st.write("### How to Use:")
	    st.write("- Navigate using the sidebar.")
	    st.write("- Explore the dataset and model results.")
	    st.write("- Test the model by selecting an item to get predictions.")

	# Data Exploration Page
	elif page == "Data Exploration":
	    st.header("Dataset Overview")
	    st.write("### Sample Data")
	    st.write(df.head())
	    st.write("### Unique Items")
	    st.write(len(df['itemDescription'].unique()))

	    # Plot Most Frequent Items
	    st.write("### Most Frequent Items")
	    item_counts = df['itemDescription'].value_counts().head(10)
	    # Use an explicit Figure/Axes instead of pyplot's global state, and close
	    # the figure after rendering so figures do not pile up across reruns.
	    fig, ax = plt.subplots(figsize=(10, 5))
	    sns.barplot(x=item_counts.values, y=item_counts.index, ax=ax)
	    ax.set_xlabel("Count")
	    ax.set_ylabel("Item")
	    ax.set_title("Top 10 Most Purchased Items")
	    st.pyplot(fig)
	    plt.close(fig)

	# Results Page
	elif page == "Results":
	    st.header("Apriori Model Results")
	    st.write("### Frequent Itemsets")
	    st.write(frequent_itemsets)

	    st.write("### Association Rules")
	    st.write(rules)

	    # Plot Lift Distribution
	    st.write("### Lift Distribution")
	    fig, ax = plt.subplots(figsize=(10, 5))
	    sns.histplot(rules['lift'], bins=20, kde=True, ax=ax)
	    ax.set_xlabel("Lift")
	    ax.set_title("Distribution of Lift Values in Association Rules")
	    st.pyplot(fig)
	    plt.close(fig)

	# Testing Page
	elif page == "Testing":
	    st.header("Test the Model")
	    item_list = sorted(df['itemDescription'].unique())
	    selected_item = st.selectbox("Select an item to find associated items:", item_list)

	    if selected_item:
	        # Antecedents are frozensets of item names. Test membership on the
	        # set itself: a substring test on its string form would also match
	        # longer names, e.g. "butter" inside "butter milk".
	        has_item = rules['antecedents'].apply(lambda items: selected_item in items)
	        recommendations = rules[has_item]
	        st.write("### Recommended Items")
	        if not recommendations.empty:
	            st.write(recommendations[['antecedents', 'consequents', 'confidence', 'lift']])
	        else:
	            st.write("No strong associations found.")