Spaces:

Riya1217
/

assignment3

Sleeping

App Files Files Community

assignment3 / assignmentwine.py

Riya1217

Upload 2 files

1a2e1fc verified 5 months ago

raw

history blame contribute delete

3.5 kB

	import streamlit as st
	import pandas as pd
	import matplotlib.pyplot as plt
	import numpy as np
	from sklearn.preprocessing import StandardScaler
	from sklearn.decomposition import PCA
	from sklearn.cluster import KMeans
	from sklearn.metrics import silhouette_score

	# Page title rendered at the top of the Streamlit app
	st.title("🍷 Wine Quality Analysis App")

	# Load dataset
	@st.cache_data
	def load_data():
	    """Download the UCI red-wine quality CSV and return it as a DataFrame.

	    The file is semicolon-delimited, hence ``sep=";"``. The function is
	    wrapped in ``st.cache_data`` so Streamlit can reuse the result instead
	    of fetching the file again on every script rerun.

	    Returns:
	        pandas.DataFrame: the parsed dataset.
	    """
	    return pd.read_csv(
	        "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv",
	        sep=";",
	    )

	# Load the dataset and show its first rows so the user can see
	# which columns are available.
	df = load_data()
	st.subheader("Preview of Dataset")
	preview_rows = df.head()
	st.write(preview_rows)

	# PCA
	# Use every column except the "quality" label as an input feature and
	# standardise them before the decomposition.
	features = df.drop("quality", axis=1)
	scaler = StandardScaler()
	scaled_features = scaler.fit_transform(features)

	# Full decomposition (all components kept) so the cumulative explained
	# variance can be plotted against the number of components.
	pca = PCA()
	pca_result = pca.fit_transform(scaled_features)
	explained_variance = np.cumsum(pca.explained_variance_ratio_)

	st.subheader("PCA Explained Variance")
	fig, ax = plt.subplots()
	ax.plot(range(1, len(explained_variance) + 1), explained_variance, marker="o")
	ax.set_xlabel("Number of Principal Components")
	ax.set_ylabel("Cumulative Explained Variance")
	st.pyplot(fig)
	# pyplot keeps every figure it creates registered until it is closed.
	# Streamlit re-executes this script on each interaction, so without an
	# explicit close the figures pile up in the server process.
	plt.close(fig)

	# Clustering
	# Keep only as many principal components as needed to explain 85% of the
	# variance; all clustering below runs on this reduced representation.
	pca_features = PCA(n_components=0.85).fit_transform(scaled_features)


	@st.cache_data
	def _evaluate_k_range(data, k_values):
	    """Fit KMeans for each candidate k and collect the quality metrics.

	    Cached with ``st.cache_data`` because the sweep is the most expensive
	    step of the script and its inputs do not change when the user moves a
	    slider or picks a cluster; without caching it would be recomputed on
	    every Streamlit rerun.

	    Args:
	        data: feature matrix to cluster.
	        k_values: iterable of cluster counts to try.

	    Returns:
	        tuple[list, list]: inertia and silhouette score per k, in the
	        same order as ``k_values``.
	    """
	    inertia_by_k, silhouette_by_k = [], []
	    for k in k_values:
	        km = KMeans(n_clusters=k, random_state=42, n_init=10)
	        labels = km.fit_predict(data)
	        inertia_by_k.append(km.inertia_)
	        silhouette_by_k.append(silhouette_score(data, labels))
	    return inertia_by_k, silhouette_by_k


	K = range(2, 11)
	# Pass a tuple so the cache key is built from plain integers.
	inertia, silhouette = _evaluate_k_range(pca_features, tuple(K))

	st.subheader("Elbow & Silhouette Method")
	fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
	ax1.plot(K, inertia, marker="o")
	ax1.set_title("Elbow Method")
	ax1.set_xlabel("Clusters")
	ax1.set_ylabel("Inertia")

	ax2.plot(K, silhouette, marker="o", color="orange")
	ax2.set_title("Silhouette Score")
	ax2.set_xlabel("Clusters")
	ax2.set_ylabel("Score")
	st.pyplot(fig)
	# Release the figure: pyplot retains it until closed, and this script is
	# re-executed on every user interaction.
	plt.close(fig)

	# Final model: partition the wines into three clusters and store each
	# wine's label on the DataFrame.
	kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
	kmeans.fit(pca_features)
	df["Cluster"] = kmeans.labels_

	# Per-cluster mean of every column.
	st.subheader("Cluster Profiles")
	cluster_profiles = df.groupby("Cluster").mean()
	st.write(cluster_profiles)

	# Business Insights
	# NOTE(review): these descriptions are fixed text keyed by label number;
	# nothing in this script derives them from the profiles above. Confirm
	# they still match if the data or the model parameters change.
	cluster_insights = {
	    0: "Premium Taste Wines: High alcohol, balanced acidity, high quality",
	    1: "Sweet & Mild Wines: High sugar, low acidity, moderate quality",
	    2: "Sharp & Preservative-heavy Wines: High acidity, high sulfates, lower quality",
	}
	st.subheader("Business Insights")
	for cluster_id in cluster_insights:
	    st.write(f"Cluster {cluster_id}: {cluster_insights[cluster_id]}")

	# ----------------------
	# Interactive Section
	# ----------------------
	st.subheader("🍷 Explore Wines Interactively")

	# Alcohol filter: the slider spans the observed range and starts at the
	# minimum, so no wine is excluded until the user moves it.
	alcohol_low = float(df['alcohol'].min())
	alcohol_high = float(df['alcohol'].max())
	alcohol_val = st.slider("Select minimum alcohol content", alcohol_low, alcohol_high, alcohol_low)
	alcohol_mask = df['alcohol'] >= alcohol_val
	filtered_df = df[alcohol_mask]
	st.write(f"Wines with alcohol ≥ {alcohol_val}")
	st.dataframe(filtered_df)

	# pH filter: bounds come from the full dataset and the slider starts at
	# the maximum; the filter is applied on top of the alcohol selection.
	ph_low = float(df['pH'].min())
	ph_high = float(df['pH'].max())
	ph_val = st.slider("Select maximum pH", ph_low, ph_high, ph_high)
	ph_mask = filtered_df['pH'] <= ph_val
	ph_filtered = filtered_df[ph_mask]
	st.write(f"Wines with alcohol ≥ {alcohol_val} and pH ≤ {ph_val}")
	st.dataframe(ph_filtered)

	# Cluster browser: independent of the two sliders, it lists every wine
	# assigned to the chosen cluster.
	cluster_options = sorted(df['Cluster'].unique())
	cluster_select = st.selectbox("Select Cluster to View", options=cluster_options)
	cluster_mask = df['Cluster'] == cluster_select
	cluster_filtered = df[cluster_mask]
	st.write(f"Wines in Cluster {cluster_select}")
	st.dataframe(cluster_filtered)