# assignment3 / assignmentwine.py
# Riya1217's picture
# Upload 2 files
# 1a2e1fc verified
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
# Title
# The heading text lives in a named constant so it is easy to locate.
APP_TITLE = "🍷 Wine Quality Analysis App"
st.title(APP_TITLE)
# Load dataset
@st.cache_data
def load_data():
    """Read the red-wine quality CSV from the UCI archive into a DataFrame.

    The file is parsed with ``;`` as the field separator. The function is
    wrapped in ``st.cache_data``, so Streamlit may hand back a stored result
    instead of downloading the file again.

    Returns:
        The parsed table as a pandas DataFrame.
    """
    source_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
    wine_table = pd.read_csv(source_url, sep=";")
    return wine_table
# Load the wine table and show its first rows as a quick sanity check.
df = load_data()
st.subheader("Preview of Dataset")
preview_rows = df.head()
st.write(preview_rows)
# PCA
# Every column except "quality" is used as a model input.
features = df.drop(columns="quality")

# Standardise the inputs: fit the scaler, then transform the same data.
scaler = StandardScaler().fit(features)
scaled_features = scaler.transform(features)

# Unrestricted PCA; the running total of the per-component variance ratios
# is what the explained-variance chart plots.
pca = PCA()
pca_result = pca.fit_transform(scaled_features)
explained_variance = pca.explained_variance_ratio_.cumsum()
# Line chart of cumulative explained variance against the number of
# principal components (1-based on the x axis).
st.subheader("PCA Explained Variance")
fig, ax = plt.subplots()
component_counts = range(1, len(explained_variance) + 1)
ax.plot(component_counts, explained_variance, marker="o")
ax.set_xlabel("Number of Principal Components")
ax.set_ylabel("Cumulative Explained Variance")
st.pyplot(fig)
# Figures created through pyplot stay registered until they are closed.
# Streamlit re-executes the script on every interaction, so without this
# each rerun would leave one more figure in memory.
plt.close(fig)
# Clustering
# n_components=0.85 asks PCA to keep as many components as are needed to
# cover 85% of the variance in the scaled features.
pca_features = PCA(n_components=0.85).fit_transform(scaled_features)


@st.cache_data
def _score_cluster_counts(data, cluster_counts):
    """Fit KMeans once per candidate cluster count and score each fit.

    Cached with ``st.cache_data`` because the sweep is by far the most
    expensive step of the script and its inputs do not change when the user
    moves a slider; without the cache every rerun would refit all models.
    The parameter names deliberately have no leading underscore so that
    Streamlit includes both of them in the cache key.

    Args:
        data: 2-D array of samples to cluster.
        cluster_counts: Iterable of candidate ``n_clusters`` values.

    Returns:
        A ``(inertias, silhouettes)`` pair of lists, each holding one value
        per entry of ``cluster_counts``, in the same order.
    """
    inertias, silhouettes = [], []
    for n_clusters in cluster_counts:
        km = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
        labels = km.fit_predict(data)
        inertias.append(km.inertia_)
        silhouettes.append(silhouette_score(data, labels))
    return inertias, silhouettes


# Candidate cluster counts 2..10; kept as a range because the plots use it
# as the x axis. A tuple copy is passed to the cached helper so the argument
# is a plain, easily hashed container.
K = range(2, 11)
inertia, silhouette = _score_cluster_counts(pca_features, tuple(K))
# Side-by-side diagnostics for choosing the number of clusters:
# left = inertia per k (elbow method), right = silhouette score per k.
st.subheader("Elbow & Silhouette Method")
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

ax1.plot(K, inertia, marker="o")
ax1.set_title("Elbow Method")
ax1.set_xlabel("Clusters")
ax1.set_ylabel("Inertia")

ax2.plot(K, silhouette, marker="o", color="orange")
ax2.set_title("Silhouette Score")
ax2.set_xlabel("Clusters")
ax2.set_ylabel("Score")

st.pyplot(fig)
# Release the figure once it has been rendered: pyplot keeps every figure it
# creates until it is closed, and this script is re-run on each interaction.
plt.close(fig)
# Apply clustering with 3 clusters
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
cluster_labels = kmeans.fit_predict(pca_features)
# Store each row's cluster id next to the original measurements.
df["Cluster"] = cluster_labels

st.subheader("Cluster Profiles")
# One row per cluster: the mean of every column within that cluster.
cluster_profiles = df.groupby("Cluster").mean()
st.write(cluster_profiles)
# Business Insights
# NOTE(review): these descriptions are fixed text keyed by cluster id; nothing
# here derives them from the computed cluster profiles, so confirm they still
# match the profile table whenever the data or clustering setup changes.
cluster_insights = dict(
    enumerate(
        (
            "Premium Taste Wines: High alcohol, balanced acidity, high quality",
            "Sweet & Mild Wines: High sugar, low acidity, moderate quality",
            "Sharp & Preservative-heavy Wines: High acidity, high sulfates, lower quality",
        )
    )
)

st.subheader("Business Insights")
# Render one bold-labelled line per cluster, in id order (0, 1, 2).
for cluster in cluster_insights:
    desc = cluster_insights[cluster]
    st.write(f"**Cluster {cluster}:** {desc}")
# ----------------------
# Interactive Section
# ----------------------
st.subheader("🍷 Explore Wines Interactively")

# Slider for alcohol content: spans the observed range and starts at the
# observed minimum.
alcohol_column = df["alcohol"]
alcohol_low = float(alcohol_column.min())
alcohol_high = float(alcohol_column.max())
alcohol_val = st.slider(
    "Select minimum alcohol content",
    min_value=alcohol_low,
    max_value=alcohol_high,
    value=alcohol_low,
)

# Keep the rows whose alcohol value is at least the chosen threshold.
filtered_df = df.loc[alcohol_column >= alcohol_val]
st.write(f"Wines with alcohol ≥ {alcohol_val}")
st.dataframe(filtered_df)
# Slider for pH: bounds come from the full table (not the alcohol-filtered
# one) and the initial value is the observed maximum.
ph_low = float(df["pH"].min())
ph_high = float(df["pH"].max())
ph_val = st.slider(
    "Select maximum pH",
    min_value=ph_low,
    max_value=ph_high,
    value=ph_high,
)

# Narrow the alcohol-filtered rows further by the pH ceiling.
ph_mask = filtered_df["pH"] <= ph_val
ph_filtered = filtered_df.loc[ph_mask]
st.write(f"Wines with alcohol ≥ {alcohol_val} and pH ≤ {ph_val}")
st.dataframe(ph_filtered)
# Dropdown for cluster selection: offers every cluster id present in the
# table, in ascending order.
available_clusters = sorted(df["Cluster"].unique())
cluster_select = st.selectbox("Select Cluster to View", options=available_clusters)

# Show the rows of the whole table that belong to the chosen cluster; the
# alcohol and pH filters are not applied here.
in_selected_cluster = df["Cluster"] == cluster_select
cluster_filtered = df.loc[in_selected_cluster]
st.write(f"Wines in Cluster {cluster_select}")
st.dataframe(cluster_filtered)