Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.decomposition import PCA | |
| from sklearn.cluster import KMeans | |
| from sklearn.metrics import silhouette_score | |
# Page heading rendered at the top of the app.
app_title = "🍷 Wine Quality Analysis App"
st.title(app_title)
# Load dataset
@st.cache_data
def load_data():
    """Download the UCI red-wine quality dataset and return it as a DataFrame.

    The CSV is read straight from the UCI repository URL using ``;`` as the
    field separator.

    The result is cached with ``st.cache_data`` because Streamlit re-executes
    the whole script on every widget interaction; without the cache each
    slider or selectbox change would trigger a fresh download. The cache
    hands back a new copy on every call, so callers may add columns to the
    returned frame without affecting the cached data.

    Returns:
        pandas.DataFrame: the parsed contents of ``winequality-red.csv``.
    """
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
    return pd.read_csv(url, sep=";")
# Fetch the dataset once for the rest of the script and show its first rows.
df = load_data()

st.subheader("Preview of Dataset")
preview_rows = df.head(n=5)
st.write(preview_rows)
# PCA
# Every column except the "quality" target is standardised before PCA so
# that no feature dominates purely because of its scale.
features = df.drop(columns="quality")
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Fit PCA with all components to see how variance accumulates.
pca = PCA()
pca_result = pca.fit_transform(scaled_features)
explained_variance = np.cumsum(pca.explained_variance_ratio_)

st.subheader("PCA Explained Variance")
fig, ax = plt.subplots()
component_counts = range(1, len(explained_variance) + 1)
ax.plot(component_counts, explained_variance, marker="o")
ax.set_xlabel("Number of Principal Components")
ax.set_ylabel("Cumulative Explained Variance")
st.pyplot(fig)
# pyplot keeps every figure it creates alive until it is closed explicitly.
# The script re-runs on each widget interaction, so close the figure once it
# has been rendered to avoid accumulating open figures in the server process.
plt.close(fig)
# Clustering
# A float n_components asks PCA to keep just enough components to explain
# that fraction (here 85%) of the variance.
pca_features = PCA(n_components=0.85).fit_transform(scaled_features)

# Scan k = 2..10 and record both model-selection diagnostics for each k.
inertia, silhouette = [], []
K = range(2, 11)
for k in K:
    km = KMeans(n_clusters=k, random_state=42, n_init=10)
    labels = km.fit_predict(pca_features)
    inertia.append(km.inertia_)
    silhouette.append(silhouette_score(pca_features, labels))

st.subheader("Elbow & Silhouette Method")
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: within-cluster sum of squares per k.
ax1.plot(K, inertia, marker="o")
ax1.set_title("Elbow Method")
ax1.set_xlabel("Clusters")
ax1.set_ylabel("Inertia")

# Right panel: mean silhouette coefficient per k.
ax2.plot(K, silhouette, marker="o", color="orange")
ax2.set_title("Silhouette Score")
ax2.set_xlabel("Clusters")
ax2.set_ylabel("Score")

st.pyplot(fig)
# Release the figure after rendering; pyplot would otherwise keep it alive
# and a new one is created on every script rerun.
plt.close(fig)
# Apply clustering with 3 clusters
# Fit the final model on the PCA-reduced features and store each wine's
# cluster label as a new "Cluster" column on the dataset.
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
cluster_labels = kmeans.fit_predict(pca_features)
df["Cluster"] = cluster_labels

# Per-cluster mean of every column.
st.subheader("Cluster Profiles")
cluster_profiles = df.groupby("Cluster").mean()
st.write(cluster_profiles)
# Business Insights
# Hand-written description for each cluster label.
# NOTE(review): K-Means label numbers carry no inherent meaning; these texts
# presumably describe the profiles produced with random_state=42 -- confirm
# them against the "Cluster Profiles" table whenever the data or model changes.
cluster_insights = dict(
    enumerate(
        [
            "Premium Taste Wines: High alcohol, balanced acidity, high quality",
            "Sweet & Mild Wines: High sugar, low acidity, moderate quality",
            "Sharp & Preservative-heavy Wines: High acidity, high sulfates, lower quality",
        ]
    )
)

st.subheader("Business Insights")
for cluster in cluster_insights:
    desc = cluster_insights[cluster]
    st.write(f"**Cluster {cluster}:** {desc}")
# ----------------------
# Interactive Section
# ----------------------
st.subheader("🍷 Explore Wines Interactively")

# Slider for alcohol content
# The slider spans the observed alcohol range and starts at the minimum, so
# no rows are filtered out until the user moves it.
alcohol_series = df["alcohol"]
alcohol_low = float(alcohol_series.min())
alcohol_high = float(alcohol_series.max())
alcohol_val = st.slider(
    "Select minimum alcohol content",
    min_value=alcohol_low,
    max_value=alcohol_high,
    value=alcohol_low,
)

# Keep only wines at or above the chosen alcohol level.
meets_alcohol = alcohol_series >= alcohol_val
filtered_df = df.loc[meets_alcohol]
st.write(f"Wines with alcohol ≥ {alcohol_val}")
st.dataframe(filtered_df)
# Slider for pH
# The bounds come from the full dataset and the slider starts at the maximum,
# so the pH filter is a no-op until the user lowers it.
ph_series = df["pH"]
ph_low = float(ph_series.min())
ph_high = float(ph_series.max())
ph_val = st.slider(
    "Select maximum pH",
    min_value=ph_low,
    max_value=ph_high,
    value=ph_high,
)

# Applied on top of the alcohol filter, so both conditions hold.
within_ph = filtered_df["pH"] <= ph_val
ph_filtered = filtered_df.loc[within_ph]
st.write(f"Wines with alcohol ≥ {alcohol_val} and pH ≤ {ph_val}")
st.dataframe(ph_filtered)
# Dropdown for cluster selection
# Options are the distinct cluster labels in ascending order; this view is
# independent of the alcohol/pH sliders and filters the full dataset.
cluster_options = sorted(df["Cluster"].unique())
cluster_select = st.selectbox("Select Cluster to View", options=cluster_options)

in_selected_cluster = df["Cluster"] == cluster_select
cluster_filtered = df.loc[in_selected_cluster]
st.write(f"Wines in Cluster {cluster_select}")
st.dataframe(cluster_filtered)