Riya1217 committed on
Commit
1a2e1fc
·
verified ·
1 Parent(s): 1d6dfd8

Upload 2 files

Browse files
Files changed (1) hide show
  1. assignmentwine.py +111 -0
assignmentwine.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import numpy as np
5
+ from sklearn.preprocessing import StandardScaler
6
+ from sklearn.decomposition import PCA
7
+ from sklearn.cluster import KMeans
8
+ from sklearn.metrics import silhouette_score
9
+
# Page heading rendered at the top of the app.
st.title("🍷 Wine Quality Analysis App")


@st.cache_data
def load_data() -> pd.DataFrame:
    """Download the red-wine quality CSV and return it as a DataFrame.

    The file is read with ``;`` as the field separator. The function is
    wrapped in ``st.cache_data``, so Streamlit can reuse the result instead
    of fetching the file again on every script rerun.
    """
    csv_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
    frame = pd.read_csv(csv_url, sep=";")
    return frame


# Load the data once and show its first rows.
df = load_data()
st.subheader("Preview of Dataset")
st.write(df.head())
22
+
# PCA
# Use every column except the "quality" target and rescale each one with
# StandardScaler before running PCA.
features = df.drop(columns="quality")
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# A PCA with all components is fitted only to read the per-component variance
# ratios; the projected data itself is not used, so fit() is sufficient.
pca = PCA()
pca.fit(scaled_features)
explained_variance = np.cumsum(pca.explained_variance_ratio_)

st.subheader("PCA Explained Variance")
fig, ax = plt.subplots()
component_counts = range(1, len(explained_variance) + 1)
ax.plot(component_counts, explained_variance, marker="o")
ax.set_xlabel("Number of Principal Components")
ax.set_ylabel("Cumulative Explained Variance")
st.pyplot(fig)
# Streamlit re-executes this script on every interaction; close the figure so
# pyplot does not keep accumulating open figures across reruns.
plt.close(fig)
38
+
# Clustering
# A float n_components asks PCA to keep enough components to reach that
# fraction (here 0.85) of explained variance.
pca_features = PCA(n_components=0.85).fit_transform(scaled_features)


@st.cache_data
def _scan_cluster_counts(data, k_values):
    """Fit KMeans for each candidate k and collect the quality metrics.

    Args:
        data: Feature matrix to cluster.
        k_values: Tuple of cluster counts to evaluate.

    Returns:
        A pair ``(inertias, silhouette_scores)``, each a list with one entry
        per value in ``k_values``, in the same order.

    The scan is cached because the script is re-executed on every widget
    interaction, and each run otherwise refits every candidate model.
    """
    inertias, scores = [], []
    for k in k_values:
        km = KMeans(n_clusters=k, random_state=42, n_init=10)
        labels = km.fit_predict(data)
        inertias.append(km.inertia_)
        scores.append(silhouette_score(data, labels))
    return inertias, scores


K = range(2, 11)
# Pass a tuple so the cache key is built from plain, hashable values.
inertia, silhouette = _scan_cluster_counts(pca_features, tuple(K))

st.subheader("Elbow & Silhouette Method")
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
ax1.plot(K, inertia, marker="o")
ax1.set_title("Elbow Method")
ax1.set_xlabel("Clusters")
ax1.set_ylabel("Inertia")

ax2.plot(K, silhouette, marker="o", color="orange")
ax2.set_title("Silhouette Score")
ax2.set_xlabel("Clusters")
ax2.set_ylabel("Score")
st.pyplot(fig)
# Close the figure after rendering so reruns do not pile up open figures.
plt.close(fig)
62
+
# Final model: split the wines into three clusters in the reduced PCA space
# and store each wine's label on the dataframe.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
cluster_labels = kmeans.fit_predict(pca_features)
df["Cluster"] = cluster_labels

# Show the per-cluster mean of every column.
st.subheader("Cluster Profiles")
cluster_profiles = df.groupby("Cluster").mean()
st.write(cluster_profiles)
69
+
# Business Insights
# NOTE(review): these descriptions are hard-coded per cluster label and are
# not derived from the data in this script — confirm they still match the
# "Cluster Profiles" table whenever the clustering inputs change.
cluster_insights = {
    0: "Premium Taste Wines: High alcohol, balanced acidity, high quality",
    1: "Sweet & Mild Wines: High sugar, low acidity, moderate quality",
    2: "Sharp & Preservative-heavy Wines: High acidity, high sulfates, lower quality",
}
st.subheader("Business Insights")
for cluster_id, description in cluster_insights.items():
    st.write(f"**Cluster {cluster_id}:** {description}")
79
+
# ----------------------
# Interactive Section
# ----------------------
st.subheader("🍷 Explore Wines Interactively")

# Minimum-alcohol filter. The slider spans the observed alcohol range and
# starts at the minimum, so no wine is excluded initially.
alcohol_low = float(df["alcohol"].min())
alcohol_high = float(df["alcohol"].max())
alcohol_val = st.slider(
    "Select minimum alcohol content",
    min_value=alcohol_low,
    max_value=alcohol_high,
    value=alcohol_low,
)
meets_alcohol = df["alcohol"] >= alcohol_val
filtered_df = df[meets_alcohol]
st.write(f"Wines with alcohol ≥ {alcohol_val}")
st.dataframe(filtered_df)
95
+
# Maximum-pH filter, applied on top of the alcohol filter. The slider spans
# the observed pH range and starts at the maximum, so it excludes nothing
# until it is moved.
ph_low = float(df["pH"].min())
ph_high = float(df["pH"].max())
ph_val = st.slider(
    "Select maximum pH",
    min_value=ph_low,
    max_value=ph_high,
    value=ph_high,
)
within_ph = filtered_df["pH"] <= ph_val
ph_filtered = filtered_df[within_ph]
st.write(f"Wines with alcohol ≥ {alcohol_val} and pH ≤ {ph_val}")
st.dataframe(ph_filtered)
106
+
# Cluster picker: list the distinct cluster labels in ascending order and
# show every wine assigned to the chosen one.
cluster_ids = sorted(df["Cluster"].unique())
cluster_select = st.selectbox("Select Cluster to View", options=cluster_ids)
in_selected_cluster = df["Cluster"] == cluster_select
cluster_filtered = df[in_selected_cluster]
st.write(f"Wines in Cluster {cluster_select}")
st.dataframe(cluster_filtered)