Spaces:

Riya1217
/

assignment33

Sleeping

App Files Files Community

Riya1217 committed on Sep 21, 2025

Commit

ec41e13

verified ·

1 Parent(s): e19311f

Upload 2 files

Browse files

Files changed (2) hide show

assignment3.py +190 -0
requriements.txt +0 -0

assignment3.py ADDED Viewed

	@@ -0,0 +1,190 @@

+import streamlit as st
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+from sklearn.decomposition import PCA
+from sklearn.cluster import KMeans
+import matplotlib.pyplot as plt
+import numpy as np
+# Set matplotlib backend for Streamlit compatibility
+plt.switch_backend('Agg')
+# --- Application Title and Introduction ---
+st.title('Wine Quality Clustering Insights')
+st.markdown("""
+This application explores insights derived from applying unsupervised learning (PCA and K-Means clustering)
+to a dataset of red wines based on their chemical properties. The goal is to identify distinct
+segments of wines that can inform business strategies related to marketing, production,
+and product development.
+""")
+# --- Data Loading and Preparation ---
+@st.cache_data # Cache the data loading and preprocessing steps
+def load_data():
+    """Loads the dataset and performs preprocessing."""
+    wine_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
+    wine_data = pd.read_csv(wine_url, sep=';')
+    # Separate features and target
+    features = wine_data.drop('quality', axis=1)
+    # Standardize features
+    scaler = StandardScaler()
+    scaled_features = scaler.fit_transform(features)
+    # Apply PCA (keeping components explaining >= 80% variance)
+    pca = PCA(n_components=0.80)
+    pca_features = pca.fit_transform(scaled_features)
+    # Apply KMeans clustering with 3 clusters (based on previous analysis)
+    kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
+    wine_data['Cluster'] = kmeans.fit_predict(pca_features)
+    return wine_data
+# Load the processed data
+wine_data = load_data()
+# --- Methodology Explanation (Expandable Section) ---
+with st.expander("Explain the Methodology"):
+    st.markdown("""
+    This analysis used the following steps:
+    1.  **Data Preparation:** The dataset was loaded and chemical features were standardized to ensure they
+        are on a similar scale.
+    2.  **Dimensionality Reduction (PCA):** Principal Component Analysis was used to reduce the number
+        of features while retaining most of the original data's variance. This helps in handling
+        multicollinearity and preparing data for clustering.
+    3.  **Clustering (K-Means):** K-Means clustering was applied to the reduced data to group wines
+        with similar chemical properties into distinct clusters. We chose 3 clusters based on
+        evaluation metrics like the Elbow and Silhouette methods (performed in the notebook).
+    """)
+# --- User Interface Controls ---
+st.sidebar.header('Explore Clusters')
+# Get unique cluster numbers and sort them
+cluster_numbers = sorted(wine_data['Cluster'].unique())
+# Create a selectbox for cluster selection in the sidebar
+selected_cluster = st.sidebar.selectbox(
+    'Select Cluster',
+    cluster_numbers
+)
+# Get unique quality ratings and sort them
+quality_ratings = sorted(wine_data['quality'].unique())
+# Create a slider for quality rating selection in the sidebar
+selected_quality = st.sidebar.slider(
+    'Select Quality Rating',
+    min_value=min(quality_ratings),
+    max_value=max(quality_ratings),
+    value=min(quality_ratings), # Set a default value
+    step=1 # Ensure only integer quality values are selected
+)
+# --- Implement Visualizations ---
+st.subheader(f'Characteristics for Cluster {selected_cluster}, Quality {selected_quality}')
+# Filter the wine_data DataFrame based on user selection
+filtered_data = wine_data[
+    (wine_data['Cluster'] == selected_cluster) &
+    (wine_data['quality'] == selected_quality)
+]
+# Check if filtered data is empty
+if filtered_data.empty:
+    st.warning(f"No data found for Cluster {selected_cluster} with Quality {selected_quality}.")
+else:
+    # Create a bar chart for average chemical features
+    avg_features = filtered_data.drop(['quality', 'Cluster'], axis=1).mean()
+    fig1, ax1 = plt.subplots(figsize=(10, 5)) # Adjusted figure size
+    avg_features.plot(kind='bar', ax=ax1, color='skyblue') # Added color
+    # Add labels and title to the bar chart
+    ax1.set_xlabel('Chemical Features')
+    ax1.set_ylabel('Average Value')
+    ax1.set_title(f'Average Chemical Features for Cluster {selected_cluster}, Quality {selected_quality}')
+    plt.xticks(rotation=45, ha='right') # Rotate labels for readability
+    fig1.tight_layout() # Adjust layout to prevent labels overlapping
+    # Display the bar chart
+    st.pyplot(fig1)
+    plt.close(fig1) # Close the figure to free memory
+    # Create a scatter plot of 'alcohol' vs 'volatile acidity'
+    fig2, ax2 = plt.subplots(figsize=(8, 5)) # Adjusted figure size
+    ax2.scatter(filtered_data['alcohol'], filtered_data['volatile acidity'], alpha=0.6, color='lightcoral') # Added alpha and color
+    # Add labels and title to the scatter plot
+    ax2.set_xlabel('Alcohol')
+    ax2.set_ylabel('Volatile Acidity')
+    ax2.set_title(f'Alcohol vs Volatile Acidity for Cluster {selected_cluster}, Quality {selected_quality}')
+    ax2.grid(True, linestyle='--', alpha=0.6) # Add grid
+    # Display the scatter plot
+    st.pyplot(fig2)
+    plt.close(fig2) # Close the figure
+# --- Display Dynamic Insights ---
+st.subheader("Cluster Insights and Recommendations")
+# Define cluster insights and recommendations
+cluster_insights = {
+    0: {
+        "Description": "Premium Taste Wines: Balanced acidity, high alcohol, high quality",
+        "Recommendation": "Market to wine connoisseurs; premium pricing; emphasize quality in promotions."
+    },
+    1: {
+        "Description": "Sweet & Mild Wines: Higher sugar, lower acidity, moderate quality",
+        "Recommendation": "Target casual drinkers; affordable pricing; highlight smooth and approachable taste."
+    },
+    2: {
+        "Description": "Sharp & Preservative-heavy Wines: High acidity, higher sulfates, lower quality",
+        "Recommendation": "Target budget-conscious customers; optimize production to reduce sulfates; focus on cost-efficiency."
+    }
+}
+if selected_cluster in cluster_insights:
+    insight = cluster_insights[selected_cluster]
+    st.markdown(f"**Cluster {selected_cluster}:**")
+    st.markdown(f"- **Description:** {insight['Description']}")
+    st.markdown(f"- **Recommendation:** {insight['Recommendation']}")
+    # Add a dynamic message based on quality
+    if selected_quality >= 6:
+        st.info(f"Based on your selection, wines in this segment (Cluster {selected_cluster}, Quality {selected_quality}) show characteristics often associated with *higher quality* wines.")
+    else:
+        st.info(f"Based on your selection, wines in this segment (Cluster {selected_cluster}, Quality {selected_quality}) show characteristics often associated with *moderate to lower quality* wines. This segment might be suitable for value-focused markets or present opportunities for quality improvement.")
+else:
+    st.write("Select a cluster to see insights.")
+# --- Concluding Section ---
+st.markdown("""
+---
+**Key Takeaways:**
+*   The clustering analysis reveals distinct groups of wines based on their chemical composition.
+*   Understanding these clusters allows for targeted marketing and product strategies.
+*   Wines in Cluster 0 tend to align with 'Premium Taste', Cluster 1 with 'Sweet & Mild', and Cluster 2 with 'Sharp & Preservative-heavy'.
+*   Quality ratings within each cluster can vary, providing further granularity for decision-making.
+**Next Steps:**
+*   Validate these clusters with sensory evaluation data.
+*   Integrate these insights into marketing campaigns and production planning.
+*   Explore other clustering algorithms or feature engineering techniques.
+""")
+# --- requirements.txt content ---
+requirements_content = """streamlit
+pandas
+scikit-learn
+matplotlib
+numpy
+"""
+# Print the requirements.txt content for deployment
+print("\n--- requirements.txt content ---")
+print(requirements_content)
+print("--- end requirements.txt content ---")

requriements.txt ADDED Viewed

Binary file (202 Bytes). View file