"""Streamlit app that classifies red/white wines as good (quality >= 6) or not.

The script downloads the UCI red and white wine-quality CSV files, trains a
random-forest classifier on the combined data, reports test-set accuracy and
lets the user score a hypothetical wine through sliders.

NOTE(review): several ``st.markdown(..., unsafe_allow_html=True)`` calls below
carry whitespace-only strings. Their HTML/CSS payload appears to be missing
from this copy of the file; the literals are reproduced unchanged so the
original markup can be restored in place.
"""
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# -------------------------- Page Config --------------------------
st.set_page_config(
    page_title="Wine Quality Master 🍷",
    page_icon="🍇",
    layout="centered",
    initial_sidebar_state="expanded"
)

# -------------------------- Custom CSS - Dark Purple Magic --------------------------
st.markdown(""" """, unsafe_allow_html=True)

# One-hot columns produced by pd.get_dummies(..., columns=["type"]).
TYPE_COLUMNS = ["type_Red", "type_White"]


# -------------------------- Load Full Wine Quality Dataset (Red + White) --------------------------
@st.cache_data
def load_wine_data():
    """Download and combine the red and white wine-quality datasets.

    Returns:
        A DataFrame holding both CSV files stacked with a fresh index, plus
        two added columns: ``type`` ("Red" or "White") and ``is_good``
        (1 when ``quality`` >= 6, otherwise 0).
    """
    # Red wine
    red = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv", sep=";")
    red["type"] = "Red"

    # White wine
    white = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv", sep=";")
    white["type"] = "White"

    # Combine
    df = pd.concat([red, white], ignore_index=True)

    # Binary classification: Good (>=6), Bad (<6)
    df["is_good"] = (df["quality"] >= 6).astype(int)
    return df


def _scale_features(frame, fitted_scaler, numeric_columns):
    """Scale the numeric columns of *frame* and re-attach the type columns.

    The result always has ``numeric_columns`` first, followed by
    ``TYPE_COLUMNS``, so training, test and user-input frames share one
    column order.
    """
    scaled = fitted_scaler.transform(frame[numeric_columns])
    return pd.concat([
        pd.DataFrame(scaled, columns=numeric_columns, index=frame.index),
        frame[TYPE_COLUMNS]
    ], axis=1)


def _slider_step_and_format(min_v, max_v):
    """Pick a slider step and printf-style format suited to a value range.

    The step is the power of ten at or just below 1/100 of the range, giving
    between 100 and 1000 stops. The format shows at least two decimals and
    enough of them to display the step. A degenerate (empty or non-finite)
    range falls back to the former fixed values ``0.1`` / ``"%.2f"``.
    """
    span = max_v - min_v
    if not np.isfinite(span) or span <= 0:
        return 0.1, "%.2f"
    exponent = int(np.floor(np.log10(span / 100)))
    decimals = max(2, -exponent)
    return float(10.0 ** exponent), f"%.{decimals}f"


df = load_wine_data()

# -------------------------- Hero Section --------------------------
col1, col2, col3 = st.columns([1, 3, 1])
# NOTE(review): nesting under `with col2` is inferred from statement order;
# the original indentation was lost. Confirm which calls belong in the column.
with col2:
    st.markdown("\n\nWine Quality Master\n\n", unsafe_allow_html=True)
    st.markdown("\n\nVinoVerdict\n\n", unsafe_allow_html=True)
    st.markdown("\n\nRed or White – Will it be divine... or declined?\n\n", unsafe_allow_html=True)

st.markdown("---")

# -------------------------- Dataset Info --------------------------
with st.container():
    st.markdown("\n\n", unsafe_allow_html=True)
    col1, col2, col3, col4 = st.columns(4)
    # Count rows with a boolean sum instead of building filtered copies.
    red_count = int((df["type"] == "Red").sum())
    white_count = int((df["type"] == "White").sum())
    with col1:
        st.metric("Total Wines", f"{len(df):,}")
    with col2:
        st.metric("Red Wines", f"{red_count:,}", "Red Wine")
    with col3:
        st.metric("White Wines", f"{white_count:,}", "White Wine")
    with col4:
        st.metric("Good Wines (≥6)", f"{df['is_good'].sum():,}")
    st.markdown("\n", unsafe_allow_html=True)
    st.dataframe(df.head(), use_container_width=True)
    st.markdown("\n\n", unsafe_allow_html=True)

# -------------------------- Prepare Features --------------------------
X = df.drop(columns=["quality", "is_good"])
y = df["is_good"]

# One-hot encode wine type (ensures consistent column order)
X = pd.get_dummies(X, columns=["type"], drop_first=False)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scale numerical features (preserve exact column order). The scaler is
# fitted on the training split only and then applied to every frame.
numerical_cols = [col for col in X.columns if col not in TYPE_COLUMNS]
scaler = StandardScaler()
scaler.fit(X_train[numerical_cols])
X_train = _scale_features(X_train, scaler, numerical_cols)
X_test = _scale_features(X_test, scaler, numerical_cols)


# Train model
@st.cache_resource
def train_model():
    """Fit the random-forest classifier on the prepared training split.

    Reads the module-level ``X_train`` and ``y_train``. Cached with
    ``st.cache_resource`` so the fit is not repeated on every script rerun.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    model = RandomForestClassifier(
        n_estimators=1000,
        max_depth=15,
        random_state=42,
        n_jobs=-1,
        class_weight="balanced"
    )
    model.fit(X_train, y_train)
    return model


model = train_model()

# Accuracy
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

with st.container():
    st.markdown("\n\n", unsafe_allow_html=True)
    st.success(f"Model Accuracy on Test Set: *{accuracy:.4f}* ({accuracy*100:.2f}%)")
    st.markdown("\n\n", unsafe_allow_html=True)

# -------------------------- Interactive Prediction --------------------------
st.markdown("\n\n", unsafe_allow_html=True)
st.header("Predict Your Wine's Destiny")

# Wine type selector
wine_type = st.selectbox("Choose Wine Type", options=["Red", "White"], index=0)

col1, col2 = st.columns(2)

input_data = {"type_Red": 0, "type_White": 0}
input_data[f"type_{wine_type}"] = 1

features = [col for col in X.columns if col not in TYPE_COLUMNS]

for i, feature in enumerate(features):
    # Alternate sliders between the two columns.
    col = col1 if i % 2 == 0 else col2
    with col:
        min_v, max_v = float(df[feature].min()), float(df[feature].max())
        mean_v = float(df[feature].mean())
        # A fixed 0.1 step is wider than the whole range of narrow features
        # such as density, so step and precision follow the feature's range.
        step_v, format_v = _slider_step_and_format(min_v, max_v)
        val = st.slider(
            feature.replace("_", " ").title(),
            min_value=min_v,
            max_value=max_v,
            value=mean_v,
            step=step_v,
            format=format_v
        )
        input_data[feature] = val

if st.button("Reveal the Quality!", use_container_width=True, type="primary"):
    input_df = pd.DataFrame([input_data])

    # Scale numerical columns and rebuild the frame in the training column order.
    input_scaled = _scale_features(input_df, scaler, numerical_cols)

    pred = model.predict(input_scaled)[0]
    prob = model.predict_proba(input_scaled)[0]

    st.markdown("\n", unsafe_allow_html=True)
    if pred == 1:
        st.balloons()
        st.markdown(f"\n\nOUTSTANDING WINE! {wine_type} Wine\n\n", unsafe_allow_html=True)
        st.success(f"Confidence: *{prob[1]:.1%}* – This belongs in a museum... or your glass right now!")
    else:
        st.error(f"Not quite a masterpiece... {wine_type} Wine")
        st.warning(f"Confidence: *{prob[0]:.1%}* – Maybe use it for cooking?")

    # Feature importance hint (static text, not read from the fitted model)
    st.info(f"Pro tip: For {wine_type.lower()} wines, alcohol, sulphates, and volatile acidity matter most!")

st.markdown("\n\n", unsafe_allow_html=True)

# -------------------------- Footer --------------------------
st.markdown("---")
st.caption("Made with passion | Dataset: UCI Wine Quality (Red + White) | Model: Random Forest |Created by: MAN JETANI")