# app.py # Wine Quality Predictor – Fixed & Bulletproof (November 2025) # 100% original, self-contained synthetic data, zero external links import streamlit as st import pandas as pd import numpy as np from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import accuracy_score # ------------------ Page Config ------------------ st.set_page_config(page_title="Wine Judge", page_icon="Wine Glass", layout="centered") # ------------------ Style ------------------ st.markdown(""" """, unsafe_allow_html=True) # ------------------ Synthetic Data (no internet needed) ------------------ @st.cache_data def make_data(n=600): np.random.seed(42) data = pd.DataFrame({ 'fixed_acidity' : np.random.uniform(4, 16, n), 'volatile_acidity' : np.random.uniform(0.08, 1.6, n), 'citric_acid' : np.random.uniform(0, 1, n), 'residual_sugar' : np.random.uniform(0.5, 20, n), 'chlorides' : np.random.uniform(0.005, 0.4, n), 'free_sulfur_dioxide' : np.random.uniform(1, 80, n), 'total_sulfur_dioxide': np.random.uniform(6, 300, n), 'density' : np.random.uniform(0.987, 1.01, n), 'pH' : np.random.uniform(2.7, 4.0, n), 'sulphates' : np.random.uniform(0.3, 2.0, n), 'alcohol' : np.random.uniform(8, 15, n), }) data['wine_type'] = np.random.choice(['Red', 'White'], n, p=[0.4, 0.6]) # Simple but realistic quality formula quality = (data['alcohol']*0.8 - data['volatile_acidity']*3 + data['sulphates']*2 + + np.random.normal(0,1,n)).clip(3,9).astype(int) data['quality'] = quality data['good_wine'] = (quality >= 6).astype(int) return data df = make_data() st.markdown("
Legendary or forgettable?
", unsafe_allow_html=True) # Stats c1,c2,c3 = st.columns(3) c1.metric("Total Bottles", len(df)) c2.metric("Red", len(df[df.wine_type=='Red'])) c3.metric("White", len(df[df.wine_type=='White'])) # ------------------ Model ------------------ X = df.drop(columns=['quality','good_wine']) y = df['good_wine'] X = pd.get_dummies(X, columns=['wine_type'], drop_first=False) # keep both columns # Save the exact column order for later TRAIN_COLUMNS = X.columns.tolist() scaler = StandardScaler() X[TRAIN_COLUMNS] = scaler.fit_transform(X) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y) @st.cache_resource def get_model(): clf = RandomForestClassifier(n_estimators=300, max_depth=12, random_state=42, class_weight='balanced', n_jobs=-1) clf.fit(X_train, y_train) return clf model = get_model() acc = accuracy_score(y_test, model.predict(X_test)) st.success(f"Model Accuracy: {acc:.1%}") # ------------------ Prediction ------------------ st.markdown("