File size: 2,681 Bytes
08dae5c 6a2423a 0e4dfa8 531c80d 0e4dfa8 08dae5c 4fd4e56 08dae5c 4ad2739 4241a4b 08dae5c 0e4dfa8 08dae5c 2c170d3 0e4dfa8 2c170d3 08dae5c 2c170d3 08dae5c 0e4dfa8 08dae5c 0e4dfa8 08dae5c 0e4dfa8 08dae5c 0e4dfa8 08dae5c 0e4dfa8 08dae5c 0e4dfa8 08dae5c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from datasets import load_dataset
# -------------------------- Title --------------------------
# Page heading followed by a one-line description of what the app does.
st.title("🍷 Wine Quality Prediction")
st.write("Using Random Forest on the famous Wine Quality dataset")
# -------------------------- Load Data --------------------------
@st.cache_data
def get_data():
    """Load the wine-quality dataset and return it as a pandas DataFrame.

    The dataset is fetched with ``load_dataset("codesignal/wine-quality")``
    and the first split it exposes is converted to pandas. Picking the first
    split instead of a hard-coded name keeps this working whichever split
    name the dataset publishes.

    Returns:
        pandas.DataFrame: the rows of the first available split.
    """
    ds = load_dataset("codesignal/wine-quality")
    # Iterating the returned mapping yields split names; take the first one.
    first_split = next(iter(ds))
    return ds[first_split].to_pandas()
# Fetch the dataset through the cached loader and show its first rows.
df = get_data()
st.write("Dataset loaded! Here's a preview:")
st.dataframe(df.head())

# -------------------------- Preprocessing --------------------------
# Every column except "quality" is a feature (there is no "Id" column to drop).
X = df.drop(columns="quality")

# Binary target: 1 = good (quality >= 6), 0 = bad (quality < 6).
y = df["quality"].ge(6).astype(int)

# Hold out 20% of the rows for evaluation, stratified on the binary label,
# with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Fit the scaler on the training split only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
# -------------------------- Train Model --------------------------
@st.cache_resource
def train_model():
    """Fit and return a random forest on the scaled training split.

    Reads the module-level ``X_train_scaled`` and ``y_train``. The
    ``st.cache_resource`` decorator lets Streamlit reuse the fitted model
    instead of retraining it.

    Returns:
        RandomForestClassifier: the fitted classifier (200 trees, seed 42,
        ``n_jobs=-1``).
    """
    forest = RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1)
    # ``fit`` returns the estimator itself, so the fitted model is returned directly.
    return forest.fit(X_train_scaled, y_train)
# Obtain the fitted classifier from the cached trainer.
model = train_model()

# Score the held-out split and report accuracy as a fraction and a percentage.
y_pred = model.predict(X=X_test_scaled)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
st.success(
    f"Model Accuracy: *{accuracy:.4f}* ({accuracy*100:.2f}%)"
)
# -------------------------- Interactive Prediction --------------------------
st.header("Predict quality of a new wine")

# One slider per feature, dealt round-robin across three columns.
cols = st.columns(3)
features = X.columns.tolist()
input_data = {}
for idx, feature in enumerate(features):
    observed = X[feature]
    # Slider range is the observed min/max of the feature; it starts at the mean.
    lower = float(observed.min())
    upper = float(observed.max())
    start = float(observed.mean())
    with cols[idx % 3]:
        input_data[feature] = st.slider(
            feature,
            min_value=lower,
            max_value=upper,
            value=start,
        )
if st.button("Predict Quality"):
    # One-row frame built from the slider values; its columns follow the
    # order of the training features because input_data was filled in that order.
    input_df = pd.DataFrame([input_data])
    input_scaled = scaler.transform(input_df)

    # Run the forest once: the predicted label is the class with the highest
    # probability, so both the label and its confidence come from predict_proba.
    prob = model.predict_proba(input_scaled)[0]
    best = int(prob.argmax())
    # Map the winning position through classes_ rather than assuming that
    # position 1 is always label 1.
    pred = model.classes_[best]
    confidence = prob[best]

    if pred == 1:
        st.balloons()
        st.success(f"*Good wine!* 🍾 (confidence: {confidence:.2%})")
    else:
        st.error(f"*Not great wine* 😢 (confidence: {confidence:.2%})")