|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.preprocessing import StandardScaler |
|
|
from sklearn.ensemble import RandomForestClassifier |
|
|
from sklearn.metrics import accuracy_score |
|
|
from datasets import load_dataset |
|
|
|
|
|
|
|
|
# Page header.  The leading emoji had been stored as mojibake (its UTF-8 bytes
# were decoded with a Greek single-byte code page); it is written here as the
# intended wine-glass character so the title renders correctly.
st.title("🍷 Wine Quality Prediction")
st.write("Using Random Forest on the famous Wine Quality dataset")
|
|
|
|
|
|
|
|
@st.cache_data
def get_data():
    """Load the wine-quality dataset and return it as a pandas DataFrame.

    The Hugging Face dataset ``codesignal/wine-quality`` is loaded without
    naming a split, and the first split in the returned mapping is converted.
    The function therefore does not depend on what that split is called.
    The result is memoized by ``st.cache_data``, so Streamlit reruns reuse it
    instead of loading the dataset again.

    Returns:
        The first available split as a ``pandas.DataFrame``.
    """
    ds = load_dataset("codesignal/wine-quality")

    # Take the first split by iteration order rather than hard-coding a name.
    first_split = next(iter(ds))
    return ds[first_split].to_pandas()
|
|
|
|
|
# Fetch the dataset through the cached loader and show its first rows so the
# user can see what the model is trained on.
df = get_data()

st.write("Dataset loaded! Here's a preview:")
preview = df.head()
st.dataframe(preview)
|
|
|
|
|
# Features: every column except the target.
X = df.drop(columns="quality")

# Target: binarize the quality score -- 6 or higher becomes class 1,
# anything lower becomes class 0.
y = (df["quality"] >= 6).astype(int)
|
|
|
|
|
|
|
|
|
|
|
# Hold out 20% of the rows for evaluation.  Stratifying on y keeps the class
# balance the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
|
|
|
|
|
|
|
|
@st.cache_resource
def train_model():
    """Fit a random forest on the scaled training split and return it.

    Reads the module-level ``X_train_scaled`` and ``y_train``.  The fitted
    estimator is held by ``st.cache_resource``, so Streamlit reruns reuse the
    same model object instead of retraining.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    forest_params = {
        "n_estimators": 200,
        "random_state": 42,  # fixed seed for repeatable forests
        "n_jobs": -1,  # use every available core
    }
    # ``fit`` returns the estimator itself, so construct-and-fit can be chained.
    return RandomForestClassifier(**forest_params).fit(X_train_scaled, y_train)
|
|
|
|
|
# Obtain the classifier through the cached trainer, then score it on the
# held-out split.
model = train_model()

y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Show the accuracy both as a fraction and as a percentage.
accuracy_message = f"Model Accuracy: *{accuracy:.4f}* ({accuracy*100:.2f}%)"
st.success(accuracy_message)
|
|
|
|
|
|
|
|
st.header("Predict quality of a new wine")

cols = st.columns(3)
features = X.columns.tolist()

# One slider per feature, dealt round-robin across the three columns.  Each
# slider spans the feature's observed range in the dataset and starts at the
# feature's mean.  Calling ``.slider`` on a column places the widget in that
# column, just like wrapping ``st.slider`` in ``with column:``.
input_data = {
    feature: cols[i % 3].slider(
        feature,
        float(X[feature].min()),
        float(X[feature].max()),
        float(X[feature].mean()),
    )
    for i, feature in enumerate(features)
}
|
|
|
|
|
# Classify the wine described by the sliders when the button is pressed.
if st.button("Predict Quality"):
    # Single-row frame built from the slider values, passed through the
    # scaler that was fitted on the training data.
    input_df = pd.DataFrame([input_data])
    input_scaled = scaler.transform(input_df)

    pred = model.predict(input_scaled)[0]
    prob = model.predict_proba(input_scaled)[0]

    # prob[k] is read as the probability of class k -- this assumes the
    # model's classes are ordered [0, 1] (TODO confirm both classes occur
    # in the training split).
    # The emoji in both messages had been stored as mojibake; they are
    # written here as the intended characters.
    if pred == 1:
        st.balloons()
        st.success(f"*Good wine!* 🍾 (confidence: {prob[1]:.2%})")
    else:
        st.error(f"*Not great wine* 😢 (confidence: {prob[0]:.2%})")