# trial / src / streamlit_app.py
# Man0707's picture
# Update src/streamlit_app.py
# 2c170d3 verified
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from datasets import load_dataset
# -------------------------- Title --------------------------
# Page heading and one-line description shown at the top of the app.
APP_TITLE = "🍷 Wine Quality Prediction"
APP_SUBTITLE = "Using Random Forest on the famous Wine Quality dataset"

st.title(APP_TITLE)
st.write(APP_SUBTITLE)
# -------------------------- Load Data --------------------------
@st.cache_data
def get_data():
    """Load the "codesignal/wine-quality" dataset as a pandas DataFrame.

    The dataset is fetched with ``load_dataset`` and the first split it
    exposes is converted with ``to_pandas()``. The function is wrapped in
    ``st.cache_data`` so Streamlit can reuse the result between reruns.

    Returns:
        The first split of the dataset, converted to a DataFrame.
    """
    ds = load_dataset("codesignal/wine-quality")
    # Use whichever split is listed first instead of hard-coding a split
    # name; a second, unreachable copy of this load that asked for 'test'
    # used to sit after the return and has been removed.
    split_names = list(ds.keys())
    return ds[split_names[0]].to_pandas()
# Fetch the dataset and show its first rows.
df = get_data()
st.write("Dataset loaded! Here's a preview:")
preview = df.head()
st.dataframe(preview)

# -------------------------- Preprocessing --------------------------
# Every column except the target is used as a feature (no "Id" column exists).
X = df.drop(columns="quality")

# Binary target: 1 = good (quality >= 6), 0 = bad (quality < 6).
y = (df["quality"] >= 6).astype(int)

# Stratified 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardise features: fit on the training rows, then apply to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# -------------------------- Train Model --------------------------
@st.cache_resource
def train_model():
    """Fit a random forest on the scaled training split and return it.

    Reads the module-level ``X_train_scaled`` and ``y_train``. The function is
    wrapped in ``st.cache_resource`` so Streamlit can reuse the fitted model
    instead of refitting it on every rerun.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    forest_params = {
        "n_estimators": 200,
        "random_state": 42,  # fixed seed
        "n_jobs": -1,
    }
    forest = RandomForestClassifier(**forest_params)
    forest.fit(X_train_scaled, y_train)
    return forest
# Obtain the fitted classifier.
model = train_model()

# Score the model on the held-out split and report the accuracy.
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy_message = f"Model Accuracy: *{accuracy:.4f}* ({accuracy*100:.2f}%)"
st.success(accuracy_message)
# -------------------------- Interactive Prediction --------------------------
st.header("Predict quality of a new wine")

# One slider per feature, placed round-robin across three columns. Each
# slider spans the feature's observed min..max and starts at its mean.
cols = st.columns(3)
features = X.columns.tolist()
input_data = {}
for idx, feature in enumerate(features):
    column_values = X[feature]
    with cols[idx % 3]:
        input_data[feature] = st.slider(
            feature,
            min_value=float(column_values.min()),
            max_value=float(column_values.max()),
            value=float(column_values.mean()),
        )

if st.button("Predict Quality"):
    # Single-row frame whose columns follow the order of X, then scaled with
    # the scaler that was fitted on the training split.
    input_scaled = scaler.transform(pd.DataFrame([input_data]))
    pred = model.predict(input_scaled)[0]
    prob = model.predict_proba(input_scaled)[0]

    if pred != 1:
        st.error(f"*Not great wine* 😒 (confidence: {prob[0]:.2%})")
    else:
        st.balloons()
        st.success(f"*Good wine!* 🍾 (confidence: {prob[1]:.2%})")