Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.metrics import accuracy_score | |
| from datasets import load_dataset | |
# -------------------------- Title --------------------------
# Page heading. The wine-glass emoji had been corrupted into "π·" by a
# UTF-8 -> ISO-8859-7 mis-decode (bytes F0 9F 8D B7 == U+1F377).
st.title("🍷 Wine Quality Prediction")
st.write("Using Random Forest on the famous Wine Quality dataset")
# -------------------------- Load Data --------------------------
@st.cache_data
def get_data():
    """Load the ``codesignal/wine-quality`` dataset as a pandas DataFrame.

    The dataset is fetched with ``datasets.load_dataset`` and the first
    available split is converted with ``to_pandas()``.  Taking the first
    split (rather than a hard-coded split name) avoids a ``KeyError`` when
    the expected split is not published.

    The result is cached with ``st.cache_data`` so the dataset is not
    re-fetched each time Streamlit reruns the script (which happens on
    every widget interaction).

    Returns:
        pandas.DataFrame: the rows of the first split of the dataset.
    """
    ds = load_dataset("codesignal/wine-quality")
    # NOTE: a second, unreachable copy of this logic (reading ds['test'])
    # used to follow the return statement; it was dead code and was removed.
    first_split = next(iter(ds))
    return ds[first_split].to_pandas()
df = get_data()
st.write("Dataset loaded! Here's a preview:")
st.dataframe(df.head())

# -------------------------- Preprocessing --------------------------
# Every column except the target is a feature (there is no "Id" column to drop).
X = df.drop(columns=["quality"])

# Binary target: 1 = good (quality >= 6), 0 = bad (quality < 6).
y = (df["quality"] >= 6).astype(int)

# 80/20 train-test split, stratified on the binary label, fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Scale features: statistics are learned from the training rows only and
# then applied to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
# -------------------------- Train Model --------------------------
@st.cache_resource
def train_model():
    """Fit and return the random-forest classifier used by the app.

    Trains a ``RandomForestClassifier`` (200 trees, ``random_state=42``,
    all CPU cores) on the module-level ``X_train_scaled`` / ``y_train``.

    The fitted model is cached with ``st.cache_resource`` so the forest is
    trained once per server process instead of on every Streamlit rerun
    (each slider movement reruns the whole script).  Because the function
    takes no arguments, the cached model is reused even if the training
    globals change; restart the app or clear the cache to retrain.

    Returns:
        RandomForestClassifier: the fitted model.
    """
    model = RandomForestClassifier(
        n_estimators=200,
        random_state=42,
        n_jobs=-1,
    )
    model.fit(X_train_scaled, y_train)
    return model
model = train_model()

# Predictions & accuracy: score the classifier on the scaled test partition
# and report the fraction of correct labels.
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
st.success(f"Model Accuracy: *{accuracy:.4f}* ({accuracy*100:.2f}%)")
# -------------------------- Interactive Prediction --------------------------
st.header("Predict quality of a new wine")

# One slider per feature, dealt round-robin into three side-by-side columns.
cols = st.columns(3)
input_data = {}
features = X.columns.tolist()
for i, feature in enumerate(features):
    with cols[i % 3]:
        # Slider range is the observed min..max of the feature; the default
        # position is the feature's mean.
        val = st.slider(
            feature,
            float(X[feature].min()),
            float(X[feature].max()),
            float(X[feature].mean()),
        )
        input_data[feature] = val

if st.button("Predict Quality"):
    # Single-row frame with columns pinned to the training feature order,
    # then scaled with the already-fitted scaler.
    input_df = pd.DataFrame([input_data], columns=features)
    input_scaled = scaler.transform(input_df)
    pred = model.predict(input_scaled)[0]
    prob = model.predict_proba(input_scaled)[0]
    # prob[0] / prob[1] are the probabilities of class 0 (bad) / 1 (good).
    # The emoji below were previously mojibake ("πΎ", "π’") produced by a
    # UTF-8 -> ISO-8859-7 mis-decode of U+1F37E and U+1F622.
    if pred == 1:
        st.balloons()
        st.success(f"*Good wine!* 🍾 (confidence: {prob[1]:.2%})")
    else:
        st.error(f"*Not great wine* 😢 (confidence: {prob[0]:.2%})")