File size: 2,213 Bytes
399c0ee
 
8793be7
7ffd0d5
 
 
4f2f3c2
7ffd0d5
 
 
 
 
 
58dc25e
7ffd0d5
 
 
399c0ee
7ffd0d5
 
 
 
 
1b00095
7ffd0d5
 
399c0ee
7ffd0d5
 
 
4f2f3c2
7ffd0d5
 
 
 
 
 
 
4f2f3c2
7ffd0d5
b88ade0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import gradio as gr
import pandas as pd
import joblib
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Local file the fitted model is written to after training and loaded from
# on later runs when it already exists.
MODEL_PATH = "rf_model.pkl"
# Remote CSV with the white-wine quality data; it is read with ';' as the
# field separator.
DATA_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv"

# ---------------------------
# TRAIN MODEL (only if needed)
# ---------------------------

def train_model():
    """Download the white-wine dataset, fit a random forest, and persist it.

    Reads the semicolon-separated CSV at ``DATA_URL``, holds out 20% of the
    rows, fits a ``RandomForestClassifier`` on the remaining 80%, prints the
    accuracy on the held-out rows, and writes the fitted model to
    ``MODEL_PATH`` with joblib.

    Returns:
        The fitted ``RandomForestClassifier``.

    Raises:
        Errors from downloading/parsing the CSV, from selecting the expected
        columns, or from writing the model file propagate unchanged.
    """
    print("Downloading white wine dataset...")
    df = pd.read_csv(DATA_URL, sep=';')

    # Column order matters: prediction inputs are supplied in this order.
    feature_columns = [
        'fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
        'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
        'pH', 'sulphates', 'alcohol',
    ]

    X = df[feature_columns]
    y = df['quality']

    # Fixed seed keeps the split (and therefore the model) reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    print("Training Random Forest model...")
    model = RandomForestClassifier(
        n_estimators=300,
        max_depth=12,
        random_state=42,
    )
    model.fit(X_train, y_train)

    # Score on the held-out rows so the split is actually used and a poor
    # training run is visible in the logs.
    accuracy = model.score(X_test, y_test)
    print(f"Held-out accuracy: {accuracy:.3f}")

    joblib.dump(model, MODEL_PATH)
    # Derive the message from MODEL_PATH so it cannot drift from the real path.
    print(f"Model saved as {MODEL_PATH}")
    return model


# Train from scratch only when no saved model file is present; otherwise
# reuse the one on disk.
# NOTE(review): joblib.load unpickles the file, so MODEL_PATH should only ever
# point at a file this app wrote itself.
if not os.path.exists(MODEL_PATH):
    model = train_model()
else:
    print("Loading existing model...")
    model = joblib.load(MODEL_PATH)


# ---------------------------
# PREDICTION FUNCTION
# ---------------------------

# Feature column names, in the order used for training and for the UI inputs.
feature_names = [
    'fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
    'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
    'pH', 'sulphates', 'alcohol'
]


def predict_quality(*inputs):
    """Predict the quality label for a single wine sample.

    Args:
        *inputs: One numeric value per entry of ``feature_names``, in the
            same order. ``None`` is treated as a missing value (presumably
            what a blank numeric UI field submits -- confirm against the
            Gradio ``Number`` documentation).

    Returns:
        ``"Predicted Wine Quality: <label>"`` on success, or a message
        listing the features whose value is ``None``.
    """
    missing = [
        name for name, value in zip(feature_names, inputs) if value is None
    ]
    if missing:
        # Give the user a readable message instead of handing missing values
        # to the model.
        return f"Please enter a value for: {', '.join(missing)}"

    # Single-row frame with named columns so the model sees the same feature
    # names it was fitted with.
    df = pd.DataFrame([inputs], columns=feature_names)
    prediction = model.predict(df)[0]
    return f"Predicted Wine Quality: {prediction}"


# ---------------------------
# GRADIO UI
# ---------------------------

# One numeric input field per feature, in the same order as `feature_names`.
inputs_ui = []
for _feature in feature_names:
    inputs_ui.append(gr.Number(label=_feature))

# Wire the prediction function to the numeric inputs and a single text output.
demo = gr.Interface(
    predict_quality,
    inputs_ui,
    gr.Textbox(label="Prediction"),
    title="🍾 White Wine Quality Predictor (Trains on HF Space)",
    description="Random Forest model trained on the UCI White Wine Quality dataset.",
)

# Start the web app.
demo.launch()