Man0707 committed on
Commit
08dae5c
·
verified ·
1 Parent(s): 64a8f54

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +70 -16
src/streamlit_app.py CHANGED
@@ -1,34 +1,88 @@
 
1
  import pandas as pd
2
- import numpy as np
3
  from sklearn.model_selection import train_test_split
4
  from sklearn.preprocessing import StandardScaler
5
  from sklearn.ensemble import RandomForestClassifier
6
  from sklearn.metrics import accuracy_score
7
  from datasets import load_dataset
8
 
 
 
 
9
 
10
# Load the wine-quality dataset and turn it into a DataFrame so that `df`
# exists before it is used below.
# NOTE(review): assumes the dataset exposes a "train" split - confirm.
ds = load_dataset("codesignal/wine-quality")
df = ds["train"].to_pandas()
print(df.head())

# Features are every column except the target ("quality") and "Id".
X = df.drop(["quality", "Id"], axis=1)
y = df["quality"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, then reuse it for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = RandomForestClassifier()
model.fit(X_train, y_train)

# Report accuracy on the held-out split.
pred = model.predict(X_test)
accuracy = accuracy_score(y_test, pred)
print(accuracy)

# Score one hand-written sample; its values are matched to X.columns by
# position, so the row must list one value per feature column.
sample = pd.DataFrame(
    [[7.4, 0.7, 0, 1.9, 0.076, 11, 34, 0.9978, 3.51, 0.56, 9.4]],
    columns=X.columns,
)

sample_scaled = scaler.transform(sample)
prediction = model.predict(sample_scaled)
print(prediction[0])
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
  import pandas as pd
 
3
  from sklearn.model_selection import train_test_split
4
  from sklearn.preprocessing import StandardScaler
5
  from sklearn.ensemble import RandomForestClassifier
6
  from sklearn.metrics import accuracy_score
7
  from datasets import load_dataset
8
 
9
# -------------------------- Title --------------------------
# Page heading followed by a one-line description of the app.
st.title("🍷 Wine Quality Prediction")
st.write("Using Random Forest on the famous Wine Quality dataset")
12
 
13
# -------------------------- Load Data --------------------------
@st.cache_data
def get_data():
    """Return the "train" split of the wine-quality dataset as a DataFrame.

    The function is wrapped in ``st.cache_data``.
    """
    return load_dataset("codesignal/wine-quality")["train"].to_pandas()
19
 
20
df = get_data()
st.write("Dataset loaded! Here's a preview:")
st.dataframe(df.head())

# -------------------------- Preprocessing --------------------------
# Every column except the target ("quality") and "Id" is used as a feature.
X = df.drop(columns=["quality", "Id"])

# Binary target: 1 when quality >= 6 ("good"), 0 otherwise ("bad").
y = (df["quality"] >= 6).astype(int)

# 80/20 split with a fixed seed, stratified on the binary target.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Fit the scaler on the training split only, then apply it to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
41
+
42
# -------------------------- Train Model --------------------------
@st.cache_resource
def train_model():
    """Fit a random forest on the scaled training split and return it.

    Reads the module-level ``X_train_scaled`` and ``y_train``. The function
    is wrapped in ``st.cache_resource``.
    """
    forest = RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1)
    # ``fit`` returns the fitted estimator itself.
    return forest.fit(X_train_scaled, y_train)
52
+
53
model = train_model()

# Score the held-out split and show the accuracy on the page.
accuracy = accuracy_score(y_test, model.predict(X_test_scaled))

st.success(f"Model Accuracy: *{accuracy:.4f}* ({accuracy*100:.2f}%)")
60
 
61
# -------------------------- Interactive Prediction --------------------------
st.header("Predict quality of a new wine")
cols = st.columns(3)

# One slider per feature, dealt round-robin across the three columns. Each
# slider spans the feature's observed min..max and starts at its mean.
input_data = {}
for position, name in enumerate(X.columns.tolist()):
    column_values = X[name]
    input_data[name] = cols[position % 3].slider(
        name,
        float(column_values.min()),
        float(column_values.max()),
        float(column_values.mean()),
    )

if st.button("Predict Quality"):
    # Apply the already-fitted scaler to the single-row frame before predicting.
    input_scaled = scaler.transform(pd.DataFrame([input_data]))
    pred = model.predict(input_scaled)[0]
    prob = model.predict_proba(input_scaled)[0]

    if pred != 1:
        st.error(f"*Not great wine* 😢 (confidence: {prob[0]:.2%})")
    else:
        st.balloons()
        st.success(f"*Good wine!* 🍾 (confidence: {prob[1]:.2%})")