jinyi14 committed on
Commit
4bbb9be
·
1 Parent(s): d09b3bb

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +129 -0
  2. requirements.txt +5 -0
  3. wine_quality_prediction.pkl +3 -0
app.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import streamlit as st
3
+ import joblib
4
+ import pandas as pd
5
+ import numpy as np
6
+ from sklearn.base import BaseEstimator, TransformerMixin
7
+ from sklearn.preprocessing import QuantileTransformer, StandardScaler
8
+ from sklearn.cluster import KMeans
9
+
10
+ seed = 42
11
+
12
# Columns (raw and engineered) that feat_eng returns, in output order.
selected_features = [
    'volatile_acidity', 'citric_acid', 'chlorides',
    'total_sulfur_dioxide', 'density', 'pH',
    'sulphates', 'alcohol', 'total_acidity',
    'acidity_to_pH_ratio',
    'free_sulfur_dioxide_to_total_sulfur_dioxide_ratio',
    'alcohol_to_acidity_ratio', 'residual_sugar_to_citric_acid_ratio',
    'alcohol_to_density_ratio', 'total_alkalinity', 'total_minerals',
]


def feat_eng(df):
    """Build the engineered feature frame from raw wine measurements.

    Spaces in column names are replaced with underscores, the derived
    ratio/sum columns are added, infinite values are replaced with 0, rows
    containing NaN are dropped, and only the columns listed in
    ``selected_features`` are returned (in that order).

    The input frame is not modified: all work happens on a copy.

    Args:
        df: DataFrame holding the raw columns (``fixed_acidity``,
            ``volatile_acidity``, ``citric_acid``, ``residual_sugar``,
            ``chlorides``, ``free_sulfur_dioxide``, ``total_sulfur_dioxide``,
            ``density``, ``pH``, ``sulphates``, ``alcohol``), with either
            spaces or underscores in the names.

    Returns:
        A new DataFrame restricted to ``selected_features``. It may have
        fewer rows than ``df`` because rows with NaN are dropped.
    """
    # Copy first: renaming columns and adding derived columns on the caller's
    # frame would leak these changes outside the function.
    df = df.copy()
    df.columns = df.columns.str.replace(' ', '_')

    df['total_acidity'] = df['fixed_acidity'] + df['volatile_acidity'] + df['citric_acid']
    df['acidity_to_pH_ratio'] = df['total_acidity'] / df['pH']
    df['free_sulfur_dioxide_to_total_sulfur_dioxide_ratio'] = (
        df['free_sulfur_dioxide'] / df['total_sulfur_dioxide']
    )
    df['alcohol_to_acidity_ratio'] = df['alcohol'] / df['total_acidity']
    df['residual_sugar_to_citric_acid_ratio'] = df['residual_sugar'] / df['citric_acid']
    df['alcohol_to_density_ratio'] = df['alcohol'] / df['density']
    df['total_alkalinity'] = df['pH'] + df['alcohol']
    df['total_minerals'] = df['chlorides'] + df['sulphates'] + df['residual_sugar']

    # x/0 with x != 0 yields +/-inf, which is mapped to 0 here. 0/0 yields
    # NaN, so such rows are removed by dropna() below.
    df = df.replace([np.inf, -np.inf], 0)
    df = df.dropna()

    return df[selected_features]
36
+
37
class CustomQuantileTransformer(BaseEstimator, TransformerMixin):
    """Quantile-transform features to a normal distribution, returning a DataFrame.

    Wraps ``QuantileTransformer(output_distribution='normal')`` so that the
    transformed data comes back as a pandas DataFrame instead of a bare array.

    Args:
        random_state: Forwarded to the wrapped ``QuantileTransformer``.
    """

    def __init__(self, random_state=None):
        self.random_state = random_state
        # NOTE(review): the wrapped transformer is created here and stored
        # under this attribute name; presumably already-pickled instances
        # rely on it, so the attribute layout is left unchanged.
        self.quantile_transformer = QuantileTransformer(
            output_distribution='normal', random_state=self.random_state
        )

    def fit(self, X_train, y=None):
        """Fit the wrapped quantile transformer on ``X_train`` and return self."""
        self.quantile_transformer.fit(X_train)
        return self

    def transform(self, X):
        """Apply the fitted transformer to ``X``.

        Returns:
            A DataFrame of transformed values. When ``X`` is a DataFrame its
            column names and index are kept so rows stay aligned with the
            input; for other array-likes a default-labelled frame is returned.
        """
        X_transformed = self.quantile_transformer.transform(X)
        if isinstance(X, pd.DataFrame):
            return pd.DataFrame(X_transformed, columns=X.columns, index=X.index)
        # Array-like input has no .columns; fall back to default labels
        # instead of raising AttributeError.
        return pd.DataFrame(X_transformed)
50
+
51
class CustomStandardScaler(BaseEstimator, TransformerMixin):
    """Standard-scale features, returning a DataFrame.

    Wraps ``StandardScaler`` so that the scaled data comes back as a pandas
    DataFrame instead of a bare array.
    """

    def __init__(self):
        # NOTE(review): the wrapped scaler is created here and stored under
        # this attribute name; presumably already-pickled instances rely on
        # it, so the attribute layout is left unchanged.
        self.scaler = StandardScaler()

    def fit(self, X_train, y=None):
        """Fit the wrapped scaler on ``X_train`` and return self."""
        self.scaler.fit(X_train)
        return self

    def transform(self, X):
        """Scale ``X`` with the fitted scaler.

        Returns:
            A DataFrame of scaled values. When ``X`` is a DataFrame its
            column names and index are kept so rows stay aligned with the
            input; for other array-likes a default-labelled frame is returned.
        """
        X_transformed = self.scaler.transform(X)
        if isinstance(X, pd.DataFrame):
            return pd.DataFrame(X_transformed, columns=X.columns, index=X.index)
        # Array-like input has no .columns; fall back to default labels
        # instead of raising AttributeError.
        return pd.DataFrame(X_transformed)
63
+
64
class KMeansTransformer(BaseEstimator, TransformerMixin):
    """Append a K-Means cluster label to the feature set.

    Args:
        n_clusters: Number of clusters passed to ``KMeans``.
        random_state: Seed passed to ``KMeans``; defaults to the module-level
            ``seed``.
    """

    def __init__(self, n_clusters=3, random_state=seed):
        self.n_clusters = n_clusters
        self.random_state = random_state
        self.kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)

    def fit(self, X_train, y=None):
        """Fit the wrapped K-Means model on ``X_train`` and return self."""
        self.kmeans.fit(X_train)
        return self

    def transform(self, X):
        """Return a DataFrame copy of ``X`` with an extra ``'Cluster'`` column."""
        labels = self.kmeans.predict(X)
        augmented = pd.DataFrame(X.copy())
        augmented['Cluster'] = labels
        return augmented
80
+
81
# Loading the model
@st.cache_resource
def _load_pipeline(path='wine_quality_prediction.pkl'):
    """Load the serialized prediction pipeline from ``path``.

    Streamlit re-executes this script on every user interaction; caching the
    loaded object avoids re-reading and unpickling the file on each rerun.

    NOTE: joblib.load unpickles arbitrary objects - only load files from a
    trusted source.
    """
    return joblib.load(path)


pipe = _load_pipeline()

# Raw measurements requested from the user, in form order.
input_features = [
    "fixed_acidity", "volatile_acidity", "citric_acid", "residual_sugar",
    "chlorides", "free_sulfur_dioxide", "total_sulfur_dioxide", "density",
    "pH", "sulphates", "alcohol",
]


st.title('Wine Quality Predictor Model')
92
+
93
+
94
def get_user_input():
    """Render the input form and collect one value per input feature.

    Returns:
        A tuple ``(frame, submitted)`` where ``frame`` is a single-row
        DataFrame keyed by the names in ``input_features`` and ``submitted``
        is the value returned by the form's submit button.
    """
    with st.form(key='my_form'):
        # One numeric widget per feature, created in input_features order.
        values = {
            feat: st.number_input(f"Enter value for {feat}", value=0.0, step=0.01)
            for feat in input_features
        }
        submitted = st.form_submit_button(label='Submit')

    return pd.DataFrame([values]), submitted
107
+
108
+
109
user_input, submit_button = get_user_input()


# When the 'Submit' button is pressed, perform the prediction
if submit_button:
    try:
        # Predict wine quality
        prediction = pipe.predict(user_input)
    except ValueError as exc:
        # NOTE(review): presumably the pipeline applies feat_eng, which drops
        # rows containing NaN (e.g. 0/0 ratios from the all-zero defaults),
        # leaving nothing to predict on - confirm against the pickled pipeline.
        st.error(f"Could not compute a prediction for these inputs: {exc}")
    else:
        # Flatten so that both 1-D and (n, 1)-shaped outputs yield a scalar.
        flat_prediction = np.ravel(prediction)
        if flat_prediction.size == 0:
            st.error("The model returned no prediction for these inputs.")
        else:
            prediction_value = flat_prediction[0]

            # Display the prediction
            st.header("Predicted Quality")
            st.write(prediction_value)


st.markdown(
    """
    See how this model was created on Kaggle:<br>
    [🍷 Wine Quality - EDA, Prediction and Deploy](https://www.kaggle.com/code/lusfernandotorres/wine-quality-eda-prediction-and-deploy/notebook)
    """, unsafe_allow_html=True
)
129
+
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ joblib
4
+ catboost
5
+ scikit-learn
wine_quality_prediction.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d32233832b105ead94ce7c430b1a120fd3793091768a4535e8392976ed1d3bb
3
+ size 402943