|
|
import pandas as pd |
|
|
import numpy as np |
|
|
import joblib |
|
|
from sklearn.base import BaseEstimator, TransformerMixin |
|
|
from sklearn.preprocessing import QuantileTransformer, StandardScaler |
|
|
from sklearn.cluster import KMeans |
|
|
import gradio as gr |
|
|
|
|
|
|
|
|
def feat_eng(df):
    """Build the engineered wine-chemistry features from raw measurements.

    Spaces in column names are replaced with underscores, eight derived
    columns are computed, infinities (from division by zero) are replaced
    with 0, rows that still contain NaN in any column are dropped, and only
    the derived columns are returned.

    The caller's DataFrame is never modified; all work happens on a copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain, with either spaces or underscores in the names:
        ``fixed acidity``, ``volatile acidity``, ``citric acid``,
        ``residual sugar``, ``chlorides``, ``free sulfur dioxide``,
        ``total sulfur dioxide``, ``density``, ``pH``, ``sulphates`` and
        ``alcohol``. Extra columns are allowed and are not returned.

    Returns
    -------
    pandas.DataFrame
        The eight engineered columns, in a fixed order. It may have fewer
        rows than ``df`` because rows containing NaN are dropped (for
        example a 0/0 ratio).

    Raises
    ------
    KeyError
        If a required input column is missing.
    """
    # Copy first: renaming columns and adding features on the argument itself
    # would silently alter the caller's frame.
    df = df.copy()
    df.columns = df.columns.str.replace(' ', '_', regex=False)

    df['total_acidity'] = (
        df['fixed_acidity'] + df['volatile_acidity'] + df['citric_acid']
    )
    df['acidity_to_pH_ratio'] = df['total_acidity'] / df['pH']
    df['free_sulfur_dioxide_to_total_sulfur_dioxide_ratio'] = (
        df['free_sulfur_dioxide'] / df['total_sulfur_dioxide']
    )
    df['alcohol_to_acidity_ratio'] = df['alcohol'] / df['total_acidity']
    df['residual_sugar_to_citric_acid_ratio'] = (
        df['residual_sugar'] / df['citric_acid']
    )
    df['alcohol_to_density_ratio'] = df['alcohol'] / df['density']
    df['total_alkalinity'] = df['pH'] + df['alcohol']
    df['total_minerals'] = (
        df['chlorides'] + df['sulphates'] + df['residual_sugar']
    )

    # x/0 yields +/-inf, which is mapped to 0; 0/0 yields NaN, and such rows
    # are removed entirely.
    df = df.replace([np.inf, -np.inf], 0)
    df = df.dropna()

    selected_features = [
        'total_acidity',
        'acidity_to_pH_ratio',
        'free_sulfur_dioxide_to_total_sulfur_dioxide_ratio',
        'alcohol_to_acidity_ratio',
        'residual_sugar_to_citric_acid_ratio',
        'alcohol_to_density_ratio',
        'total_alkalinity',
        'total_minerals',
    ]
    return df[selected_features]
|
|
|
|
|
|
|
|
class CustomQuantileTransformer(BaseEstimator, TransformerMixin):
    """Quantile-transform features to a normal distribution as a DataFrame.

    Wraps ``sklearn.preprocessing.QuantileTransformer`` configured with
    ``output_distribution='normal'`` and returns a ``pandas.DataFrame`` from
    ``transform`` instead of a bare array.

    Parameters
    ----------
    random_state : default=None
        Forwarded to the wrapped ``QuantileTransformer``.
    """

    def __init__(self, random_state=None):
        self.random_state = random_state
        # The attribute name is part of the pickled state of already-fitted
        # instances, so it must not be renamed.
        self.quantile_transformer = QuantileTransformer(
            output_distribution='normal', random_state=self.random_state
        )

    def fit(self, X, y=None):
        """Fit the wrapped transformer on ``X`` (``y`` is ignored); return self."""
        # ``set_params`` only updates ``self.random_state``; push the current
        # value into the wrapped estimator so it is not silently stale.
        self.quantile_transformer.set_params(random_state=self.random_state)
        self.quantile_transformer.fit(X)
        return self

    def transform(self, X):
        """Return the transformed values of ``X`` as a DataFrame.

        When ``X`` is a DataFrame its column names and index are carried over
        so output rows stay aligned with the input; any other array-like
        input gets default labels.
        """
        X_transformed = self.quantile_transformer.transform(X)
        if isinstance(X, pd.DataFrame):
            return pd.DataFrame(
                X_transformed, columns=X.columns, index=X.index
            )
        return pd.DataFrame(X_transformed)
|
|
|
|
|
|
|
|
class CustomStandardScaler(BaseEstimator, TransformerMixin):
    """Standardize features and return the result as a DataFrame.

    Wraps ``sklearn.preprocessing.StandardScaler`` (default settings) and
    returns a ``pandas.DataFrame`` from ``transform`` instead of a bare array.
    """

    def __init__(self):
        # The attribute name is part of the pickled state of already-fitted
        # instances, so it must not be renamed.
        self.scaler = StandardScaler()

    def fit(self, X, y=None):
        """Fit the wrapped scaler on ``X`` (``y`` is ignored); return self."""
        self.scaler.fit(X)
        return self

    def transform(self, X):
        """Return the scaled values of ``X`` as a DataFrame.

        When ``X`` is a DataFrame its column names and index are carried over
        so output rows stay aligned with the input; any other array-like
        input gets default labels.
        """
        X_transformed = self.scaler.transform(X)
        if isinstance(X, pd.DataFrame):
            return pd.DataFrame(
                X_transformed, columns=X.columns, index=X.index
            )
        return pd.DataFrame(X_transformed)
|
|
|
|
|
|
|
|
class KMeansTransformer(BaseEstimator, TransformerMixin):
    """Append a K-Means cluster label to each row as a ``Cluster`` column.

    Parameters
    ----------
    n_clusters : int, default=3
        Number of clusters, forwarded to the wrapped ``KMeans``.
    random_state : default=None
        Forwarded to the wrapped ``KMeans``.
    """

    def __init__(self, n_clusters=3, random_state=None):
        self.n_clusters = n_clusters
        self.random_state = random_state
        # The attribute name is part of the pickled state of already-fitted
        # instances, so it must not be renamed.
        self.kmeans = KMeans(
            n_clusters=self.n_clusters, random_state=self.random_state
        )

    def fit(self, X, y=None):
        """Fit the wrapped K-Means model on ``X`` (``y`` is ignored); return self."""
        # ``set_params`` (e.g. from a hyper-parameter search) only updates the
        # attributes on this wrapper; sync them into the wrapped estimator so
        # the requested ``n_clusters`` / ``random_state`` are actually used.
        self.kmeans.set_params(
            n_clusters=self.n_clusters, random_state=self.random_state
        )
        self.kmeans.fit(X)
        return self

    def transform(self, X):
        """Return a copy of DataFrame ``X`` with a ``Cluster`` column added.

        ``X`` itself is left unchanged. An existing ``Cluster`` column in the
        copy is overwritten with the predicted labels.
        """
        cluster_labels = self.kmeans.predict(X)
        X_clustered = X.copy()
        X_clustered['Cluster'] = cluster_labels
        return X_clustered
|
|
|
|
|
|
|
|
# Load the fitted model pipeline once, at import time, so that every call to
# ``predict`` reuses the same object.  The path is relative to the current
# working directory.
# NOTE(review): joblib files are pickles -- loading one can execute arbitrary
# code, so "pipeline.pkl" must come from a trusted source.  Presumably the
# pickle refers to the function/classes defined above by name, which would be
# why the load sits below them; confirm against the training script.
pipeline = joblib.load("pipeline.pkl")
|
|
|
|
|
|
|
|
def predict(fixed_acidity, volatile_acidity, citric_acid, residual_sugar,
            chlorides, free_sulfur_dioxide, total_sulfur_dioxide, density,
            pH, sulphates, alcohol, Id=None):
    """Predict wine quality for a single sample.

    The eleven measurements are converted to ``float``, placed in a one-row
    DataFrame (together with an ``Id`` column) and passed to the module-level
    ``pipeline``.

    Parameters
    ----------
    fixed_acidity, volatile_acidity, citric_acid, residual_sugar, chlorides,
    free_sulfur_dioxide, total_sulfur_dioxide, density, pH, sulphates, alcohol
        Values convertible with ``float()``.
    Id : optional
        Stored in the ``Id`` column; any falsy value (``None``, ``""``, ``0``)
        is replaced by ``0``.

    Returns
    -------
    dict
        ``"Predicted Quality"``: the predicted class as an ``int``.
        ``"Class Probabilities"``: mapping of class label (as ``str``) to
        probability (as ``float``).

    Raises
    ------
    TypeError, ValueError
        If a measurement cannot be converted to ``float`` (for example a
        field left blank); the message names the offending field.
    """
    measurements = {
        'fixed_acidity': fixed_acidity,
        'volatile_acidity': volatile_acidity,
        'citric_acid': citric_acid,
        'residual_sugar': residual_sugar,
        'chlorides': chlorides,
        'free_sulfur_dioxide': free_sulfur_dioxide,
        'total_sulfur_dioxide': total_sulfur_dioxide,
        'density': density,
        'pH': pH,
        'sulphates': sulphates,
        'alcohol': alcohol,
    }

    input_data = {}
    for name, value in measurements.items():
        try:
            input_data[name] = [float(value)]
        except (TypeError, ValueError) as err:
            # Keep the exception type ``float()`` raised, but say which field
            # was bad instead of a bare conversion error.
            raise type(err)(
                f"Invalid value for {name!r}: {value!r}"
            ) from err
    input_data['Id'] = [Id] if Id else [0]

    df = pd.DataFrame(input_data)

    prediction = pipeline.predict(df)
    probabilities = pipeline.predict_proba(df)

    row = probabilities[0]
    # Key the probabilities by the model's real class labels so they line up
    # with "Predicted Quality"; positional indices only match when the labels
    # happen to be 0..k-1.  Fall back to indices if labels are unavailable.
    labels = getattr(pipeline, "classes_", None)
    if labels is None or len(labels) != len(row):
        labels = range(len(row))

    result = {
        "Predicted Quality": int(prediction[0]),
        # Plain ``float`` so the value is JSON-serializable regardless of the
        # NumPy dtype the model returns.
        "Class Probabilities": {
            str(label): float(prob) for label, prob in zip(labels, row)
        },
    }
    return result
|
|
|
|
|
|
|
|
# Form fields, in the same order as the positional parameters of ``predict``:
# eleven numeric measurements followed by the optional free-text Id.
inputs = [
    gr.Number(label=field_label)
    for field_label in (
        'Fixed Acidity',
        'Volatile Acidity',
        'Citric Acid',
        'Residual Sugar',
        'Chlorides',
        'Free Sulfur Dioxide',
        'Total Sulfur Dioxide',
        'Density',
        'pH',
        'Sulphates',
        'Alcohol',
    )
]
inputs.append(gr.Textbox(label='Id (Optional)', placeholder="Optional"))
|
|
|
|
|
|
|
|
# Assemble the web UI: a form built from ``inputs`` whose submission calls
# ``predict`` and shows the returned dict in a JSON component.
interface = gr.Interface(
    title="Wine Quality Prediction",
    description="Enter wine parameters to predict its quality.",
    fn=predict,
    inputs=inputs,
    outputs=gr.Json(label="Prediction Output"),
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()
|
|
|