#https://www.kaggle.com/competitions/playground-series-s3e8/data?select=train.csv
#https://www.americangemsociety.org/ags-diamond-grading-system/

import gradio as gr
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OrdinalEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
import warnings
warnings.filterwarnings("ignore")

# Load the gemstone dataset and discard the 'id' column, which is not used
# as a feature.
df = pd.read_csv('gemstone.csv').drop(columns=['id'])

# Features are every remaining column except 'price'; the target is kept as
# a one-column DataFrame.
X = df.drop(columns=['price'])
Y = df[['price']]

# Route columns to a preprocessing pipeline by dtype: object columns are
# treated as categorical, all others as numerical.
categorical_cols = X.select_dtypes(include='object').columns
numerical_cols = X.select_dtypes(exclude='object').columns

# Explicit category orderings for the ordinal encoder; a value's position in
# its list is the integer it is encoded to.
cut_categories = [
    'Fair',
    'Good',
    'Very Good',
    'Premium',
    'Ideal',
]
color_categories = [
    'D',
    'E',
    'F',
    'G',
    'H',
    'I',
    'J',
]
clarity_categories = [
    'I1',
    'SI2',
    'SI1',
    'VS2',
    'VS1',
    'VVS2',
    'VVS1',
    'IF',
]
# Numerical pipeline: fill missing values with the column median, then
# standardise.
num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical pipeline: fill missing values with the most frequent category,
# map each category to its position in the matching ordering list, then
# standardise the resulting integer codes.
# NOTE: the `categories` lists are matched to the categorical columns by
# position, so this relies on categorical_cols being cut, color, clarity in
# that order.
cat_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    (
        'ordinalencoder',
        OrdinalEncoder(
            categories=[cut_categories, color_categories, clarity_categories],
        ),
    ),
    ('scaler', StandardScaler()),
])

# Apply each pipeline to its own group of columns and concatenate the results.
preprocessor = ColumnTransformer(
    transformers=[
        ('num_pipeline', num_pipeline, numerical_cols),
        ('cat_pipeline', cat_pipeline, categorical_cols),
    ],
)

# Train/test split: 30% of the rows are held out, with a fixed seed so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.30, random_state=30
)

# Fit the preprocessor on the training features only, then apply the fitted
# transformation to both splits. The results are wrapped back into DataFrames
# so the transformed columns keep their generated feature names.
X_train = pd.DataFrame(
    preprocessor.fit_transform(X_train),
    columns=preprocessor.get_feature_names_out(),
)
X_test = pd.DataFrame(
    preprocessor.transform(X_test),
    columns=preprocessor.get_feature_names_out(),
)

# Fill missing target values with the training-set median. SimpleImputer
# returns 2-D arrays, so y_train / y_test are (n_samples, 1) from here on.
imputer = SimpleImputer(strategy='median')

y_train = imputer.fit_transform(y_train)
y_test = imputer.transform(y_test)

randomforestregressor = RandomForestRegressor()
# Estimators expect a 1-D target; passing the (n_samples, 1) column vector
# triggers a DataConversionWarning (hidden by the global warnings filter).
# Flatten at the call site so y_train itself keeps its shape.
randomforestregressor.fit(X_train, y_train.ravel())

def predict_price(carat, cut, color, clarity, depth, table, x, y, z):
    """Predict the price of a single diamond from its characteristics.

    The inputs are assembled into a one-row DataFrame, passed through the
    module-level ``preprocessor`` and scored with the module-level
    ``randomforestregressor``.

    Args:
        carat: Value for the 'carat' feature.
        cut: Value for the 'cut' feature (one of ``cut_categories``).
        color: Value for the 'color' feature (one of ``color_categories``).
        clarity: Value for the 'clarity' feature (one of
            ``clarity_categories``).
        depth: Value for the 'depth' feature.
        table: Value for the 'table' feature.
        x: Value for the 'x' feature.
        y: Value for the 'y' feature.
        z: Value for the 'z' feature.

    Returns:
        The predicted price as a plain Python float.
    """
    # The ColumnTransformer selects columns by name, so the key order here
    # does not need to match the training frame.
    data_df = pd.DataFrame(
        {
            'carat': [carat],
            'depth': [depth],
            'table': [table],
            'x': [x],
            'y': [y],
            'z': [z],
            'cut': [cut],
            'color': [color],
            'clarity': [clarity],
        }
    )

    # The model was fitted on a DataFrame carrying the preprocessor's output
    # feature names; give it the same named columns at prediction time rather
    # than a bare ndarray, so fit and predict inputs are consistent.
    processed_data = pd.DataFrame(
        preprocessor.transform(data_df),
        columns=preprocessor.get_feature_names_out(),
    )

    price_prediction = randomforestregressor.predict(processed_data)
    # Return a built-in float instead of a NumPy scalar.
    return float(price_prediction[0])

# Gradio front end: one input widget per predict_price argument, in the same
# order as the function signature.
iface = gr.Interface(
    fn=predict_price,
    inputs=[
        gr.Number(label="Carat"),
        # The dropdown choices reuse the lists given to the OrdinalEncoder so
        # the UI can never offer a category the encoder does not know.
        gr.Dropdown(choices=cut_categories, label="Cut"),
        gr.Dropdown(choices=color_categories, label="Color"),
        gr.Dropdown(choices=clarity_categories, label="Clarity"),
        gr.Number(label="Depth"),
        gr.Number(label="Table"),
        gr.Number(label="X"),
        gr.Number(label="Y"),
        gr.Number(label="Z"),
    ],
    outputs="number",
    title="Diamond Price Prediction",
    description="Enter Diamond Characteristics to Predict its Price"
)

# Start the app in debug mode and request a public share link.
iface.launch(debug=True,share=True)