# Pre_Salary / app.py
# thanhcong2001's picture
# Update app.py
# 3c0e41e verified
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Employee records, read from a path relative to the working directory.
df = pd.read_csv('Employers_data.csv')

# Numeric inputs: every int64 column except the row identifier and the target.
num_feature = (
    df.select_dtypes(include=['int64'])
    .columns
    .drop(['Employee_ID', 'Salary'])
    .to_list()
)

# Categorical inputs: every object-typed column except the employee name.
cat_feature = (
    df.select_dtypes(include=['object'])
    .columns
    .drop(['Name'])
    .to_list()
)

# Standardise the numeric columns and one-hot encode the categorical ones,
# dropping the first level of each category.
preprocess = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), num_feature),
        ('cat', OneHotEncoder(drop='first'), cat_feature),
    ]
)

# Target and feature frame; identifier and name columns are not model inputs.
y = df['Salary']
x = df.drop(columns=['Employee_ID', 'Name', 'Salary'])

# Hold out 40% of the rows for evaluation, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.4,
    random_state=42,
)

# Fit the preprocessing on the training rows only, then reuse it unchanged
# on the held-out rows.
x_train_scaled = preprocess.fit_transform(x_train)
x_test_scaled = preprocess.transform(x_test)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input
from tensorflow.keras.callbacks import EarlyStopping
import keras_tuner as kt
def build_model(hp):
    """Build and compile the regression network for a single tuner trial.

    Args:
        hp: hyperparameter object passed in by Keras Tuner; its ``Int``
            method picks the width of each of the three hidden layers.

    Returns:
        A compiled ``Sequential`` model: three ReLU hidden layers followed by
        one linear output unit, optimised with Adam on mean absolute error.
    """
    # Input width equals the column count of the preprocessed matrix.
    # NOTE(review): read from the test matrix; the training matrix comes from
    # the same fitted transformer, so the width is the same.
    n_inputs = x_test_scaled.shape[1]

    # (hyperparameter name, smallest width, largest width, step), in layer order.
    hidden_specs = (
        ('u1', 32, 256, 32),
        ('u2', 16, 128, 16),
        ('u3', 8, 64, 8),
    )

    layers = [Input(shape=(n_inputs,))]
    for hp_name, low, high, stride in hidden_specs:
        width = hp.Int(hp_name, low, high, step=stride)
        layers.append(Dense(units=width, activation='relu'))
    layers.append(Dense(1))

    model = Sequential(layers)
    model.compile(optimizer='adam', loss='mae', metrics=['mae'])
    return model
# Random search over the hidden-layer widths: five trials, ranked by
# validation loss.
tuner = kt.RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=5,
)

# Stop a trial after 20 epochs without validation improvement and roll the
# weights back to the best epoch seen.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
)

# 30% of the training rows are set aside as the validation set for each trial.
tuner.search(
    x_train_scaled,
    y_train,
    epochs=500,
    batch_size=64,
    validation_split=0.3,
    callbacks=[early_stop],
    verbose=1,
)

# Keep the top-ranked model and score the held-out rows with it.
best_model = tuner.get_best_models(1)[0]
raw_predictions = best_model.predict(x_test_scaled)
y_pred = raw_predictions.flatten()

# Print the first five predictions next to the actual salaries.
preview = zip(y_pred[:5], y_test[:5])
for pre, ac in preview:
    print(f'Predict: {pre:,.0f}$ - Actual: {ac:,.0f}$')
def pre_salary(Age, Gender, Department, Job_Title,
               Experience_Years, Education_Level, Location):
    """Predict the salary for one employee profile and format it for display.

    The seven parameters correspond, in order, to the seven input widgets the
    Gradio interface passes to this function.  The previous signature also
    took ``Employee_ID`` and ``Name``, which are dropped from the feature
    frame ``x``; as a result the seven-widget interface could not call the
    function, and every supplied value was paired with the wrong column.

    Values are paired with ``x.columns`` by position.
    NOTE(review): this assumes the feature columns in the CSV appear in the
    same order as the parameters above -- confirm against the data file.

    Returns:
        A string such as ``'Salary: 12,345$'``.

    Raises:
        ValueError: if the number of feature columns differs from the number
            of inputs, or if any input was left empty (``None``).
    """
    cols = list(x.columns)
    vals = [Age, Gender, Department, Job_Title,
            Experience_Years, Education_Level, Location]

    # Fail loudly instead of letting zip() silently truncate a mismatch.
    if len(cols) != len(vals):
        raise ValueError(
            f'Expected {len(cols)} feature values, got {len(vals)}'
        )

    # An unfilled widget arrives as None; report it before the transformer
    # raises a less readable error.
    missing = [str(col) for col, val in zip(cols, vals) if val is None]
    if missing:
        raise ValueError(f"Missing value for: {', '.join(missing)}")

    # Single-row frame with the training column names, so the fitted
    # transformer can select columns by name.
    user = pd.DataFrame([dict(zip(cols, vals))])
    prediction = best_model.predict(preprocess.transform(user)).item()
    return f'Salary: {prediction:,.0f}$'
import gradio as gr
# Input widgets; their values are passed to `pre_salary` in this order.
inputs = [
    gr.Number(label='Age'),
    gr.Radio(
        label='Gender',
        choices=['Female', 'Male'],
    ),
    gr.Radio(
        label='Department',
        choices=['Engineering', 'Sales', 'Finance', 'HR', 'Marketing', 'Product'],
    ),
    gr.Radio(
        label='Job Title',
        choices=['Engineer', 'Executive', 'Intern', 'Analyst', 'Manager'],
    ),
    gr.Number(label='Experience Years'),
    gr.Radio(
        label='Education Level',
        choices=['Master', 'Bachelor', 'PhD'],
    ),
    gr.Radio(
        label='Location',
        choices=['Austin', 'Seattle', 'New York', 'San Francisco', 'Chicago'],
    ),
]

# Single-function web UI: the widgets above in, one text field out.
demo = gr.Interface(
    fn=pre_salary,
    inputs=inputs,
    outputs='text',
    title='Predict Salary!',
)
demo.launch()