Spaces:
Sleeping
Sleeping
| import pandas as pd | |
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Load the raw employee table from the CSV that ships next to this script.
df = pd.read_csv('Employers_data.csv')

# Numeric model inputs: every int64 column except the row identifier and the
# regression target.
num_feature = df.select_dtypes(include=['int64']).columns.drop(['Employee_ID', 'Salary']).to_list()
# Categorical model inputs: every object (string) column except the free-text
# employee name.
cat_feature = df.select_dtypes(include=['object']).columns.drop(['Name']).to_list()

# Standardise the numeric columns and one-hot encode the categorical ones
# (dropping the first level of each category).
# sparse_threshold=0 forces a dense NumPy result. With the default (0.3) the
# transformer returns a SciPy sparse matrix whenever the stacked output is
# sparse enough, and the Keras training call further down uses
# `validation_split`, which expects dense arrays/tensors.
preprocess = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), num_feature),
        ('cat', OneHotEncoder(drop='first'), cat_feature),
    ],
    sparse_threshold=0,
)
| from sklearn.model_selection import train_test_split | |
# Split dataset
# Target first, then the feature frame without the identifier columns and the
# target itself.
y = df['Salary']
x = df.drop(['Employee_ID', 'Name', 'Salary'], axis=1)

# Hold out 40% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.4,
    random_state=42,
)

# Fit the preprocessing on the training rows only, then apply the fitted
# transformer unchanged to the held-out rows.
x_train_scaled = preprocess.fit_transform(x_train)
x_test_scaled = preprocess.transform(x_test)
| from tensorflow.keras.models import Sequential | |
| from tensorflow.keras.layers import Dense | |
| from tensorflow.keras import Input | |
| from tensorflow.keras.callbacks import EarlyStopping | |
| import keras_tuner as kt | |
def build_model(hp):
    """Build and compile the salary regressor for one tuner trial.

    The network has three ReLU hidden layers whose widths are tunable
    hyperparameters (`u1`, `u2`, `u3`) followed by a single linear output
    unit. It is compiled with the Adam optimizer and mean absolute error as
    both loss and reported metric.

    Args:
        hp: Hyperparameter container supplied by Keras Tuner; its `Int`
            method is used to register and sample each layer width.

    Returns:
        The compiled Keras `Sequential` model.
    """
    # Input width is taken from the already-transformed feature matrix.
    n_features = x_test_scaled.shape[1]

    # (hyperparameter name, min units, max units, step) per hidden layer,
    # listed in the order the layers are stacked.
    hidden_specs = (
        ('u1', 32, 256, 32),
        ('u2', 16, 128, 16),
        ('u3', 8, 64, 8),
    )

    layers = [Input(shape=(n_features,))]
    for hp_name, low, high, step in hidden_specs:
        width = hp.Int(hp_name, low, high, step=step)
        layers.append(Dense(units=width, activation='relu'))
    layers.append(Dense(1))

    model = Sequential(layers)
    model.compile(optimizer='adam', loss='mae', metrics=['mae'])
    return model
# Random search over the hidden-layer widths defined in build_model, ranking
# trials by their validation loss.
tuner = kt.RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=5,
)

# Stop a trial once validation loss has not improved for 20 epochs and roll
# the model back to its best weights.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
)

# 30% of the training rows are set aside as the validation set for each trial.
tuner.search(
    x_train_scaled,
    y_train,
    epochs=500,
    batch_size=64,
    validation_split=0.3,
    callbacks=[early_stop],
    verbose=1,
)

# Keep only the top-ranked model and predict on the held-out test rows.
best_model = tuner.get_best_models(1)[0]
y_pred = best_model.predict(x_test_scaled).flatten()

# Print the first five predictions next to the true salaries as a quick check.
for pre, ac in zip(y_pred[:5], y_test[:5]):
    print(f'Predict: {pre:,.0f}$ - Actual: {ac:,.0f}$')
def pre_salary(Age, Gender, Department, Job_Title,
               Experience_Years, Education_Level, Location):
    """Predict the salary for a single employee profile.

    The previous signature also took `Employee_ID` and `Name`. Those columns
    are dropped from the feature frame `x`, so the nine values were zipped
    against seven column names and every value landed in the wrong column;
    the Gradio interface, which supplies seven inputs, could not call the
    function at all. The parameters now correspond one-to-one to the feature
    columns.

    Args:
        Age: Employee age (numeric).
        Gender: Gender category.
        Department: Department category.
        Job_Title: Job title category.
        Experience_Years: Years of experience (numeric).
        Education_Level: Education level category.
        Location: Location category.

    Returns:
        A display string of the form ``'Salary: 123,456$'``.

    Raises:
        ValueError: If the number of feature columns in `x` does not match
            the number of values supplied here.
    """
    # NOTE(review): values are paired with `x.columns` by position, so this
    # assumes the CSV's feature columns appear in the same order as the
    # parameters above -- confirm against the data file.
    cols = list(x.columns)
    vals = [Age, Gender, Department, Job_Title,
            Experience_Years, Education_Level, Location]
    if len(cols) != len(vals):
        # Fail loudly instead of letting zip() silently truncate and misalign.
        raise ValueError(
            f'Expected {len(cols)} feature values, got {len(vals)}'
        )

    # One-row frame with the training column names so the fitted
    # ColumnTransformer can select columns by name.
    user = pd.DataFrame([dict(zip(cols, vals))])
    prediction = best_model.predict(preprocess.transform(user)).item()
    return f'Salary: {prediction:,.0f}$'
| import gradio as gr | |
# Form fields as (label, choices) pairs; `None` choices means a free numeric
# field, otherwise a radio group with the listed options. Gradio hands the
# component values to `fn` positionally in this order.
_FIELDS = (
    ('Age', None),
    ('Gender', ['Female', 'Male']),
    ('Department', ['Engineering', 'Sales', 'Finance', 'HR', 'Marketing', 'Product']),
    ('Job Title', ['Engineer', 'Executive', 'Intern', 'Analyst', 'Manager']),
    ('Experience Years', None),
    ('Education Level', ['Master', 'Bachelor', 'PhD']),
    ('Location', ['Austin', 'Seattle', 'New York', 'San Francisco', 'Chicago']),
)

inputs = [
    gr.Number(label=label) if choices is None else gr.Radio(choices=choices, label=label)
    for label, choices in _FIELDS
]

# Text-output interface around the prediction function; launching starts the
# web app.
demo = gr.Interface(
    fn=pre_salary,
    inputs=inputs,
    outputs='text',
    title='Predict Salary!',
)
demo.launch()