Spaces:

thanhcong2001
/

Pre_Salary

Sleeping

App Files Files Community

Pre_Salary / app.py

thanhcong2001

Update app.py

3c0e41e verified 4 months ago

raw

history blame contribute delete

2.92 kB

	import pandas as pd
	from sklearn.compose import ColumnTransformer
	from sklearn.preprocessing import OneHotEncoder, StandardScaler

	# Load the employee records that the rest of the script trains on.
	df = pd.read_csv('Employers_data.csv')

	# Numeric predictors: every int64 column except the identifier and the target.
	num_feature = (
	    df.select_dtypes(include='int64')
	    .columns
	    .drop(['Employee_ID', 'Salary'])
	    .to_list()
	)

	# Categorical predictors: every object-dtype column except 'Name'.
	cat_feature = (
	    df.select_dtypes(include='object')
	    .columns
	    .drop(['Name'])
	    .to_list()
	)

	# Standardise the numeric columns and one-hot encode the categorical ones
	# (the first level of each categorical column is dropped).
	preprocess = ColumnTransformer(
	    transformers=[
	        ('num', StandardScaler(), num_feature),
	        ('cat', OneHotEncoder(drop='first'), cat_feature),
	    ],
	)
	from sklearn.model_selection import train_test_split

	# Target is the salary; predictors are everything except the identifier,
	# the name and the target itself.
	y = df['Salary']
	x = df.drop(['Employee_ID', 'Name', 'Salary'], axis=1)

	# Hold out 40% of the rows for testing; the fixed seed makes the split repeatable.
	x_train, x_test, y_train, y_test = train_test_split(
	    x,
	    y,
	    test_size=0.4,
	    random_state=42,
	)

	# Fit the preprocessing on the training rows only, then reuse it for the test rows.
	x_train_scaled = preprocess.fit_transform(x_train)
	x_test_scaled = preprocess.transform(x_test)
	from tensorflow.keras.models import Sequential
	from tensorflow.keras.layers import Dense
	from tensorflow.keras import Input
	from tensorflow.keras.callbacks import EarlyStopping
	import keras_tuner as kt
	def build_model(hp):
	    """Build and compile the regression network for one tuner trial.

	    Args:
	        hp: KerasTuner hyperparameter container; it supplies the width of
	            each of the three hidden layers ('u1', 'u2', 'u3').

	    Returns:
	        A compiled ``Sequential`` model with a single linear output unit,
	        trained with the Adam optimizer on mean absolute error.
	    """
	    # (hyperparameter name, min units, max units, step) for each hidden layer.
	    hidden_specs = (
	        ('u1', 32, 256, 32),
	        ('u2', 16, 128, 16),
	        ('u3', 8, 64, 8),
	    )

	    # The input width is the number of columns produced by the preprocessing.
	    stack = [Input(shape=(x_test_scaled.shape[1],))]
	    for hp_name, low, high, stride in hidden_specs:
	        width = hp.Int(hp_name, low, high, step=stride)
	        stack.append(Dense(units=width, activation='relu'))
	    stack.append(Dense(1))

	    network = Sequential(stack)
	    network.compile(optimizer='adam', loss='mae', metrics=['mae'])
	    return network
	# Random search over the layer widths exposed by build_model, ranked by
	# validation loss.
	tuner = kt.RandomSearch(
	    hypermodel=build_model,
	    objective='val_loss',
	    max_trials=5,
	)

	# Stop a trial once validation loss has not improved for 20 epochs and roll
	# back to the best weights seen.
	early_stop = EarlyStopping(
	    monitor='val_loss',
	    patience=20,
	    restore_best_weights=True,
	)

	tuner.search(
	    x_train_scaled,
	    y_train,
	    epochs=500,
	    batch_size=64,
	    validation_split=0.3,
	    callbacks=[early_stop],
	    verbose=1,
	)

	# Keep the single best model and print a small sample of its test predictions.
	best_model = tuner.get_best_models(num_models=1)[0]
	y_pred = best_model.predict(x_test_scaled).flatten()
	for pre, ac in zip(y_pred[:5], y_test.iloc[:5]):
	    print(f'Predict: {pre:,.0f}$ - Actual: {ac:,.0f}$')
	def pre_salary(Age, Gender, Department, Job_Title,
	               Experience_Years, Education_Level, Location):
	    """Predict the salary for one employee profile entered in the UI.

	    The parameters are exactly the model's input features. The previous
	    signature also took ``Employee_ID`` and ``Name``, but those columns are
	    dropped from ``x`` before training and the Gradio form does not collect
	    them, so every call failed with a ``TypeError`` (7 values for 9
	    parameters), and the positional ``zip`` with ``x.columns`` would have
	    shifted every feature by two places.

	    Args:
	        Age: Employee age (number).
	        Gender: Selected gender label.
	        Department: Selected department label.
	        Job_Title: Selected job title label.
	        Experience_Years: Years of experience (number).
	        Education_Level: Selected education level label.
	        Location: Selected location label.

	    Returns:
	        A string of the form ``'Salary: 123,456$'``.

	    Raises:
	        ValueError: If a field was left blank, or if the number of inputs
	            does not match the number of training feature columns.
	    """
	    cols = list(x.columns)
	    vals = [Age, Gender, Department, Job_Title,
	            Experience_Years, Education_Level, Location]

	    # An untouched Gradio field arrives as None; fail with a clear message
	    # instead of an opaque error from inside the preprocessing pipeline.
	    if any(val is None for val in vals):
	        raise ValueError('All fields must be filled in before predicting.')

	    # Values are paired with the training columns by position, so the two
	    # sequences must line up one-to-one.
	    # NOTE(review): assumes the CSV column order matches the parameter
	    # order above - confirm against Employers_data.csv.
	    if len(cols) != len(vals):
	        raise ValueError(
	            f'Expected {len(cols)} feature values, got {len(vals)}.'
	        )

	    user = pd.DataFrame([dict(zip(cols, vals))])
	    prediction = best_model.predict(preprocess.transform(user)).item()
	    return f'Salary: {prediction:,.0f}$'
	import gradio as gr

	# One widget per value passed to pre_salary, in positional order.
	inputs = [
	    gr.Number(label='Age'),
	    gr.Radio(label='Gender', choices=['Female', 'Male']),
	    gr.Radio(
	        label='Department',
	        choices=['Engineering', 'Sales', 'Finance', 'HR', 'Marketing', 'Product'],
	    ),
	    gr.Radio(
	        label='Job Title',
	        choices=['Engineer', 'Executive', 'Intern', 'Analyst', 'Manager'],
	    ),
	    gr.Number(label='Experience Years'),
	    gr.Radio(label='Education Level', choices=['Master', 'Bachelor', 'PhD']),
	    gr.Radio(
	        label='Location',
	        choices=['Austin', 'Seattle', 'New York', 'San Francisco', 'Chicago'],
	    ),
	]

	# Wire the form to the prediction function and start serving the app.
	demo = gr.Interface(
	    fn=pre_salary,
	    inputs=inputs,
	    outputs='text',
	    title='Predict Salary!',
	)
	demo.launch()